added importer for Kasseturm Weimar

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-30 23:45:58 +01:00
parent 7c5253c162
commit 34a9637845
3 changed files with 153 additions and 7 deletions

View File

@@ -0,0 +1,152 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.weimar;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.HREF;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
public class Kasseturm extends BaseImporter {
public static final Coords DEFAULT_COORDS= new Coords(50.98191, 11.32599);
@Override
protected String baseUrl() {
return "https://kasseturm.de/";
}
@Override
public String description() {
return "Importer für Events des Kasseturm Weimar";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals(CLASS,"wpem-single-event-body-content");
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var desc = new Tag("div");
super.extractDescriptionTag(eventTag).optional().ifPresent(tag -> tag.find(IS_PARAGRAPH).forEach(desc::add));
return Payload.of(desc);
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals(CLASS,"wpem-single-event-body-content");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
return Payload.of(DEFAULT_COORDS);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return attributeEquals(CLASS,"wpem-single-event-body");
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var opt = programPage.optional();
if (opt.isEmpty()) return transform(programPage);
var list = opt.get()
.find(withAttribute(HREF))
.stream()
.map(anchor -> anchor.get(HREF))
.filter(Objects::nonNull)
.filter(url -> url.contains("/veranstaltung/"))
.distinct()
.toList();
return Payload.of(list);
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeEquals(CLASS,"wpem-single-event-body");
}
@Override
protected Result<String> extractLocation(Tag eventTag) {
String loc = "Kasseturm, Goetheplatz 10, 99423 Weimar";
var opt = super.extractLocation(eventTag).optional();
return Payload.of(opt.map(s -> s.trim()+", "+loc).orElse(loc));
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return attributeContains(HREF,"maps.google.com");
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeEquals(CLASS,"wpem-event-date-time-text");
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeEquals(CLASS,"wpem-event-date-time-text");
}
@Override
protected List<String> extractTags(Tag eventTag) {
var list = new ArrayList<String>();
list.add("Weimar");
list.add("Kasseturm");
eventTag.find(IS_SPAN.and(attributeHas(CLASS,"event-type").or(attributeHas(CLASS,"event-category")))).forEach(tag -> tag.inner(0).ifPresent(list::add));
return list;
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return ofType("h3");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return null;
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
return Util.parseGermanDate(string);
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
return Util.parseGermanTime(string);
}
@Override
protected String programURL() {
return baseUrl();
}
}