added parser for Wotufa

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-02 15:39:16 +01:00
parent 2491e4fbf0
commit 87c67f4aee
8 changed files with 171 additions and 13 deletions

View File

@@ -13,17 +13,12 @@ import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@@ -68,6 +63,7 @@ public class Psychochor extends SinglePageImporter {
}
private Result<Coords> parseCoords(String s) {
// TODO: diese Koordinaten sind immer irgendwie daneben!?
var latitude = LATITUDE.matcher(s);
var longitude = LONGITUDE.matcher(s);
if (latitude.find() && longitude.find()){

View File

@@ -0,0 +1,164 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.neustadt;
import static de.srsoftware.cal.Util.parseGermanDate;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.HREF;
import static de.srsoftware.tools.TagFilter.*;
import static java.util.function.Predicate.not;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
/**
 * Importer for the event calendar published on wotufa.de
 * (Wotufa-Saal, Neustadt an der Orla).
 *
 * <p>The program page is scanned for links to event detail pages; each detail page
 * contains a table with class {@code kalDetl} whose rows are looked up by their
 * leading label ("Datum", "Zeit", "Ort", "Veranstaltung", "Details").
 */
public class Wotufa extends BaseImporter {
	/** Full address used when the page only names the town as location. */
	private static final String DEFAULT_LOCATION = "Wotufa-Saal, Ziegenrücker Str. 6, 07806 Neustadt an der Orla";
	/** Coordinates reported for every event of this importer (see {@link #extractCoords(Tag)}). */
	public static final Coords DEFAULT_COORDS = new Coords(50.73184, 11.74444);
	/** Start time used when no time can be parsed from the event page. */
	private static final LocalTime DEFAULT_TIME = LocalTime.of(19,0);

	public Wotufa() throws NoSuchAlgorithmException {
		super();
	}

	/** @return scheme and host of the imported site, without trailing slash */
	@Override
	protected String baseUrl() {
		return "http://wotufa.de";
	}

	/** @return human readable description of this importer */
	@Override
	public String description() {
		return "Importer für Events im Wotufa-Saal Neustadt an der Orla";
	}

	/**
	 * @return filter matching the event detail table, in which attachments are searched
	 */
	@Override
	protected Predicate<Tag> extractAttachmentsFilter() {
		// NOTE(review): exact class match here, while extractEventTagFilter() uses attributeContains
		// for the same class name – confirm whether the difference is intended.
		return ofType("table").and(attributeEquals(CLASS,"kalDetl"));
	}

	/** @return filter matching the value cell of the "Details" row */
	@Override
	protected Predicate<Tag> extractDescriptionFilter() {
		return cellWithContext("Details");
	}

	/**
	 * The event page is not inspected for coordinates.
	 *
	 * @param eventTag ignored
	 * @return always {@link #DEFAULT_COORDS}
	 */
	@Override
	protected Result<Coords> extractCoords(Tag eventTag) {
		return Payload.of(DEFAULT_COORDS);
	}

	/**
	 * @return always {@code null}, no end date is extracted by this importer
	 */
	@Override
	protected Predicate<Tag> extractEndDateFilter() {
		// NOTE(review): presumably BaseImporter treats a null filter as "not available" – confirm.
		return null;
	}

	/**
	 * @return always {@code null}, no end time is extracted by this importer
	 */
	@Override
	protected Predicate<Tag> extractEndTimeFilter() {
		// NOTE(review): presumably BaseImporter treats a null filter as "not available" – confirm.
		return null;
	}

	/** @return filter matching the table (class contains {@code kalDetl}) that holds one event */
	@Override
	protected Predicate<Tag> extractEventTagFilter() {
		return ofType("table").and(attributeContains(CLASS,"kalDetl"));
	}

	/**
	 * Collects the URLs of all event detail pages linked from the program page.
	 *
	 * @param programPage the parsed program page, or a failed result
	 * @return the distinct, absolute detail page URLs; if {@code programPage} carries no
	 *         tag, the failure is passed on via {@code transform}
	 */
	@Override
	protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
		var page = programPage.optional();
		if (page.isEmpty()) return transform(programPage);
		List<String> urls = page.get().find(IS_ANCHOR)
			.stream()
			.map(anchor -> anchor.get(HREF))
			.filter(Objects::nonNull) // anchors without href
			.filter(href -> href.contains("kal_Aktion=detail")) // only links to detail pages
			.map(this::absoluteUrl)
			.map(url -> url.replace("&amp;","&")) // glitch on that website
			.distinct()
			.toList();
		return Payload.of(urls);
	}

	/**
	 * Turns a link found on the program page into an absolute URL.
	 *
	 * <p>Links containing {@code ://} are returned unchanged and links starting with
	 * {@code /} are appended to {@link #baseUrl()}. Query-only links ({@code ?…}) are
	 * appended to {@link #programURL()}, any other relative link is resolved against the
	 * directory of {@link #programURL()}. Plain concatenation with {@link #baseUrl()}
	 * would glue such links directly onto the host name.
	 *
	 * @param href non-null value of an anchor's href attribute
	 * @return absolute URL for {@code href}
	 */
	private String absoluteUrl(String href) {
		if (href.contains("://")) return href;
		if (href.startsWith("/")) return baseUrl() + href;
		var page = programURL();
		if (href.startsWith("?")) return page + href;
		// keep everything up to and including the last slash, i.e. the directory of the program page
		return page.substring(0, page.lastIndexOf('/') + 1) + href;
	}

	/**
	 * @return filter matching the event detail table, in which links are searched
	 */
	@Override
	protected Predicate<Tag> extractLinksFilter() {
		return ofType("table").and(attributeEquals(CLASS,"kalDetl"));
	}

	/**
	 * Reads the location from the event page.
	 *
	 * @param eventTag tag holding the event
	 * @return the stripped content of the location tag; if that is exactly
	 *         "Neustadt an der Orla", {@link #DEFAULT_LOCATION} is returned instead.
	 *         If no location tag is found, the failure is passed on via {@code transform}.
	 */
	@Override
	protected Result<String> extractLocation(Tag eventTag) {
		Result<Tag> locationTag = extractLocationTag(eventTag);
		var tag = locationTag.optional();
		if (tag.isEmpty()) return transform(locationTag);
		var loc = tag.get().strip();
		return Payload.of("Neustadt an der Orla".equals(loc) ? DEFAULT_LOCATION : loc);
	}

	/** @return filter matching the value cell of the "Ort" row */
	@Override
	protected Predicate<Tag> extractLocationFilter() {
		return cellWithContext("Ort");
	}

	/** @return filter matching the value cell of the "Datum" row */
	@Override
	protected Predicate<Tag> extractStartDateFilter() {
		return cellWithContext("Datum");
	}

	/** @return filter matching the value cells of both the "Zeit" and the "Details" row */
	@Override
	protected Predicate<Tag> extractStartTimeFilter() {
		return cellWithContext("Zeit").or(cellWithContext("Details"));
	}

	/**
	 * Gathers all cells matched by {@link #extractStartTimeFilter()} into one synthetic
	 * {@code group} tag.
	 *
	 * @param eventTag tag holding the event
	 * @return the group tag, or an error if no cell matched
	 */
	// NOTE(review): presumably overrides a BaseImporter hook (like extractLocationTag);
	// add @Override once confirmed.
	protected Result<Tag> extractStartTimeTag(Tag eventTag) {
		var list = eventTag.find(extractStartTimeFilter());
		if (list.isEmpty()) return error("Failed to find start time tag");
		return Payload.of(new Tag("group").addAll(list));
	}

	/**
	 * @param eventTag ignored
	 * @return the fixed tag list assigned to every event of this importer
	 */
	@Override
	protected List<String> extractTags(Tag eventTag) {
		return List.of("Wotufa","Neustadt/Orla");
	}

	/** @return filter matching the value cell of the "Veranstaltung" row */
	@Override
	protected Predicate<Tag> extractTitleFilter() {
		return cellWithContext("Veranstaltung");
	}

	/**
	 * Builds a filter for the value cell of a labelled table row.
	 *
	 * @param key label the stripped content of the cell's parent has to start with
	 * @return filter matching {@code td} tags that carry no {@code width} attribute and
	 *         whose parent's stripped content starts with {@code key}
	 */
	private Predicate<Tag> cellWithContext(String key){
		return ofType("td")
			.and(not(withAttribute("width")))
			.and(tag -> tag.parent().map(Tag::strip).filter(content -> content.startsWith(key)).isPresent());
	}

	/**
	 * @param string ignored
	 * @return always {@code null}, end dates are not supported by this importer
	 */
	@Override
	protected Result<LocalDate> parseEndDate(String string) {
		return null;
	}

	/**
	 * @param string ignored
	 * @return always {@code null}, end times are not supported by this importer
	 */
	@Override
	protected Result<LocalTime> parseEndTime(String string) {
		return null;
	}

	/**
	 * @param string text of the start date cell
	 * @return result of {@code parseGermanDate} for the given text
	 */
	@Override
	protected Result<LocalDate> parseStartDate(String string) {
		return parseGermanDate(string);
	}

	/**
	 * @param string text gathered from the start time cells
	 * @return the time parsed by {@code parseGermanTime}; a result without payload is
	 *         replaced by {@link #DEFAULT_TIME}
	 */
	@Override
	protected Result<LocalTime> parseStartTime(String string) {
		return parseGermanTime(string).map(res -> res.optional().isEmpty() ? Payload.of(DEFAULT_TIME) : res);
	}

	/** @return URL of the calendar page listing the events */
	@Override
	protected String programURL() {
		return baseUrl()+"/eventkalender/kalender.php";
	}
}