added parser for Wotufa
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -14,6 +14,6 @@ dependencies {
|
||||
implementation("de.srsoftware:tools.logging:1.0.3")
|
||||
implementation("de.srsoftware:tools.plugin:1.0.1")
|
||||
implementation("de.srsoftware:tools.util:1.3.0")
|
||||
implementation("de.srsoftware:tools.web:1.3.12")
|
||||
implementation("de.srsoftware:tools.web:1.3.14")
|
||||
implementation("com.mysql:mysql-connector-j:9.1.0")
|
||||
}
|
||||
|
||||
@@ -5,6 +5,6 @@ dependencies {
|
||||
|
||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||
implementation("de.srsoftware:tools.util:1.3.0")
|
||||
implementation("de.srsoftware:tools.web:1.3.12")
|
||||
implementation("de.srsoftware:tools.web:1.3.14")
|
||||
implementation("org.json:json:20240303")
|
||||
}
|
||||
|
||||
@@ -13,8 +13,6 @@ import de.srsoftware.tools.*;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
|
||||
@@ -37,8 +37,8 @@ public class Util {
|
||||
public static final String VEVENT = "VEVENT";
|
||||
public static final String VCALENDAR = "VCALENDAR";
|
||||
|
||||
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("\\D(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})\\D");
|
||||
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("\\D(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?\\D");
|
||||
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("^\\D*(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})\\D");
|
||||
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?\\D");
|
||||
private static final Pattern BG_IMAGE_URL = Pattern.compile("background(-image)?:\\surl\\(([^)]+)\\)");
|
||||
private static final System.Logger LOG = System.getLogger(Util.class.getSimpleName());
|
||||
|
||||
|
||||
@@ -326,7 +326,7 @@ public class MariaDB implements Database {
|
||||
.apply(event.title(), event.description(), start, end, location, coords);
|
||||
|
||||
// TODO: update links, attachments, tags
|
||||
|
||||
LOG.log(WARNING,"updating of tags, links and attachments not implemented!");
|
||||
return Payload.of(event);
|
||||
} catch (SQLException sqle) {
|
||||
return error(sqle, "Failed to update database entry");
|
||||
|
||||
@@ -5,5 +5,5 @@ dependencies {
|
||||
implementation(project(":de.srsoftware.cal.base"))
|
||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||
implementation("de.srsoftware:tools.util:1.3.0")
|
||||
implementation("de.srsoftware:tools.web:1.3.12")
|
||||
implementation("de.srsoftware:tools.web:1.3.14")
|
||||
}
|
||||
|
||||
@@ -13,17 +13,12 @@ import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@@ -68,6 +63,7 @@ public class Psychochor extends SinglePageImporter {
|
||||
}
|
||||
|
||||
private Result<Coords> parseCoords(String s) {
|
||||
// TODO: diese Koordinaten sind immer irgendwie daneben!?
|
||||
var latitude = LATITUDE.matcher(s);
|
||||
var longitude = LONGITUDE.matcher(s);
|
||||
if (latitude.find() && longitude.find()){
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.neustadt;
|
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanDate;
|
||||
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.CLASS;
|
||||
import static de.srsoftware.tools.Tag.HREF;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
import static java.util.function.Predicate.not;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class Wotufa extends BaseImporter {
|
||||
|
||||
private static final String DEFAULT_LOCATION = "Wotufa-Saal, Ziegenrücker Str. 6, 07806 Neustadt an der Orla";
|
||||
public static final Coords DEFAULT_COORDS = new Coords(50.73184, 11.74444);
|
||||
private static final LocalTime DEFAULT_TIME = LocalTime.of(19,0);
|
||||
|
||||
public Wotufa() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return "http://wotufa.de";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Importer für Events im Wotufa-Saal Neustadt an der Orla";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return ofType("table").and(attributeEquals(CLASS,"kalDetl"));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return cellWithContext("Details");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
return Payload.of(DEFAULT_COORDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return ofType("table").and(attributeContains(CLASS,"kalDetl"));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
var opt = programPage.optional();
|
||||
if (opt.isEmpty()) return transform(programPage);
|
||||
List<String> list = opt.get().find(IS_ANCHOR)
|
||||
.stream().map(a -> a.get(HREF))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(url -> url.contains("kal_Aktion=detail"))
|
||||
.map(url -> url.contains("://") ? url : baseUrl() + url)
|
||||
.map(url -> url.replace("&","&")) // glitch on that website
|
||||
.distinct()
|
||||
.toList();
|
||||
return Payload.of(list);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return ofType("table").and(attributeEquals(CLASS,"kalDetl"));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
Result<Tag> locationTag = extractLocationTag(eventTag);
|
||||
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
||||
var loc = locationTag.optional().get().strip();
|
||||
return Payload.of("Neustadt an der Orla".equals(loc) ? DEFAULT_LOCATION : loc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return cellWithContext("Ort");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return cellWithContext("Datum");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return cellWithContext("Zeit").or(cellWithContext("Details"));
|
||||
}
|
||||
|
||||
protected Result<Tag> extractStartTimeTag(Tag eventTag) {
|
||||
var list = eventTag.find(extractStartTimeFilter());
|
||||
if (list.isEmpty()) return error("Failed to find start time tag");
|
||||
return Payload.of(new Tag("group").addAll(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
return List.of("Wotufa","Neustadt/Orla");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return cellWithContext("Veranstaltung");
|
||||
}
|
||||
|
||||
private Predicate<Tag> cellWithContext(String key){
|
||||
return ofType("td")
|
||||
.and(not(withAttribute("width")))
|
||||
.and(tag -> tag.parent().map(Tag::strip).filter(content -> content.startsWith(key)).isPresent());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseEndDate(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
return parseGermanDate(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return parseGermanTime(string).map(res -> res.optional().isEmpty() ? Payload.of(DEFAULT_TIME) : res);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl()+"/eventkalender/kalender.php";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user