refactored Rosenkeller importer

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-01 18:21:12 +01:00
parent 3c7313fc01
commit af4a06afc0
6 changed files with 117 additions and 73 deletions

View File

@@ -14,6 +14,6 @@ dependencies {
implementation("de.srsoftware:tools.logging:1.0.3")
implementation("de.srsoftware:tools.plugin:1.0.1")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("com.mysql:mysql-connector-j:9.1.0")
}

View File

@@ -5,6 +5,6 @@ dependencies {
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("org.json:json:20240303")
}

View File

@@ -77,7 +77,7 @@ public abstract class BaseImporter implements Importer {
protected Result<Tag> extractDescriptionTag(Tag eventTag){
var list = eventTag.find(extractDescriptionFilter());
if (list.isEmpty()) return error("Failed to find attachments tag");
if (list.isEmpty()) return error("Failed to find description tag");
return Payload.of(list.getFirst());
}
@@ -97,7 +97,7 @@ public abstract class BaseImporter implements Importer {
return parseEndDate(opt.get().strip());
}
private Result<Tag> extractEndDateTag(Tag eventTag) {
protected Result<Tag> extractEndDateTag(Tag eventTag) {
var list = eventTag.find(extractEndDateFilter());
if (list.isEmpty()) return error("Failed to find end date tag");
return Payload.of(list.getFirst());
@@ -112,7 +112,7 @@ public abstract class BaseImporter implements Importer {
return parseEndTime(opt.get().strip());
}
private Result<Tag> extractEndTimeTag(Tag eventTag) {
protected Result<Tag> extractEndTimeTag(Tag eventTag) {
var list = eventTag.find(extractEndTimeFilter());
if (list.isEmpty()) return error("Failed to find end time tag");
return Payload.of(list.getFirst());
@@ -232,7 +232,7 @@ public abstract class BaseImporter implements Importer {
return parseStartDate(opt.get().strip());
}
private Result<Tag> extractStartDateTag(Tag eventTag) {
protected Result<Tag> extractStartDateTag(Tag eventTag) {
var list = eventTag.find(extractStartDateFilter());
if (list.isEmpty()) return error("Failed to find start date tag");
return Payload.of(list.getFirst());
@@ -247,7 +247,7 @@ public abstract class BaseImporter implements Importer {
return parseStartTime(opt.get().strip());
}
private Result<Tag> extractStartTimeTag(Tag eventTag) {
protected Result<Tag> extractStartTimeTag(Tag eventTag) {
var list = eventTag.find(extractStartTimeFilter());
if (list.isEmpty()) return error("Failed to find start time tag");
return Payload.of(list.getFirst());

View File

@@ -111,7 +111,7 @@ public class Util {
var second = sec == null ? 0 : Integer.parseInt(sec);
return Payload.of(LocalTime.of(hour,minute,second));
}
return error("Failed to find date");
return error("Failed to find time");
}
/**

View File

@@ -5,5 +5,5 @@ dependencies {
implementation(project(":de.srsoftware.cal.base"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
}

View File

@@ -2,27 +2,31 @@
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.nullable;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.TagFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.regex.Pattern;
import java.util.function.Predicate;
public abstract class Rosenkeller extends BaseImporter {
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
public class Rosenkeller extends BaseImporter {
private static final String BASE_URL = "https://rosenkeller.org";
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
private static final Coords COORDS = new Coords(50.92945, 11.58491);
private static final Coords COORDS = new Coords(50.92945, 11.58491);
public Rosenkeller() throws NoSuchAlgorithmException {
super();
}
@@ -33,8 +37,18 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
}
@Override
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
return Payload.of(eventTag);
/**
 * Returns the (German) human-readable description of this importer.
 *
 * @return the fixed description text
 */
public String description() {
    final String text = "Importiert Events des Studentenclubs „Rosenkeller“ in Jena";
    return text;
}
/**
 * Provides the predicate for the attachments tag: it matches tags whose
 * {@code ID} attribute equals {@code tribe-events-content}.
 *
 * @return the predicate described above
 */
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
    Predicate<Tag> hasContentId = attributeEquals(ID, "tribe-events-content");
    return hasContentId;
}
/**
 * Provides the predicate for the description tag: it matches tags whose
 * {@code CLASS} attribute contains {@code single-event-description}.
 *
 * @return the predicate described above
 */
@Override
protected Predicate<Tag> extractDescriptionFilter() {
    Predicate<Tag> isDescription = attributeContains(CLASS, "single-event-description");
    return isDescription;
}
@Override
@@ -42,53 +56,80 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
return Payload.of(COORDS);
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var opt = eventTag //
.find(attributeHas("class", "tribe-events-single-event-description"))
.stream()
.findAny();
if (opt.isPresent()) return Payload.of(opt.get());
return error("Failed to find description tag");
}
protected Result<Tag> extractEndTag(Tag eventTag) {
return error("extractEndTag(…) not supported");
/**
 * Determines the end date of an event from the {@code TITLE} attribute of its end date tag.
 *
 * @param eventTag the tag that is searched for the end date tag
 * @return the outcome of {@link #parseEndDate(String)} applied to the {@code TITLE}
 *         attribute, or the lookup result passed through {@code transform(…)}
 *         when no end date tag is available
 */
protected Result<LocalDate> extractEndDate(Tag eventTag) {
    Result<Tag> lookup = extractEndDateTag(eventTag);
    var found = lookup.optional();
    if (found.isPresent()) {
        var tag = found.get();
        // the date is read from the attribute, not from the tag's text content
        return parseEndDate(tag.get(TITLE));
    }
    return transform(lookup);
}
@Override
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
if (pageResult.optional().isEmpty()) return transform(pageResult);
var list = pageResult.optional().get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
/**
 * Provides the predicate for the end date tag: the {@code CLASS} attribute has
 * {@code tribe-events-abbr} and contains both {@code tribe-events-end} and {@code date}.
 *
 * @return the combined predicate
 */
protected Predicate<Tag> extractEndDateFilter() {
    var abbreviation = attributeHas(CLASS, "tribe-events-abbr");
    return abbreviation.and(
            attributeContains(CLASS, "tribe-events-end")
                    .and(attributeContains(CLASS, "date")));
}
/**
 * Provides the predicate for the end time tag: the {@code CLASS} attribute has
 * {@code tribe-events-abbr} and contains both {@code tribe-events-end} and {@code time}.
 *
 * @return the combined predicate
 */
@Override
protected Predicate<Tag> extractEndTimeFilter() {
    var abbreviation = attributeHas(CLASS, "tribe-events-abbr");
    return abbreviation.and(
            attributeContains(CLASS, "tribe-events-end")
                    .and(attributeContains(CLASS, "time")));
}
/**
 * Provides the predicate for the event tag: it matches tags whose
 * {@code ID} attribute equals {@code tribe-events-content}.
 *
 * @return the predicate described above
 */
@Override
protected Predicate<Tag> extractEventTagFilter() {
    Predicate<Tag> hasContentId = attributeEquals(ID, "tribe-events-content");
    return hasContentId;
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
if (programPage.optional().isEmpty()) return transform(programPage);
List<String> list = programPage.optional()
.get() //
.find(attributeStartsWith("id", "event-"))
.stream()
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
.get() //
.find(attributeStartsWith(ID, "event-"))
.stream()
.map(t -> t.find(attributeEquals(CLASS, "ect-event-url")))
.flatMap(List::stream)
.map(t -> t.get(HREF))
.toList();
return Payload.of(list);
}
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
/**
 * Provides the predicate for the links tag: it matches tags whose
 * {@code CLASS} attribute contains {@code single-event-description}.
 *
 * @return the predicate described above
 */
protected Predicate<Tag> extractLinksFilter() {
    Predicate<Tag> isDescription = attributeContains(CLASS, "single-event-description");
    return isDescription;
}
protected Result<Tag> extractStartTag(Tag eventTag) {
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to locate start tag");
/**
 * Returns the fixed venue address instead of reading it from the page.
 *
 * @param eventTag ignored; the location does not depend on the event
 * @return a payload wrapping {@code DEFAULT_LOCATION}
 */
@Override
protected Result<String> extractLocation(Tag eventTag) {
    Result<String> fixedLocation = Payload.of(DEFAULT_LOCATION);
    return fixedLocation;
}
/**
 * Provides a predicate that matches no tag at all.
 *
 * <p>{@link #extractLocation(Tag)} in this class returns the constant
 * {@code DEFAULT_LOCATION}, so no location tag needs to be found. A never-matching
 * predicate is returned instead of {@code null} so that any caller which still
 * evaluates the filter gets a well-defined "nothing found" outcome rather than
 * having to deal with a {@code null} predicate.
 *
 * @return a non-null predicate that rejects every tag
 */
@Override
protected Predicate<Tag> extractLocationFilter() {
    return tag -> false;
}
/**
 * Determines the start date of an event from the {@code TITLE} attribute of its start date tag.
 *
 * @param eventTag the tag that is searched for the start date tag
 * @return the outcome of {@link #parseStartDate(String)} applied to the {@code TITLE}
 *         attribute, or the lookup result passed through {@code transform(…)}
 *         when no start date tag is available
 */
protected Result<LocalDate> extractStartDate(Tag eventTag) {
    Result<Tag> lookup = extractStartDateTag(eventTag);
    var found = lookup.optional();
    if (found.isPresent()) {
        var tag = found.get();
        // the date is read from the attribute, not from the tag's text content
        return parseStartDate(tag.get(TITLE));
    }
    return transform(lookup);
}
/**
 * Locates the tag that carries the start date of an event.
 *
 * <p>Overrides the {@code protected} method of the same signature in
 * {@link BaseImporter}; the annotation lets the compiler verify that relationship.
 *
 * @param eventTag the tag that is searched with {@link #extractStartDateFilter()}
 * @return a payload with the first matching tag, or an error result when nothing matches
 */
@Override
protected Result<Tag> extractStartDateTag(Tag eventTag) {
    var matches = eventTag.find(extractStartDateFilter());
    if (matches.isEmpty()) return error("Failed to find start date tag");
    return Payload.of(matches.getFirst());
}
/**
 * Provides the predicate for the start date tag: the {@code CLASS} attribute has
 * {@code tribe-events-abbr} and contains both {@code tribe-events-start} and {@code date}.
 *
 * @return the combined predicate
 */
@Override
protected Predicate<Tag> extractStartDateFilter() {
    var abbreviation = attributeHas(CLASS, "tribe-events-abbr");
    return abbreviation.and(
            attributeContains(CLASS, "tribe-events-start")
                    .and(attributeContains(CLASS, "date")));
}
/**
 * Provides the predicate for the start time tag: the {@code CLASS} attribute has
 * {@code tribe-events-abbr} and contains both {@code tribe-events-start} and {@code time}.
 *
 * @return the combined predicate
 */
@Override
protected Predicate<Tag> extractStartTimeFilter() {
    var abbreviation = attributeHas(CLASS, "tribe-events-abbr");
    return abbreviation.and(
            attributeContains(CLASS, "tribe-events-start")
                    .and(attributeContains(CLASS, "time")));
}
@Override
@@ -97,33 +138,36 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
}
@Override
protected Result<Tag> extractTitleTag(Tag eventTag) {
var list = eventTag.find(attributeEndsWith("class", "single-event-title"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to find title tag");
/**
 * Provides the predicate for the title tag, built with {@code ofType("h1")}.
 *
 * @return the predicate described above
 */
protected Predicate<Tag> extractTitleFilter() {
    Predicate<Tag> isHeadline = ofType("h1");
    return isHeadline;
}
@Override
protected Result<LocalDate> parseEndDate(String text) {
return error("parseEndDate(…) not supported");
}
@Override
protected Result<LocalDate> parseStartDate(String text) {
var match = DATE_PATTERN.matcher(text);
if (match.find()) {
var dayOfMonth = Integer.parseInt(match.group(1));
var month = toNumericMonth(match.group(2));
if (month.optional().isEmpty()) return transform(month);
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
var now = LocalDateTime.now();
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
if (date.isBefore(now)) date = date.plusYears(1);
//return Payload.of(date);
protected Result<LocalDate> parseEndDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
return error("Failed to recognize a date in \"%s\"", text);
}
/**
 * Parses the end time of an event by delegating to {@code Util.parseGermanTime}.
 *
 * @param string the text to parse
 * @return whatever {@code Util.parseGermanTime} yields for {@code string}
 */
@Override
protected Result<LocalTime> parseEndTime(String string) {
    Result<LocalTime> endTime = Util.parseGermanTime(string);
    return endTime;
}
/**
 * Parses the start date of an event using {@link DateTimeFormatter#ISO_DATE}.
 *
 * @param string the text to parse
 * @return a payload with the parsed date, or an error result wrapping the exception
 *         when parsing fails for any reason
 */
@Override
protected Result<LocalDate> parseStartDate(String string) {
    try {
        var parsed = LocalDate.parse(string, DateTimeFormatter.ISO_DATE);
        return Payload.of(parsed);
    } catch (Exception cause) {
        // every failure is reported as an error result instead of being thrown
        return error(cause, "Failed to parse date: %s", string);
    }
}
/**
 * Parses the start time of an event by delegating to {@code Util.parseGermanTime}.
 *
 * @param string the text to parse
 * @return whatever {@code Util.parseGermanTime} yields for {@code string}
 */
@Override
protected Result<LocalTime> parseStartTime(String string) {
    Result<LocalTime> startTime = Util.parseGermanTime(string);
    return startTime;
}
@Override