refactored Rosenkeller importer
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -14,6 +14,6 @@ dependencies {
|
|||||||
implementation("de.srsoftware:tools.logging:1.0.3")
|
implementation("de.srsoftware:tools.logging:1.0.3")
|
||||||
implementation("de.srsoftware:tools.plugin:1.0.1")
|
implementation("de.srsoftware:tools.plugin:1.0.1")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.10")
|
implementation("de.srsoftware:tools.web:1.3.11")
|
||||||
implementation("com.mysql:mysql-connector-j:9.1.0")
|
implementation("com.mysql:mysql-connector-j:9.1.0")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,6 @@ dependencies {
|
|||||||
|
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.10")
|
implementation("de.srsoftware:tools.web:1.3.11")
|
||||||
implementation("org.json:json:20240303")
|
implementation("org.json:json:20240303")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
|
|
||||||
protected Result<Tag> extractDescriptionTag(Tag eventTag){
|
protected Result<Tag> extractDescriptionTag(Tag eventTag){
|
||||||
var list = eventTag.find(extractDescriptionFilter());
|
var list = eventTag.find(extractDescriptionFilter());
|
||||||
if (list.isEmpty()) return error("Failed to find attachments tag");
|
if (list.isEmpty()) return error("Failed to find description tag");
|
||||||
return Payload.of(list.getFirst());
|
return Payload.of(list.getFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,7 +97,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return parseEndDate(opt.get().strip());
|
return parseEndDate(opt.get().strip());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result<Tag> extractEndDateTag(Tag eventTag) {
|
protected Result<Tag> extractEndDateTag(Tag eventTag) {
|
||||||
var list = eventTag.find(extractEndDateFilter());
|
var list = eventTag.find(extractEndDateFilter());
|
||||||
if (list.isEmpty()) return error("Failed to find end date tag");
|
if (list.isEmpty()) return error("Failed to find end date tag");
|
||||||
return Payload.of(list.getFirst());
|
return Payload.of(list.getFirst());
|
||||||
@@ -112,7 +112,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return parseEndTime(opt.get().strip());
|
return parseEndTime(opt.get().strip());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result<Tag> extractEndTimeTag(Tag eventTag) {
|
protected Result<Tag> extractEndTimeTag(Tag eventTag) {
|
||||||
var list = eventTag.find(extractEndTimeFilter());
|
var list = eventTag.find(extractEndTimeFilter());
|
||||||
if (list.isEmpty()) return error("Failed to find end time tag");
|
if (list.isEmpty()) return error("Failed to find end time tag");
|
||||||
return Payload.of(list.getFirst());
|
return Payload.of(list.getFirst());
|
||||||
@@ -232,7 +232,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return parseStartDate(opt.get().strip());
|
return parseStartDate(opt.get().strip());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result<Tag> extractStartDateTag(Tag eventTag) {
|
protected Result<Tag> extractStartDateTag(Tag eventTag) {
|
||||||
var list = eventTag.find(extractStartDateFilter());
|
var list = eventTag.find(extractStartDateFilter());
|
||||||
if (list.isEmpty()) return error("Failed to find start date tag");
|
if (list.isEmpty()) return error("Failed to find start date tag");
|
||||||
return Payload.of(list.getFirst());
|
return Payload.of(list.getFirst());
|
||||||
@@ -247,7 +247,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return parseStartTime(opt.get().strip());
|
return parseStartTime(opt.get().strip());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result<Tag> extractStartTimeTag(Tag eventTag) {
|
protected Result<Tag> extractStartTimeTag(Tag eventTag) {
|
||||||
var list = eventTag.find(extractStartTimeFilter());
|
var list = eventTag.find(extractStartTimeFilter());
|
||||||
if (list.isEmpty()) return error("Failed to find start time tag");
|
if (list.isEmpty()) return error("Failed to find start time tag");
|
||||||
return Payload.of(list.getFirst());
|
return Payload.of(list.getFirst());
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ public class Util {
|
|||||||
var second = sec == null ? 0 : Integer.parseInt(sec);
|
var second = sec == null ? 0 : Integer.parseInt(sec);
|
||||||
return Payload.of(LocalTime.of(hour,minute,second));
|
return Payload.of(LocalTime.of(hour,minute,second));
|
||||||
}
|
}
|
||||||
return error("Failed to find date");
|
return error("Failed to find time");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -5,5 +5,5 @@ dependencies {
|
|||||||
implementation(project(":de.srsoftware.cal.base"))
|
implementation(project(":de.srsoftware.cal.base"))
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.10")
|
implementation("de.srsoftware:tools.web:1.3.11")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,27 +2,31 @@
|
|||||||
package de.srsoftware.cal.importer.jena;
|
package de.srsoftware.cal.importer.jena;
|
||||||
|
|
||||||
import static de.srsoftware.tools.Error.error;
|
import static de.srsoftware.tools.Error.error;
|
||||||
import static de.srsoftware.tools.Optionals.nullable;
|
|
||||||
import static de.srsoftware.tools.Result.transform;
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
import static de.srsoftware.tools.Tag.*;
|
||||||
import static de.srsoftware.tools.TagFilter.*;
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
|
|
||||||
import de.srsoftware.cal.BaseImporter;
|
import de.srsoftware.cal.BaseImporter;
|
||||||
|
import de.srsoftware.cal.Util;
|
||||||
import de.srsoftware.cal.api.Coords;
|
import de.srsoftware.cal.api.Coords;
|
||||||
import de.srsoftware.tools.Payload;
|
import de.srsoftware.tools.Payload;
|
||||||
import de.srsoftware.tools.Result;
|
import de.srsoftware.tools.Result;
|
||||||
import de.srsoftware.tools.Tag;
|
import de.srsoftware.tools.Tag;
|
||||||
|
import de.srsoftware.tools.TagFilter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
public abstract class Rosenkeller extends BaseImporter {
|
public class Rosenkeller extends BaseImporter {
|
||||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
|
||||||
private static final String BASE_URL = "https://rosenkeller.org";
|
private static final String BASE_URL = "https://rosenkeller.org";
|
||||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
|
||||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||||
private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
||||||
public Rosenkeller() throws NoSuchAlgorithmException {
|
public Rosenkeller() throws NoSuchAlgorithmException {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
@@ -33,8 +37,18 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
|
public String description() {
|
||||||
return Payload.of(eventTag);
|
return "Importiert Events des Studentenclubs „Rosenkeller“ in Jena";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||||
|
return TagFilter.attributeEquals(ID,"tribe-events-content");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractDescriptionFilter() {
|
||||||
|
return attributeContains(CLASS,"single-event-description");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -42,53 +56,80 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
return Payload.of(COORDS);
|
return Payload.of(COORDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
protected Result<LocalDate> extractEndDate(Tag eventTag) {
|
||||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
Result<Tag> endDateTag = extractEndDateTag(eventTag);
|
||||||
var opt = eventTag //
|
var opt = endDateTag.optional();
|
||||||
.find(attributeHas("class", "tribe-events-single-event-description"))
|
if (opt.isEmpty()) return transform(endDateTag);
|
||||||
.stream()
|
return parseEndDate(opt.get().get(TITLE));
|
||||||
.findAny();
|
|
||||||
if (opt.isPresent()) return Payload.of(opt.get());
|
|
||||||
return error("Failed to find description tag");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected Result<Tag> extractEndTag(Tag eventTag) {
|
|
||||||
return error("extractEndTag(…) not supported");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
protected Predicate<Tag> extractEndDateFilter() {
|
||||||
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"date")));
|
||||||
var list = pageResult.optional().get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
|
}
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
|
||||||
return error("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
@Override
|
||||||
|
protected Predicate<Tag> extractEndTimeFilter() {
|
||||||
|
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"time")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractEventTagFilter() {
|
||||||
|
return TagFilter.attributeEquals(ID,"tribe-events-content");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||||
if (programPage.optional().isEmpty()) return transform(programPage);
|
if (programPage.optional().isEmpty()) return transform(programPage);
|
||||||
List<String> list = programPage.optional()
|
List<String> list = programPage.optional()
|
||||||
.get() //
|
.get() //
|
||||||
.find(attributeStartsWith("id", "event-"))
|
.find(attributeStartsWith(ID, "event-"))
|
||||||
.stream()
|
.stream()
|
||||||
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
|
.map(t -> t.find(attributeEquals(CLASS, "ect-event-url")))
|
||||||
.flatMap(List::stream)
|
.flatMap(List::stream)
|
||||||
.map(t -> t.get("href"))
|
.map(t -> t.get(HREF))
|
||||||
.toList();
|
.toList();
|
||||||
return Payload.of(list);
|
return Payload.of(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
protected Predicate<Tag> extractLinksFilter() {
|
||||||
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
|
return attributeContains(CLASS,"single-event-description");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Result<Tag> extractStartTag(Tag eventTag) {
|
@Override
|
||||||
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
|
protected Result<String> extractLocation(Tag eventTag) {
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
return Payload.of(DEFAULT_LOCATION);
|
||||||
return error("Failed to locate start tag");
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractLocationFilter() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<LocalDate> extractStartDate(Tag eventTag) {
|
||||||
|
Result<Tag> startDateTag = extractStartDateTag(eventTag);
|
||||||
|
var opt = startDateTag.optional();
|
||||||
|
if (opt.isEmpty()) return transform(startDateTag);
|
||||||
|
return parseStartDate(opt.get().get(TITLE));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<Tag> extractStartDateTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(extractStartDateFilter());
|
||||||
|
if (list.isEmpty()) {
|
||||||
|
return error("Failed to find start date tag");
|
||||||
|
}
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartDateFilter() {
|
||||||
|
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"date")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartTimeFilter() {
|
||||||
|
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"time")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -97,33 +138,36 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
protected Predicate<Tag> extractTitleFilter() {
|
||||||
var list = eventTag.find(attributeEndsWith("class", "single-event-title"));
|
return ofType("h1");
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
|
||||||
return error("Failed to find title tag");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<LocalDate> parseEndDate(String text) {
|
protected Result<LocalDate> parseEndDate(String string) {
|
||||||
return error("parseEndDate(…) not supported");
|
try {
|
||||||
}
|
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
|
||||||
|
} catch (Exception e){
|
||||||
@Override
|
return error(e,"Failed to parse date: %s",string);
|
||||||
protected Result<LocalDate> parseStartDate(String text) {
|
|
||||||
var match = DATE_PATTERN.matcher(text);
|
|
||||||
if (match.find()) {
|
|
||||||
var dayOfMonth = Integer.parseInt(match.group(1));
|
|
||||||
var month = toNumericMonth(match.group(2));
|
|
||||||
if (month.optional().isEmpty()) return transform(month);
|
|
||||||
|
|
||||||
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
|
|
||||||
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
|
|
||||||
var now = LocalDateTime.now();
|
|
||||||
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
|
|
||||||
if (date.isBefore(now)) date = date.plusYears(1);
|
|
||||||
//return Payload.of(date);
|
|
||||||
}
|
}
|
||||||
return error("Failed to recognize a date in \"%s\"", text);
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalTime> parseEndTime(String string) {
|
||||||
|
return Util.parseGermanTime(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseStartDate(String string) {
|
||||||
|
try {
|
||||||
|
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
|
||||||
|
} catch (Exception e){
|
||||||
|
return error(e,"Failed to parse date: %s",string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalTime> parseStartTime(String string) {
|
||||||
|
return Util.parseGermanTime(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
Reference in New Issue
Block a user