Browse Source

refactored Rosenkeller importer

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
af4a06afc0
  1. 2
      de.srsoftware.cal.app/build.gradle.kts
  2. 2
      de.srsoftware.cal.base/build.gradle.kts
  3. 10
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java
  4. 2
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java
  5. 2
      de.srsoftware.cal.importer/build.gradle.kts
  6. 156
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

2
de.srsoftware.cal.app/build.gradle.kts

@ -14,6 +14,6 @@ dependencies { @@ -14,6 +14,6 @@ dependencies {
implementation("de.srsoftware:tools.logging:1.0.3")
implementation("de.srsoftware:tools.plugin:1.0.1")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("com.mysql:mysql-connector-j:9.1.0")
}

2
de.srsoftware.cal.base/build.gradle.kts

@ -5,6 +5,6 @@ dependencies { @@ -5,6 +5,6 @@ dependencies {
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("org.json:json:20240303")
}

10
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java

@ -77,7 +77,7 @@ public abstract class BaseImporter implements Importer { @@ -77,7 +77,7 @@ public abstract class BaseImporter implements Importer {
protected Result<Tag> extractDescriptionTag(Tag eventTag){
var list = eventTag.find(extractDescriptionFilter());
if (list.isEmpty()) return error("Failed to find attachments tag");
if (list.isEmpty()) return error("Failed to find description tag");
return Payload.of(list.getFirst());
}
@ -97,7 +97,7 @@ public abstract class BaseImporter implements Importer { @@ -97,7 +97,7 @@ public abstract class BaseImporter implements Importer {
return parseEndDate(opt.get().strip());
}
private Result<Tag> extractEndDateTag(Tag eventTag) {
protected Result<Tag> extractEndDateTag(Tag eventTag) {
var list = eventTag.find(extractEndDateFilter());
if (list.isEmpty()) return error("Failed to find end date tag");
return Payload.of(list.getFirst());
@ -112,7 +112,7 @@ public abstract class BaseImporter implements Importer { @@ -112,7 +112,7 @@ public abstract class BaseImporter implements Importer {
return parseEndTime(opt.get().strip());
}
private Result<Tag> extractEndTimeTag(Tag eventTag) {
protected Result<Tag> extractEndTimeTag(Tag eventTag) {
var list = eventTag.find(extractEndTimeFilter());
if (list.isEmpty()) return error("Failed to find end time tag");
return Payload.of(list.getFirst());
@ -232,7 +232,7 @@ public abstract class BaseImporter implements Importer { @@ -232,7 +232,7 @@ public abstract class BaseImporter implements Importer {
return parseStartDate(opt.get().strip());
}
private Result<Tag> extractStartDateTag(Tag eventTag) {
protected Result<Tag> extractStartDateTag(Tag eventTag) {
var list = eventTag.find(extractStartDateFilter());
if (list.isEmpty()) return error("Failed to find start date tag");
return Payload.of(list.getFirst());
@ -247,7 +247,7 @@ public abstract class BaseImporter implements Importer { @@ -247,7 +247,7 @@ public abstract class BaseImporter implements Importer {
return parseStartTime(opt.get().strip());
}
private Result<Tag> extractStartTimeTag(Tag eventTag) {
protected Result<Tag> extractStartTimeTag(Tag eventTag) {
var list = eventTag.find(extractStartTimeFilter());
if (list.isEmpty()) return error("Failed to find start time tag");
return Payload.of(list.getFirst());

2
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java

@ -111,7 +111,7 @@ public class Util { @@ -111,7 +111,7 @@ public class Util {
var second = sec == null ? 0 : Integer.parseInt(sec);
return Payload.of(LocalTime.of(hour,minute,second));
}
return error("Failed to find date");
return error("Failed to find time");
}
/**

2
de.srsoftware.cal.importer/build.gradle.kts

@ -5,5 +5,5 @@ dependencies { @@ -5,5 +5,5 @@ dependencies {
implementation(project(":de.srsoftware.cal.base"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("de.srsoftware:tools.web:1.3.11")
}

156
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

@ -2,27 +2,31 @@ @@ -2,27 +2,31 @@
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.nullable;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.TagFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.regex.Pattern;
import java.util.function.Predicate;
public abstract class Rosenkeller extends BaseImporter {
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
public class Rosenkeller extends BaseImporter {
private static final String BASE_URL = "https://rosenkeller.org";
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
private static final Coords COORDS = new Coords(50.92945, 11.58491);
private static final Coords COORDS = new Coords(50.92945, 11.58491);
public Rosenkeller() throws NoSuchAlgorithmException {
super();
}
@ -33,8 +37,18 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -33,8 +37,18 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
}
@Override
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
return Payload.of(eventTag);
public String description() {
    // Fixed German summary text identifying this importer's event source.
    final String summary = "Importiert Events des Studentenclubs „Rosenkeller“ in Jena";
    return summary;
}
/**
 * Builds the filter for the attachments lookup.
 *
 * @return predicate accepting tags whose {@code ID} attribute equals {@code "tribe-events-content"}
 */
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
    Predicate<Tag> contentContainer = TagFilter.attributeEquals(ID, "tribe-events-content");
    return contentContainer;
}
/**
 * Builds the filter for the description lookup.
 *
 * @return predicate accepting tags whose {@code CLASS} attribute contains {@code "single-event-description"}
 */
@Override
protected Predicate<Tag> extractDescriptionFilter() {
    Predicate<Tag> descriptionBlock = attributeContains(CLASS, "single-event-description");
    return descriptionBlock;
}
@Override
@ -42,53 +56,80 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -42,53 +56,80 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
return Payload.of(COORDS);
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var opt = eventTag //
.find(attributeHas("class", "tribe-events-single-event-description"))
.stream()
.findAny();
if (opt.isPresent()) return Payload.of(opt.get());
return error("Failed to find description tag");
protected Result<LocalDate> extractEndDate(Tag eventTag) {
    // Locate the end-date tag first; its TITLE attribute is what gets parsed.
    Result<Tag> tagResult = extractEndDateTag(eventTag);
    var found = tagResult.optional();
    if (found.isPresent()) {
        return parseEndDate(found.get().get(TITLE));
    }
    // No tag available: hand the lookup result on, re-typed via transform(...).
    return transform(tagResult);
}
/**
 * Builds the filter for the end-date lookup.
 *
 * @return predicate accepting tags whose {@code CLASS} attribute has {@code "tribe-events-abbr"}
 *         and contains both {@code "tribe-events-end"} and {@code "date"}
 */
@Override
protected Predicate<Tag> extractEndDateFilter() {
    var endAndDate = attributeContains(CLASS, "tribe-events-end").and(attributeContains(CLASS, "date"));
    return attributeHas(CLASS, "tribe-events-abbr").and(endAndDate);
}
protected Result<Tag> extractEndTag(Tag eventTag) {
return error("extractEndTag(…) not supported");
/**
 * Builds the filter for the end-time lookup.
 *
 * @return predicate accepting tags whose {@code CLASS} attribute has {@code "tribe-events-abbr"}
 *         and contains both {@code "tribe-events-end"} and {@code "time"}
 */
@Override
protected Predicate<Tag> extractEndTimeFilter() {
    var endAndTime = attributeContains(CLASS, "tribe-events-end").and(attributeContains(CLASS, "time"));
    return attributeHas(CLASS, "tribe-events-abbr").and(endAndTime);
}
@Override
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
if (pageResult.optional().isEmpty()) return transform(pageResult);
var list = pageResult.optional().get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
protected Predicate<Tag> extractEventTagFilter() {
    // Selects tags whose ID attribute equals "tribe-events-content".
    Predicate<Tag> contentContainer = TagFilter.attributeEquals(ID, "tribe-events-content");
    return contentContainer;
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
if (programPage.optional().isEmpty()) return transform(programPage);
List<String> list = programPage.optional()
.get() //
.find(attributeStartsWith("id", "event-"))
.stream()
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
.get() //
.find(attributeStartsWith(ID, "event-"))
.stream()
.map(t -> t.find(attributeEquals(CLASS, "ect-event-url")))
.flatMap(List::stream)
.map(t -> t.get(HREF))
.toList();
return Payload.of(list);
}
/**
 * Builds the filter for the links lookup.
 *
 * @return predicate accepting tags whose {@code CLASS} attribute contains {@code "single-event-description"}
 */
@Override
protected Predicate<Tag> extractLinksFilter() {
    Predicate<Tag> descriptionBlock = attributeContains(CLASS, "single-event-description");
    return descriptionBlock;
}
/**
 * Reports the event location without inspecting the page.
 *
 * @param eventTag not read by this implementation
 * @return a payload wrapping {@code DEFAULT_LOCATION}
 */
@Override
protected Result<String> extractLocation(Tag eventTag) {
    final String venue = DEFAULT_LOCATION;
    return Payload.of(venue);
}
/**
 * Supplies no location filter: this implementation always returns {@code null}.
 *
 * NOTE(review): presumably safe because {@code extractLocation} is overridden in this class to
 * return a constant, so the filter should never be consulted - confirm that the base class does
 * not dereference this value on another code path.
 *
 * @return always {@code null}
 */
@Override
protected Predicate<Tag> extractLocationFilter() {
return null;
}
/**
 * Reads the start date of an event from the {@code TITLE} attribute of its start-date tag.
 *
 * @param eventTag the tag passed on to {@link #extractStartDateTag(Tag)}
 * @return the result of {@link #parseStartDate(String)} for the attribute value, or the failed
 *         tag lookup re-typed via {@code transform(...)} when no tag is available
 */
@Override
protected Result<LocalDate> extractStartDate(Tag eventTag) {
    Result<Tag> startDateTag = extractStartDateTag(eventTag);
    var opt = startDateTag.optional();
    // Without a tag there is nothing to parse; propagate the lookup outcome instead.
    if (opt.isEmpty()) return transform(startDateTag);
    return parseStartDate(opt.get().get(TITLE));
}
/**
 * Looks up the start-date tag using {@link #extractStartDateFilter()}.
 *
 * NOTE(review): the body appears to match the protected method of the same name in
 * BaseImporter; if so, this override is redundant and could be removed - confirm.
 *
 * @param eventTag the tag whose {@code find(...)} results are searched
 * @return a payload with the first match, or an error when nothing matches
 */
@Override
protected Result<Tag> extractStartDateTag(Tag eventTag) {
    var list = eventTag.find(extractStartDateFilter());
    if (list.isEmpty()) return error("Failed to find start date tag");
    return Payload.of(list.getFirst());
}
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
protected Predicate<Tag> extractStartDateFilter() {
    // CLASS must have "tribe-events-abbr" and contain both "tribe-events-start" and "date".
    var startAndDate = attributeContains(CLASS, "tribe-events-start").and(attributeContains(CLASS, "date"));
    return attributeHas(CLASS, "tribe-events-abbr").and(startAndDate);
}
protected Result<Tag> extractStartTag(Tag eventTag) {
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to locate start tag");
/**
 * Builds the filter for the start-time lookup.
 *
 * @return predicate accepting tags whose {@code CLASS} attribute has {@code "tribe-events-abbr"}
 *         and contains both {@code "tribe-events-start"} and {@code "time"}
 */
@Override
protected Predicate<Tag> extractStartTimeFilter() {
    var startAndTime = attributeContains(CLASS, "tribe-events-start").and(attributeContains(CLASS, "time"));
    return attributeHas(CLASS, "tribe-events-abbr").and(startAndTime);
}
@Override
@ -97,33 +138,36 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -97,33 +138,36 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
}
@Override
protected Result<Tag> extractTitleTag(Tag eventTag) {
var list = eventTag.find(attributeEndsWith("class", "single-event-title"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to find title tag");
protected Predicate<Tag> extractTitleFilter() {
    // Selects via ofType("h1").
    Predicate<Tag> headline = ofType("h1");
    return headline;
}
@Override
protected Result<LocalDate> parseEndDate(String text) {
return error("parseEndDate(…) not supported");
protected Result<LocalDate> parseEndDate(String string) {
    // Input is parsed with DateTimeFormatter.ISO_DATE, e.g. "2024-12-31".
    try {
        LocalDate endDate = LocalDate.parse(string, DateTimeFormatter.ISO_DATE);
        return Payload.of(endDate);
    } catch (Exception e) {
        // Any failure is reported as an error result instead of being thrown.
        return error(e, "Failed to parse date: %s", string);
    }
}
@Override
protected Result<LocalDate> parseStartDate(String text) {
var match = DATE_PATTERN.matcher(text);
if (match.find()) {
var dayOfMonth = Integer.parseInt(match.group(1));
var month = toNumericMonth(match.group(2));
if (month.optional().isEmpty()) return transform(month);
protected Result<LocalTime> parseEndTime(String string) {
    // Delegates entirely to the shared Util.parseGermanTime helper.
    Result<LocalTime> endTime = Util.parseGermanTime(string);
    return endTime;
}
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
var now = LocalDateTime.now();
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
if (date.isBefore(now)) date = date.plusYears(1);
//return Payload.of(date);
@Override
protected Result<LocalDate> parseStartDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
return error("Failed to recognize a date in \"%s\"", text);
}
/**
 * Parses the start time of an event.
 *
 * @param string the text handed to {@code Util.parseGermanTime}
 * @return whatever {@code Util.parseGermanTime} yields for the given text
 */
@Override
protected Result<LocalTime> parseStartTime(String string) {
    Result<LocalTime> startTime = Util.parseGermanTime(string);
    return startTime;
}
@Override

Loading…
Cancel
Save