Browse Source

started another importer overhaul

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
3c7313fc01
  1. 2
      de.srsoftware.cal.app/build.gradle.kts
  2. 7
      de.srsoftware.cal.app/src/main/java/de/srsoftware/cal/app/AutoImporter.java
  3. 2
      de.srsoftware.cal.base/build.gradle.kts
  4. 435
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java
  5. 37
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java
  6. 2
      de.srsoftware.cal.importer/build.gradle.kts
  7. 143
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/CosmicDawn.java
  8. 126
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Kassablanca.java
  9. 18
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

2
de.srsoftware.cal.app/build.gradle.kts

@ -14,6 +14,6 @@ dependencies { @@ -14,6 +14,6 @@ dependencies {
implementation("de.srsoftware:tools.logging:1.0.3")
implementation("de.srsoftware:tools.plugin:1.0.1")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.9")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("com.mysql:mysql-connector-j:9.1.0")
}

7
de.srsoftware.cal.app/src/main/java/de/srsoftware/cal/app/AutoImporter.java

@ -10,7 +10,6 @@ import de.srsoftware.cal.api.Importer; @@ -10,7 +10,6 @@ import de.srsoftware.cal.api.Importer;
import de.srsoftware.cal.db.Database;
import de.srsoftware.tools.plugin.ClassListener;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.*;
@ -282,10 +281,10 @@ public class AutoImporter implements Runnable, ClassListener { @@ -282,10 +281,10 @@ public class AutoImporter implements Runnable, ClassListener {
if (Importer.class.isAssignableFrom(aClass)) try {
var instance = aClass.getDeclaredConstructor().newInstance();
importers.add((Importer) instance);
LOG.log(INFO,"Added {0} to the list of importers. Will be used soon…",instance);
lastImport = null;
} catch (InvocationTargetException | InstantiationException | IllegalAccessException | NoSuchMethodException e) {
throw new RuntimeException(e);
LOG.log(INFO,"Added {0} to the list of importers. Will be used soon…",instance);
} catch (Exception e) {
LOG.log(WARNING,"Failed to add importer: {0}",aClass.getSimpleName(),e);
}
}

2
de.srsoftware.cal.base/build.gradle.kts

@ -5,6 +5,6 @@ dependencies { @@ -5,6 +5,6 @@ dependencies {
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.9")
implementation("de.srsoftware:tools.web:1.3.10")
implementation("org.json:json:20240303")
}

435
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java

@ -1,8 +1,11 @@ @@ -1,8 +1,11 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal;
import static de.srsoftware.cal.Util.combine;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.HREF;
import static de.srsoftware.tools.TagFilter.*;
import static java.lang.System.Logger.Level.WARNING;
import de.srsoftware.cal.api.*;
@ -10,17 +13,16 @@ import de.srsoftware.tools.*; @@ -10,17 +13,16 @@ import de.srsoftware.tools.*;
import de.srsoftware.tools.Error;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.time.LocalTime;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Stream;
public abstract class BaseImporter implements Importer {
@ -35,15 +37,13 @@ public abstract class BaseImporter implements Importer { @@ -35,15 +37,13 @@ public abstract class BaseImporter implements Importer {
protected abstract String baseUrl();
@Override
public String description() {
return "abstract base class to create other importers on";
}
public abstract String description();
protected List<Attachment> extractAttachments(Tag eventTag) {
return extractAttachmentsTag(eventTag) //
.optional()
.stream()
.flatMap(tag -> tag.find(TagFilter.ofType("img")).stream())
.flatMap(tag -> tag.find(IS_IMAGE).stream())
.map(tag -> tag.get("src"))
.filter(Objects::nonNull)
.map(url -> url.contains("://") ? url : baseUrl()+url)
@ -55,35 +55,77 @@ public abstract class BaseImporter implements Importer { @@ -55,35 +55,77 @@ public abstract class BaseImporter implements Importer {
.toList();
}
protected abstract Predicate<Tag> extractAttachmentsFilter();
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
return extractDescriptionTag(eventTag);
var list = eventTag.find(extractAttachmentsFilter());
if (list.isEmpty()) return error("Failed to find attachments tag");
return Payload.of(list.getFirst());
}
protected Result<String> extractDescription(Tag eventTag) {
Result<Tag> descriptionTag = extractDescriptionTag(eventTag);
if (descriptionTag.optional().isEmpty()) return transform(descriptionTag);
Tag tag = descriptionTag.optional().get();
tag.find(t -> t.is("iframe")).forEach(Tag::remove);
var inner = tag.inner(2);
if (inner.isPresent()) return Payload.of(inner.get());
return error("No description found");
return inner.isPresent() ? Payload.of(inner.get()) : error("No description found");
}
protected abstract Result<Tag> extractDescriptionTag(Tag eventTag);
protected abstract Predicate<Tag> extractDescriptionFilter();
protected Result<Coords> extractCoords(Tag eventTag) {
return error("not implemented");
/**
 * Looks up the tag holding the event description.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractDescriptionFilter()}, or an error if none matches
 */
protected Result<Tag> extractDescriptionTag(Tag eventTag){
  var list = eventTag.find(extractDescriptionFilter());
  // the message used to say "attachments tag" (copy-paste slip), which pointed at the wrong lookup
  if (list.isEmpty()) return error("Failed to find description tag");
  return Payload.of(list.getFirst());
}
protected abstract Result<Coords> extractCoords(Tag eventTag);
protected Result<LocalDateTime> extractEnd(Tag eventTag) {
Result<Tag> endTag = extractEndTag(eventTag);
if (endTag.optional().isEmpty()) return transform(endTag);
return parseEndDate(endTag.optional().get().toString(0));
Result<LocalDate> date = extractEndDate(eventTag);
Result<LocalTime> time = extractEndTime(eventTag);
return combine(date,time);
}
/**
 * Determines the end date of an event.
 * @param eventTag the tag to search in
 * @return the result of {@link #parseEndDate(String)} applied to the stripped end date tag,
 *         or the lookup error if no end date tag was found
 */
protected Result<LocalDate> extractEndDate(Tag eventTag) {
  Result<Tag> lookup = extractEndDateTag(eventTag);
  var found = lookup.optional();
  return found.isPresent() ? parseEndDate(found.get().strip()) : transform(lookup);
}
protected abstract Result<Tag> extractEndTag(Tag eventTag);
/**
 * Looks up the tag carrying the end date.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractEndDateFilter()}, or an error if none matches
 */
private Result<Tag> extractEndDateTag(Tag eventTag) {
  var hits = eventTag.find(extractEndDateFilter());
  return hits.isEmpty() ? error("Failed to find end date tag") : Payload.of(hits.getFirst());
}
protected abstract Predicate<Tag> extractEndDateFilter();
/**
 * Determines the end time of an event.
 * @param eventTag the tag to search in
 * @return the result of {@link #parseEndTime(String)} applied to the stripped end time tag,
 *         or the lookup error if no end time tag was found
 */
protected Result<LocalTime> extractEndTime(Tag eventTag) {
  Result<Tag> lookup = extractEndTimeTag(eventTag);
  var found = lookup.optional();
  return found.isPresent() ? parseEndTime(found.get().strip()) : transform(lookup);
}
/**
 * Looks up the tag carrying the end time.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractEndTimeFilter()}, or an error if none matches
 */
private Result<Tag> extractEndTimeTag(Tag eventTag) {
  var hits = eventTag.find(extractEndTimeFilter());
  return hits.isEmpty() ? error("Failed to find end time tag") : Payload.of(hits.getFirst());
}
protected abstract Predicate<Tag> extractEndTimeFilter();
protected Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) {
var opt = domResult.optional();
if (opt.isEmpty()) return transform(domResult);
var eventTag = opt.get();
protected Result<Appointment> extractEvent(Tag eventTag, Link eventPage) {
long id = 0;
var titleResult = extractTitle(eventTag);
@ -116,196 +158,235 @@ public abstract class BaseImporter implements Importer { @@ -116,196 +158,235 @@ public abstract class BaseImporter implements Importer {
return Payload.of(event);
}
private Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) {
return switch (domResult) {
case Payload<Tag> payload -> extractEvent(payload.get(), eventPage);
case Error<Tag> err -> err.transform();
default -> invalidParameter(domResult);
};
}
protected abstract Result<Tag> extractEventTag(Result<Tag> pageResult);
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage);
protected List<Link> extractLinks(Tag appointmentTag) {
var links = new ArrayList<Link>();
extractLinksTag(appointmentTag) //
.map(this::extractLinkAnchors)
.optional()
.stream()
.flatMap(List::stream)
.forEach(anchor -> {
var href = anchor.get("href");
if (href == null) return;
if (!href.contains("://")) href = baseUrl() + href;
var text = anchor.inner(0).orElse(href);
Payload //
.of(href)
.map(BaseImporter::url)
.map(url -> link(url, text))
.optional()
.ifPresent(links::add);
});
return links;
}
/**
 * Locates the tag on the page that encloses all further event data.
 * In principle the page tag could be passed on directly, but all subsequent
 * searches would then have to cover more content. It is therefore preferable
 * to narrow the tag down.
 * @param pageResult the parsed page, or the error that occurred while loading it
 * @return the first tag matching {@link #extractEventTagFilter()}, the incoming error,
 *         or a new error if no tag matches
 */
protected Result<Tag> extractEventTag(Result<Tag> pageResult){
  var page = pageResult.optional();
  if (page.isEmpty()) return transform(pageResult);
  var candidates = page.get().find(extractEventTagFilter());
  return candidates.isEmpty() ? error("Failed to find event tag") : Payload.of(candidates.getFirst());
}
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
protected abstract Predicate<Tag> extractEventTagFilter();
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage);
/**
 * Collects the links of an event.
 * Anchors are searched inside the tag returned by {@link #extractLinksTag(Tag)}.
 * Anchors without a href are skipped, as are hrefs that cannot be turned into a URL.
 * Hrefs that do not contain "://" are prefixed with {@link #baseUrl()}.
 * @param appointmentTag the tag to search in
 * @return the list of links, empty if no links tag was found
 */
protected List<Link> extractLinks(Tag appointmentTag) {
  var container = extractLinksTag(appointmentTag).optional();
  if (container.isEmpty()) return List.of();
  return container.get().find(IS_ANCHOR).stream()
      .map(anchor -> {
        var href = anchor.get(HREF);
        if (href == null) return null;
        var absolute = href.contains("://") ? href : baseUrl()+href;
        var label = anchor.strip();
        var target = BaseImporter.url(Payload.of(absolute)).optional();
        return target.map(url -> new Link(url,label)).orElse(null);
      })
      .filter(Objects::nonNull)
      .toList();
}
protected Result<Tag> extractLinksTag(Tag eventTag) {
return extractDescriptionTag(eventTag);
}
var list = eventTag.find(extractLinksFilter());
if (list.isEmpty()) return error("Failed to find links tag");
return Payload.of(list.getFirst());
}
protected Result<String> extractLocation(Tag eventTag) {
protected abstract Predicate<Tag> extractLinksFilter();
protected Result<String> extractLocation(Tag eventTag) {
Result<Tag> locationTag = extractLocationTag(eventTag);
if (locationTag.optional().isEmpty()) return transform(locationTag);
return Payload.of(locationTag.optional().get().toString(2));
}
protected abstract Result<Tag> extractLocationTag(Tag eventTag);
/**
 * Looks up the tag carrying the event location.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractLocationFilter()}, or an error if none matches
 */
protected Result<Tag> extractLocationTag(Tag eventTag){
  var hits = eventTag.find(extractLocationFilter());
  return hits.isEmpty() ? error("Failed to find location tag") : Payload.of(hits.getFirst());
}
protected Result<LocalDateTime> extractStart(Tag eventTag) {
Result<Tag> startTag = extractStartTag(eventTag);
if (startTag.optional().isEmpty()) return transform(startTag);
return parseStartDate(startTag.optional().get().strip());
}
protected abstract Predicate<Tag> extractLocationFilter();
protected abstract Result<Tag> extractStartTag(Tag eventTag);
protected abstract List<String> extractTags(Tag eventTag);
/**
 * Determines the start of an event by combining its start date and start time.
 * @param eventTag the tag to search in
 * @return the combined date and time, or the first error encountered
 */
protected Result<LocalDateTime> extractStart(Tag eventTag) {
  return combine(extractStartDate(eventTag), extractStartTime(eventTag));
}
/**
 * Determines the start date of an event.
 * @param eventTag the tag to search in
 * @return the result of {@link #parseStartDate(String)} applied to the stripped start date tag,
 *         or the lookup error if no start date tag was found
 */
protected Result<LocalDate> extractStartDate(Tag eventTag) {
  Result<Tag> lookup = extractStartDateTag(eventTag);
  var found = lookup.optional();
  return found.isPresent() ? parseStartDate(found.get().strip()) : transform(lookup);
}
/**
 * Looks up the tag carrying the start date.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractStartDateFilter()}, or an error if none matches
 */
private Result<Tag> extractStartDateTag(Tag eventTag) {
  var hits = eventTag.find(extractStartDateFilter());
  return hits.isEmpty() ? error("Failed to find start date tag") : Payload.of(hits.getFirst());
}
protected abstract Predicate<Tag> extractStartDateFilter();
/**
 * Determines the start time of an event.
 * @param eventTag the tag to search in
 * @return the result of {@link #parseStartTime(String)} applied to the stripped start time tag,
 *         or the lookup error if no start time tag was found
 */
protected Result<LocalTime> extractStartTime(Tag eventTag) {
  Result<Tag> lookup = extractStartTimeTag(eventTag);
  var found = lookup.optional();
  return found.isPresent() ? parseStartTime(found.get().strip()) : transform(lookup);
}
/**
 * Looks up the tag carrying the start time.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractStartTimeFilter()}, or an error if none matches
 */
private Result<Tag> extractStartTimeTag(Tag eventTag) {
  var hits = eventTag.find(extractStartTimeFilter());
  return hits.isEmpty() ? error("Failed to find start time tag") : Payload.of(hits.getFirst());
}
protected abstract Predicate<Tag> extractStartTimeFilter();
protected abstract List<String> extractTags(Tag eventTag);
protected Result<String> extractTitle(Tag eventTag) {
Result<Tag> titleTag = extractTitleTag(eventTag);
if (titleTag.optional().isEmpty()) return transform(titleTag);
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
return inner.isPresent() ? Payload.of(inner.get()) :
error("No title found");
}
return inner.isPresent() ? Payload.of(inner.get()) : error("No title found");
}
protected abstract Result<Tag> extractTitleTag(Tag eventTag);
@Override
public Stream<Appointment> fetch() {
var url = Payload.of(programURL());
Stream<Result<String>> urls = url(url)
.map(this::open) //
.map(this::preload)
.map(this::parseXML)
.map(this::extractEventUrls)
.stream();
return urls //
.map(BaseImporter::url)
.map(this::loadEvent)
.peek(e -> {
if (e instanceof Error<Appointment> err) System.err.println(err);
})
.flatMap(result -> result.optional().stream());
}
/**
 * Looks up the tag carrying the event title.
 * @param eventTag the tag to search in
 * @return the first tag matching {@link #extractTitleFilter()}, or an error if none matches
 */
protected Result<Tag> extractTitleTag(Tag eventTag){
  var hits = eventTag.find(extractTitleFilter());
  return hits.isEmpty() ? error("Failed to find title tag") : Payload.of(hits.getFirst());
}
protected static <T> Result<T> invalidParameter(Result<?> result) {
return error("Invalid parameter: %s", result.getClass().getSimpleName());
}
protected abstract Predicate<Tag> extractTitleFilter();
protected static Result<Link> link(Result<URL> url, String text) {
var opt = url.optional();
if (opt.isEmpty()) return transform(url);
return Payload.of(new Link(opt.get(), text));
}
/**
 * Loads the program page, extracts the event URLs from it and loads every event.
 * Events that fail to load are reported through the class logger and left out of the stream.
 * @return a stream of the successfully imported appointments
 */
@Override
public Stream<Appointment> fetch() {
  var url = Payload.of(programURL());
  Stream<Result<String>> urls = url(url)
                                    .map(this::open)  //
                                    .map(this::preload)
                                    .map(this::parseXML)
                                    .map(this::extractEventUrls)
                                    .stream();
  return urls  //
      .map(BaseImporter::url)
      .map(this::loadEvent)
      .peek(e -> {
        // report failures via the logger (as toAttachment does) instead of writing to stderr
        if (e instanceof Error<Appointment> err) LOG.log(WARNING, "Failed to import event: {0}", err);
      })
      .flatMap(result -> result.optional().stream());
}
protected Result<Appointment> loadEvent(Result<URL> urlResult) {
var link = urlResult //
.optional()
.map(url -> new Link(url, "Event-Seite"))
.orElse(null);
return urlResult //
.map(this::open)
.map(this::preload)
.map(this::parseXML)
.map(this::extractEventTag)
.map(tagResult -> extractEvent(tagResult, link));
}
protected static <T> Result<T> invalidParameter(Result<?> result) {
return error("Invalid parameter: %s", result.getClass().getSimpleName());
}
protected static Result<Link> link(Result<URL> url, String text) {
var opt = url.optional();
return opt.isEmpty() ? transform(url) : Payload.of(new Link(opt.get(), text));
}
protected Result<Appointment> loadEvent(Result<URL> urlResult) {
var link = urlResult //
.optional()
.map(url -> new Link(url, "Event-Seite"))
.orElse(null);
return urlResult //
.map(this::open)
.map(this::preload)
.map(this::parseXML)
.map(this::extractEventTag)
.map(tagResult -> extractEvent(tagResult, link));
}
protected Result<InputStream> open(Result<URL> url) {
switch (url) {
case Payload<URL> payload:
try {
return Payload.of(payload.get().openConnection().getInputStream());
} catch (IOException e) {
return error(e, "Failed to open %s", payload, e);
}
case Error<URL> error:
return error.transform();
default:
return invalidParameter(url);
}
protected Result<InputStream> open(Result<URL> url) {
var opt = url.optional();
if (opt.isEmpty()) return transform(url);
try {
var conn = (HttpURLConnection) opt.get().openConnection();
conn.setRequestProperty("Accept","*/*");
conn.setRequestProperty("Host",opt.get().getHost());
conn.setRequestProperty("User-Agent","OpenCloudCal/0.1");
return Payload.of(conn.getInputStream());
} catch (IOException e) {
return error(e, "Failed to open %s", url, e);
}
protected abstract Result<LocalDateTime> parseEndDate(String string);
}
protected abstract Result<LocalDate> parseEndDate(String string);
protected abstract Result<LocalTime> parseEndTime(String string);
protected abstract Result<LocalDateTime> parseStartDate(String string);
protected abstract Result<LocalDate> parseStartDate(String string);
protected abstract Result<LocalTime> parseStartTime(String string);
protected Result<Tag> parseXML(Result<InputStream> inputStream) {
return switch (inputStream) {
case Payload<InputStream> payload -> XMLParser.parse(payload.get());
case Error<InputStream> error -> error.transform();
default -> invalidParameter(inputStream);
};
/**
 * Parses the given input stream into a tag tree.
 * @param inputStream the stream to parse, or the error that occurred while obtaining it
 * @return the result of {@code XMLParser.parse}, or the incoming error
 */
protected Result<Tag> parseXML(Result<InputStream> inputStream) {
  var stream = inputStream.optional();
  if (stream.isPresent()) return XMLParser.parse(stream.get());
  return transform(inputStream);
}
protected Result<InputStream> preload(Result<InputStream> inputStream) {
switch (inputStream) {
case Payload<InputStream> payload:
try {
return Payload.of(XMLParser.preload(payload.get()));
} catch (IOException e) {
return error(e, "Failed to buffer data from %s", payload);
}
case Error<InputStream> error:
return error.transform();
default:
return invalidParameter(inputStream);
}
var opt = inputStream.optional();
if (opt.isEmpty()) return transform(inputStream);
try {
return Payload.of(XMLParser.preload(opt.get()));
} catch (IOException e) {
return error(e, "Failed to buffer data from %s", inputStream);
}
}
protected abstract String programURL();
protected static Result<Attachment> toAttachment(Result<URL> urlResult) {
var opt = urlResult.optional();
if (opt.isEmpty()) return transform(urlResult);
try {
var mime = opt.get().openConnection().getContentType();
return Payload.of(new Attachment(opt.get(), mime));
} catch (Exception e) {
LOG.log(WARNING, "Failed to read mime type of {0}", opt.get());
return error("Failed to read mime type of %s", opt.get());
}
protected abstract String programURL();
/**
 * Turns a URL into an attachment by asking the remote side for its content type.
 * @param urlResult the URL of the attachment, or the error that occurred while creating it
 * @return the attachment, the incoming error, or a new error (carrying the cause)
 *         if the content type could not be read
 */
protected static Result<Attachment> toAttachment(Result<URL> urlResult) {
  var opt = urlResult.optional();
  if (opt.isEmpty()) return transform(urlResult);
  var target = opt.get();
  try {
    var mime = target.openConnection().getContentType();
    return Payload.of(new Attachment(target, mime));
  } catch (Exception e) {
    LOG.log(WARNING, "Failed to read mime type of {0}", target);
    // keep the cause so the caller can see why the lookup failed
    return error(e, "Failed to read mime type of %s", target);
  }
}
protected static Result<Integer> toNumericMonth(String month) {
month = month.toLowerCase();
if (month.startsWith("ja")) return Payload.of(1);
if (month.startsWith("f")) return Payload.of(2);
if ("may".equals(month) || "mai".equals(month)) return Payload.of(5);
if (month.startsWith("m")) return Payload.of(3);
if (month.startsWith("ap")) return Payload.of(4);
if (month.startsWith("jun")) return Payload.of(6);
if (month.startsWith("jul")) return Payload.of(7);
if (month.startsWith("au")) return Payload.of(8);
if (month.startsWith("s")) return Payload.of(9);
if (month.startsWith("o")) return Payload.of(10);
if (month.startsWith("n")) return Payload.of(11);
if (month.startsWith("d")) return Payload.of(12);
return error("Failed to recognize \"%s\" as a month!", month);
}
/**
 * Maps a (possibly abbreviated) month name to its number.
 * Matching is done on prefixes of the lower-cased name; "may" and "mai" are checked
 * before the generic "m" prefix, which maps to March.
 * @param month the month name
 * @return the month number (1-12), or an error if the name is null or not recognized
 */
protected static Result<Integer> toNumericMonth(String month) {
  if (month == null) return error("Failed to recognize \"%s\" as a month!", month);
  // Locale.ROOT keeps the mapping stable: with a Turkish default locale "MAI" would
  // become "maı" and fall through to the "m" prefix (March)
  month = month.toLowerCase(java.util.Locale.ROOT);
  if (month.startsWith("ja")) return Payload.of(1);
  if (month.startsWith("f")) return Payload.of(2);
  if ("may".equals(month) || "mai".equals(month)) return Payload.of(5);
  if (month.startsWith("m")) return Payload.of(3);
  if (month.startsWith("ap")) return Payload.of(4);
  if (month.startsWith("jun")) return Payload.of(6);
  if (month.startsWith("jul")) return Payload.of(7);
  if (month.startsWith("au")) return Payload.of(8);
  if (month.startsWith("s")) return Payload.of(9);
  if (month.startsWith("o")) return Payload.of(10);
  if (month.startsWith("n")) return Payload.of(11);
  if (month.startsWith("d")) return Payload.of(12);
  return error("Failed to recognize \"%s\" as a month!", month);
}
protected static Result<URL> url(Result<String> urlResult) {
if (urlResult.optional().isEmpty()) return transform(urlResult);
var url = urlResult.optional().get();
try {
return Payload.of(new URI(url).toURL());
} catch (MalformedURLException | URISyntaxException e) {
return error(e, "Failed to create URL of %s", url);
}
/**
 * Converts a string into a URL.
 * @param urlResult the string to convert, or the error that occurred while obtaining it
 * @return the URL, the incoming error, or a new error if the string is not a valid absolute URL
 */
protected static Result<URL> url(Result<String> urlResult) {
  var text = urlResult.optional();
  if (text.isEmpty()) return transform(urlResult);
  var url = text.get();
  try {
    return Payload.of(new URI(url).toURL());
  } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
    // URI.toURL() throws IllegalArgumentException for non-absolute URIs (e.g. a relative href)
    return error(e, "Failed to create URL of %s", url);
  }
}
}

37
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java

@ -2,12 +2,17 @@ @@ -2,12 +2,17 @@
package de.srsoftware.cal;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.regex.Pattern;
public class Util {
public static final String BEGIN = "BEGIN";
@ -26,8 +31,18 @@ public class Util { @@ -26,8 +31,18 @@ public class Util {
public static final String VEVENT = "VEVENT";
public static final String VCALENDAR = "VCALENDAR";
/**
 * Matches a date written as day.month.year (four-digit year); groups 1-3 hold day, month and year.
 * Lookarounds instead of consuming {@code \D} guards, so a date at the very start or end of the
 * input is found as well.
 */
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("(?<!\\d)(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})(?!\\d)");
/**
 * Matches a time written as hour:minute[:second]; groups 1 and 2 hold hour and minute,
 * group 4 the optional second. Also matches at the very start or end of the input.
 */
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("(?<!\\d)(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?(?!\\d)");
private Util(){}
/**
 * Merges a date result and a time result into a date-time result.
 * @param date the date, or an error
 * @param time the time, or an error
 * @return the combined date-time; if the date is missing its error is passed on,
 *         otherwise if the time is missing the time's error is passed on
 */
public static Result<LocalDateTime> combine(Result<LocalDate> date, Result<LocalTime> time) {
  var day = date.optional();
  if (day.isEmpty()) return transform(date);
  var clock = time.optional();
  if (clock.isEmpty()) return transform(time);
  return Payload.of(LocalDateTime.of(day.get(), clock.get()));
}
/**
* formats a content line as defined in <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.1">iCalendar spec</a>
* @param key the content line key
@ -76,6 +91,28 @@ public class Util { @@ -76,6 +91,28 @@ public class Util {
.replace(":","/");
}
/**
 * Extracts the first date matching {@link #GERMAN_DATE_PATTERN} from the given text.
 * @param s the text to search in
 * @return the date, or an error if the text is null, contains no date,
 *         or the matched numbers do not form a valid date (e.g. 31.02.)
 */
public static Result<LocalDate> parseGermanDate(String s){
  if (s == null) return error("Failed to find date");
  var match = GERMAN_DATE_PATTERN.matcher(s);
  if (!match.find()) return error("Failed to find date");
  var day = Integer.parseInt(match.group(1));
  var month = Integer.parseInt(match.group(2));
  var year = Integer.parseInt(match.group(3));
  try {
    return Payload.of(LocalDate.of(year,month,day));
  } catch (java.time.DateTimeException e) {
    // LocalDate.of rejects out-of-range values; report them as an error result instead of throwing
    return error(e, "Invalid date: %s.%s.%s", day, month, year);
  }
}
/**
 * Extracts the first time matching {@link #GERMAN_TIME_PATTERN} from the given text.
 * Seconds are optional and default to 0.
 * @param s the text to search in
 * @return the time, or an error if the text is null, contains no time,
 *         or the matched numbers do not form a valid time (e.g. 25:61)
 */
public static Result<LocalTime> parseGermanTime(String s){
  // the message used to say "date" (copy-paste slip from parseGermanDate)
  if (s == null) return error("Failed to find time");
  var match = GERMAN_TIME_PATTERN.matcher(s);
  if (!match.find()) return error("Failed to find time");
  var hour = Integer.parseInt(match.group(1));
  var minute = Integer.parseInt(match.group(2));
  var sec = match.group(4);
  var second = sec == null ? 0 : Integer.parseInt(sec);
  try {
    return Payload.of(LocalTime.of(hour,minute,second));
  } catch (java.time.DateTimeException e) {
    // LocalTime.of rejects out-of-range values; report them as an error result instead of throwing
    return error(e, "Invalid time: %s:%s:%s", hour, minute, second);
  }
}
/**
* wraps a text (list of vevents in a vcalendar, as described in th <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.4">iCalendar spec</a>

2
de.srsoftware.cal.importer/build.gradle.kts

@ -5,5 +5,5 @@ dependencies { @@ -5,5 +5,5 @@ dependencies {
implementation(project(":de.srsoftware.cal.base"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.9")
implementation("de.srsoftware:tools.web:1.3.10")
}

143
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/CosmicDawn.java

@ -0,0 +1,143 @@ @@ -0,0 +1,143 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.TagFilter.*;
import static java.nio.charset.StandardCharsets.UTF_8;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.tools.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.util.List;
import java.util.regex.Pattern;
public abstract class CosmicDawn extends BaseImporter {
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d\\d?).(\\d\\d?).(\\d{4}).*(\\d\\d?):(\\d\\d?)");
public CosmicDawn() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.kuba-jena.de";
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var list = eventTag.find(attributeEndsWith("class","event-body-content"));
return list.isEmpty() ? error("failed to find <div class=\"…event-body-content\">") : Payload.of(list.getFirst());
}
protected Result<Tag> extractEndTag(Tag eventTag) {
return null;
}
@Override
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
if (pageResult.optional().isEmpty()) return transform(pageResult);
List<Tag> list = pageResult.optional().get().find(attributeEquals("class", "inside-article"));
return (list.isEmpty()) ? error("Failed to find <div class=\"inside-article\">!") : Payload.of(list.getFirst());
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var page = programPage.optional();
if (page.isEmpty()) return transform(programPage);
try {
Files.writeString(Path.of("/tmp/test.txt"),page.get().toString(2));
} catch (IOException e) {
throw new RuntimeException(e);
}
var list = page.get().find(attributeEquals("class","event_listings_main"));
var urlList = list.stream()
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
.map(tag -> tag.get("href"))
.toList();
return Payload.of(urlList);
}
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
return null;
}
protected Result<Tag> extractStartTag(Tag eventTag) {
var dateTags = eventTag.find(attributeContains("class","event-date-time"));
if (dateTags.isEmpty()) return error("Start date not found!");
var times = eventTag.find(attributeEquals("class","event_time")).stream()
.flatMap(tag -> tag.find(IS_SPAN).stream())
.filter(tag -> tag.toString().contains("Begin"))
.toList();
if (times.isEmpty()) return error("Start time not found!");
var div = Tag.of("div").add(dateTags.getFirst()).add(times.getFirst());
return Payload.of(div);
}
@Override
protected List<String> extractTags(Tag eventTag) {
return List.of();
}
@Override
protected Result<Tag> extractTitleTag(Tag eventTag) {
var list = eventTag.find(ofType("h1"));
return list.isEmpty() ? error("failed to find <h1>") : Payload.of(list.getFirst());
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalDate> parseStartDate(String date) {
var matcher = START_DATE_PATTERN.matcher(date);
if (matcher.find()){
int day = Integer.parseInt(matcher.group(1));
int mon = Integer.parseInt(matcher.group(2));
int year= Integer.parseInt(matcher.group(3));
int hour = Integer.parseInt(matcher.group(4));
int min = Integer.parseInt(matcher.group(5));
}
return null;
}
/**
* Die Kuba-Seite haut einen haufen Script mit raus, der dazu führt, dass die Tags nicht richtig geparsed werden.
* Also schneiden wir den kompletten header ab...
* @param inputStream eingehender InputStream, verpackt in Result
* @return ausgehender InputStream, verpackt in Result
*/
@Override
protected Result<InputStream> preload(Result<InputStream> inputStream) {
var opt = inputStream.optional();
if (opt.isEmpty()) return transform(inputStream);
try {
var input = opt.get();
var bos = new ByteArrayOutputStream();
input.transferTo(bos);
input.close();
String code = bos.toString(UTF_8);
var pos = code.indexOf("<body");
return Payload.of(new ByteArrayInputStream(code.substring(pos).getBytes(UTF_8)));
} catch (IOException e) {
return error(e, "Failed to buffer data from %s", inputStream);
}
}
@Override
protected String programURL() {
return baseUrl()+"/veranstaltungen/";
//return "http://httpbin.org/headers";
}
}

126
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Kassablanca.java

@ -1,24 +1,25 @@ @@ -1,24 +1,25 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.cal.Util.*;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.DIV;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.*;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.List;
import java.util.regex.Pattern;
import java.util.function.Predicate;
public class Kassablanca extends BaseImporter {
public static final String BASE_URL = "https://www.kassablanca.de";
private static final String APPOINTMENT_TAG_ID = "entry-content";
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d+).(\\d+).(\\d+).*Beginn\\s*(\\d+):(\\d+)\\s*Uhr");
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
private static final Coords COORDS = new Coords(50.92093, 11.57788);
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
public Kassablanca() throws NoSuchAlgorithmException {
super();
@ -29,70 +30,87 @@ public class Kassablanca extends BaseImporter { @@ -29,70 +30,87 @@ public class Kassablanca extends BaseImporter {
return BASE_URL;
}
@Override
public String description() {
return "Importiert Events des Studentenclubs „Kassablanca“ in Jena";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals(CLASS,"entry-content");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals(CLASS,"se-content");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
return Payload.of(COORDS);
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var list = eventTag.find(attributeHas("class", "se-content"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to find description tag");
/**
 * End dates are not supported by this importer.
 * @return a predicate that never matches, so the end date lookup yields an error result
 *         instead of passing {@code null} on as a search filter
 */
protected Predicate<Tag> extractEndDateFilter() {
  return tag -> false;
}
@Override
protected Result<Tag> extractEndTag(Tag eventTag) {
return error("end date not supported");
/**
 * End times are not supported by this importer.
 * @return a predicate that never matches, so the end time lookup yields an error result
 *         instead of passing {@code null} on as a search filter
 */
protected Predicate<Tag> extractEndTimeFilter() {
  return tag -> false;
}
@Override
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
if (pageResult.optional().isEmpty()) return transform(pageResult);
var list = pageResult.optional().get().find(attributeEquals("class", APPOINTMENT_TAG_ID));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
protected Predicate<Tag> extractEventTagFilter() {
return attributeEquals(CLASS,"entry-content");
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
if (programPage.optional().isEmpty()) return transform(programPage);
List<String> list = programPage.optional()
.get() //
.find(attributeHas("class", "eventrow"))
.stream()
.flatMap(t -> t.find(ofType("h3")).stream())
.map(t -> t.find(ofType("a")))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
.get() //
.find(attributeHas("class", "eventrow"))
.stream()
.flatMap(t -> t.find(ofType("h3")).stream())
.map(t -> t.find(IS_ANCHOR))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
return Payload.of(list);
}
@Override
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
if (tagResult.optional().isEmpty()) return transform(tagResult);
var tag = tagResult.optional().get();
tag.find(attributeEquals("id", "filterbar")).stream().findAny().ifPresent(Tag::remove); // remove div with unrelated links
var anchors = tag.find(withAttribute("href"));
return Payload.of(anchors);
protected Predicate<Tag> extractLinksFilter() {
return attributeEquals(CLASS,"se-container");
}
@Override
protected Result<Tag> extractLinksTag(Tag eventTag) {
return Payload.of(eventTag);
var top = eventTag.find(attributeEquals(CLASS,"se-container"));
var bottom = eventTag.find(attributeEquals(CLASS, "se-content"));
var common = Tag.of(DIV).addAll(top).addAll(bottom);
return Payload.of(common);
}
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
return Payload.of(new Text(LOCATION));
protected Result<String> extractLocation(Tag eventTag) {
return Payload.of(LOCATION);
}
@Override
protected Result<Tag> extractStartTag(Tag eventTag) {
List<Tag> tags = eventTag.find(attributeEquals("class", "se-header"));
if (tags.size() == 1) return Payload.of(tags.getFirst());
return error("Failed to find event time information");
protected Predicate<Tag> extractLocationFilter() {
return null;
}
/**
 * Supplies the tag predicate associated with start-date extraction.
 * It is built from the same arguments as the predicate returned by
 * {@code extractStartTimeFilter()}.
 *
 * @return the predicate produced by {@code attributeEquals(CLASS, "se-header")}
 */
@Override
protected Predicate<Tag> extractStartDateFilter() {
    final Predicate<Tag> headerFilter = attributeEquals(CLASS, "se-header");
    return headerFilter;
}
/**
 * Supplies the tag predicate associated with start-time extraction.
 * It is built from the same arguments as the predicate returned by
 * {@code extractStartDateFilter()}.
 *
 * @return the predicate produced by {@code attributeEquals(CLASS, "se-header")}
 */
@Override
protected Predicate<Tag> extractStartTimeFilter() {
    final Predicate<Tag> headerFilter = attributeEquals(CLASS, "se-header");
    return headerFilter;
}
@Override
@ -101,30 +119,28 @@ public class Kassablanca extends BaseImporter { @@ -101,30 +119,28 @@ public class Kassablanca extends BaseImporter {
}
@Override
protected Result<Tag> extractTitleTag(Tag eventTag) {
var list = eventTag.find(ofType("h1"));
if (list.size() == 1) return Payload.of(list.getFirst());
return error("Failed to find title tag");
protected Predicate<Tag> extractTitleFilter() {
return ofType("h1");
}
@Override
protected Result<LocalDateTime> parseEndDate(String string) {
protected Result<LocalDate> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalDateTime> parseStartDate(String string) {
var matcher = START_DATE_PATTERN.matcher(string);
if (matcher.find()) {
var day = Integer.parseInt(matcher.group(1));
var month = Integer.parseInt(matcher.group(2));
var year = Integer.parseInt(matcher.group(3));
var hour = Integer.parseInt(matcher.group(4));
var minute = Integer.parseInt(matcher.group(5));
var date = LocalDateTime.of(year, month, day, hour, minute);
return Payload.of(date);
}
return error("Could not recognize start date/time");
protected Result<LocalTime> parseEndTime(String string) {
return null;
}
/**
 * Parses the start date of an event by delegating to {@code parseGermanDate}.
 * NOTE(review): the accepted date formats are defined by that helper, which
 * is declared outside this view — confirm there.
 *
 * @param string the text to parse
 * @return whatever {@code parseGermanDate(string)} yields
 */
@Override
protected Result<LocalDate> parseStartDate(String string) {
    final Result<LocalDate> parsedDate = parseGermanDate(string);
    return parsedDate;
}
/**
 * Parses the start time of an event by delegating to {@code parseGermanTime}.
 * NOTE(review): the accepted time formats are defined by that helper, which
 * is declared outside this view — confirm there.
 *
 * @param string the text to parse
 * @return whatever {@code parseGermanTime(string)} yields
 */
@Override
protected Result<LocalTime> parseStartTime(String string) {
    final Result<LocalTime> parsedTime = parseGermanTime(string);
    return parsedTime;
}
@Override

18
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

@ -12,11 +12,12 @@ import de.srsoftware.tools.Payload; @@ -12,11 +12,12 @@ import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.List;
import java.util.regex.Pattern;
public class Rosenkeller extends BaseImporter {
public abstract class Rosenkeller extends BaseImporter {
// Presumably the id/class of the element wrapping a single event; its usage lies outside this view — confirm.
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
// Root URL of the Rosenkeller website.
private static final String BASE_URL = "https://rosenkeller.org";
// Matches "<digits> <word>", optionally followed by non-word separator(s) and "<digits>:<digits>".
// Group 1 is parsed as the day of month in parseStartDate; group 2 is presumably the month name and
// groups 4/5 the hour and minute — confirm against the parsing code.
// NOTE(review): \w is ASCII-only unless UNICODE_CHARACTER_CLASS is set, so a month name containing an
// umlaut (e.g. "März") would only be matched up to the umlaut — verify against the site's actual data.
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
@ -51,7 +52,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -51,7 +52,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
return error("Failed to find description tag");
}
/**
 * End tags are not supported by this importer.
 *
 * @param eventTag the event tag (unused)
 * @return always the result of {@code error(...)} with a fixed "not supported" message
 */
@Override
protected Result<Tag> extractEndTag(Tag eventTag) {
    final Result<Tag> unsupported = error("extractEndTag(…) not supported");
    return unsupported;
}
@ -78,19 +79,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -78,19 +79,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
return Payload.of(list);
}
/**
 * Collects the anchor tags found inside the post containers of a page.
 * If {@code tagResult} carries no value, it is passed through {@code transform}
 * and returned. Otherwise every tag whose {@code id} attribute starts with
 * {@code "post-"} is searched for {@code a} elements.
 *
 * @param tagResult result that may hold the page tag
 * @return a payload with all anchors found, or the transformed input result
 */
@Override
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
    if (tagResult.optional().isEmpty()) return transform(tagResult);
    var page = tagResult.optional().get();
    var posts = page.find(attributeStartsWith("id", "post-"));
    List<Tag> anchors = posts.stream()
        .flatMap(post -> post.find(ofType("a")).stream())
        .toList();
    return Payload.of(anchors);
}
/**
 * Produces the location tag for an event.
 * The event tag is not inspected; a new {@code span} tag whose content is
 * {@code DEFAULT_LOCATION} is always returned.
 *
 * @param eventTag the event tag (unused)
 * @return a payload wrapping the newly built {@code span} tag
 */
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
    var locationSpan = new Tag("span").content(DEFAULT_LOCATION);
    return Payload.of(locationSpan);
}
@Override
protected Result<Tag> extractStartTag(Tag eventTag) {
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
if (list.size() == 1) return Payload.of(list.getFirst());
@ -110,12 +104,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -110,12 +104,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
}
@Override
protected Result<LocalDateTime> parseEndDate(String text) {
protected Result<LocalDate> parseEndDate(String text) {
return error("parseEndDate(…) not supported");
}
@Override
protected Result<LocalDateTime> parseStartDate(String text) {
protected Result<LocalDate> parseStartDate(String text) {
var match = DATE_PATTERN.matcher(text);
if (match.find()) {
var dayOfMonth = Integer.parseInt(match.group(1));
@ -127,7 +121,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491); @@ -127,7 +121,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
var now = LocalDateTime.now();
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
if (date.isBefore(now)) date = date.plusYears(1);
return Payload.of(date);
//return Payload.of(date);
}
return error("Failed to recognize a date in \"%s\"", text);
}

Loading…
Cancel
Save