started another importer overhaul
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -14,6 +14,6 @@ dependencies {
|
|||||||
implementation("de.srsoftware:tools.logging:1.0.3")
|
implementation("de.srsoftware:tools.logging:1.0.3")
|
||||||
implementation("de.srsoftware:tools.plugin:1.0.1")
|
implementation("de.srsoftware:tools.plugin:1.0.1")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.9")
|
implementation("de.srsoftware:tools.web:1.3.10")
|
||||||
implementation("com.mysql:mysql-connector-j:9.1.0")
|
implementation("com.mysql:mysql-connector-j:9.1.0")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ import de.srsoftware.cal.api.Importer;
|
|||||||
import de.srsoftware.cal.db.Database;
|
import de.srsoftware.cal.db.Database;
|
||||||
import de.srsoftware.tools.plugin.ClassListener;
|
import de.srsoftware.tools.plugin.ClassListener;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@@ -282,10 +281,10 @@ public class AutoImporter implements Runnable, ClassListener {
|
|||||||
if (Importer.class.isAssignableFrom(aClass)) try {
|
if (Importer.class.isAssignableFrom(aClass)) try {
|
||||||
var instance = aClass.getDeclaredConstructor().newInstance();
|
var instance = aClass.getDeclaredConstructor().newInstance();
|
||||||
importers.add((Importer) instance);
|
importers.add((Importer) instance);
|
||||||
LOG.log(INFO,"Added {0} to the list of importers. Will be used soon…",instance);
|
|
||||||
lastImport = null;
|
lastImport = null;
|
||||||
} catch (InvocationTargetException | InstantiationException | IllegalAccessException | NoSuchMethodException e) {
|
LOG.log(INFO,"Added {0} to the list of importers. Will be used soon…",instance);
|
||||||
throw new RuntimeException(e);
|
} catch (Exception e) {
|
||||||
|
LOG.log(WARNING,"Failed to add importer: {0}",aClass.getSimpleName(),e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,6 @@ dependencies {
|
|||||||
|
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.9")
|
implementation("de.srsoftware:tools.web:1.3.10")
|
||||||
implementation("org.json:json:20240303")
|
implementation("org.json:json:20240303")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
/* © SRSoftware 2024 */
|
/* © SRSoftware 2024 */
|
||||||
package de.srsoftware.cal;
|
package de.srsoftware.cal;
|
||||||
|
|
||||||
|
import static de.srsoftware.cal.Util.combine;
|
||||||
import static de.srsoftware.tools.Error.error;
|
import static de.srsoftware.tools.Error.error;
|
||||||
import static de.srsoftware.tools.Result.transform;
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
import static de.srsoftware.tools.Tag.HREF;
|
||||||
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
import static java.lang.System.Logger.Level.WARNING;
|
import static java.lang.System.Logger.Level.WARNING;
|
||||||
|
|
||||||
import de.srsoftware.cal.api.*;
|
import de.srsoftware.cal.api.*;
|
||||||
@@ -10,17 +13,16 @@ import de.srsoftware.tools.*;
|
|||||||
import de.srsoftware.tools.Error;
|
import de.srsoftware.tools.Error;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.MalformedURLException;
|
import java.net.*;
|
||||||
import java.net.URI;
|
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.LocalDate;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.ArrayList;
|
import java.time.LocalTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public abstract class BaseImporter implements Importer {
|
public abstract class BaseImporter implements Importer {
|
||||||
@@ -35,15 +37,13 @@ public abstract class BaseImporter implements Importer {
|
|||||||
protected abstract String baseUrl();
|
protected abstract String baseUrl();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String description() {
|
public abstract String description();
|
||||||
return "abstract base class to create other importers on";
|
|
||||||
}
|
|
||||||
|
|
||||||
protected List<Attachment> extractAttachments(Tag eventTag) {
|
protected List<Attachment> extractAttachments(Tag eventTag) {
|
||||||
return extractAttachmentsTag(eventTag) //
|
return extractAttachmentsTag(eventTag) //
|
||||||
.optional()
|
.optional()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(tag -> tag.find(TagFilter.ofType("img")).stream())
|
.flatMap(tag -> tag.find(IS_IMAGE).stream())
|
||||||
.map(tag -> tag.get("src"))
|
.map(tag -> tag.get("src"))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
||||||
@@ -55,35 +55,77 @@ public abstract class BaseImporter implements Importer {
|
|||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractAttachmentsFilter();
|
||||||
|
|
||||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
|
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
|
||||||
return extractDescriptionTag(eventTag);
|
var list = eventTag.find(extractAttachmentsFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find attachments tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected Result<String> extractDescription(Tag eventTag) {
|
protected Result<String> extractDescription(Tag eventTag) {
|
||||||
Result<Tag> descriptionTag = extractDescriptionTag(eventTag);
|
Result<Tag> descriptionTag = extractDescriptionTag(eventTag);
|
||||||
if (descriptionTag.optional().isEmpty()) return transform(descriptionTag);
|
if (descriptionTag.optional().isEmpty()) return transform(descriptionTag);
|
||||||
Tag tag = descriptionTag.optional().get();
|
Tag tag = descriptionTag.optional().get();
|
||||||
tag.find(t -> t.is("iframe")).forEach(Tag::remove);
|
tag.find(t -> t.is("iframe")).forEach(Tag::remove);
|
||||||
var inner = tag.inner(2);
|
var inner = tag.inner(2);
|
||||||
if (inner.isPresent()) return Payload.of(inner.get());
|
return inner.isPresent() ? Payload.of(inner.get()) : error("No description found");
|
||||||
return error("No description found");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractDescriptionTag(Tag eventTag);
|
protected abstract Predicate<Tag> extractDescriptionFilter();
|
||||||
|
|
||||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
protected Result<Tag> extractDescriptionTag(Tag eventTag){
|
||||||
return error("not implemented");
|
var list = eventTag.find(extractDescriptionFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find attachments tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected abstract Result<Coords> extractCoords(Tag eventTag);
|
||||||
|
|
||||||
protected Result<LocalDateTime> extractEnd(Tag eventTag) {
|
protected Result<LocalDateTime> extractEnd(Tag eventTag) {
|
||||||
Result<Tag> endTag = extractEndTag(eventTag);
|
Result<LocalDate> date = extractEndDate(eventTag);
|
||||||
if (endTag.optional().isEmpty()) return transform(endTag);
|
Result<LocalTime> time = extractEndTime(eventTag);
|
||||||
return parseEndDate(endTag.optional().get().toString(0));
|
return combine(date,time);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractEndTag(Tag eventTag);
|
protected Result<LocalDate> extractEndDate(Tag eventTag) {
|
||||||
|
Result<Tag> endDateTag = extractEndDateTag(eventTag);
|
||||||
|
var opt = endDateTag.optional();
|
||||||
|
if (opt.isEmpty()) return transform(endDateTag);
|
||||||
|
return parseEndDate(opt.get().strip());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Result<Tag> extractEndDateTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(extractEndDateFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find end date tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractEndDateFilter();
|
||||||
|
|
||||||
|
protected Result<LocalTime> extractEndTime(Tag eventTag) {
|
||||||
|
Result<Tag> endTimeTag = extractEndTimeTag(eventTag);
|
||||||
|
var opt = endTimeTag.optional();
|
||||||
|
if (opt.isEmpty()) return transform(endTimeTag);
|
||||||
|
return parseEndTime(opt.get().strip());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Result<Tag> extractEndTimeTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(extractEndTimeFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find end time tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractEndTimeFilter();
|
||||||
|
|
||||||
|
|
||||||
|
protected Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) {
|
||||||
|
var opt = domResult.optional();
|
||||||
|
if (opt.isEmpty()) return transform(domResult);
|
||||||
|
var eventTag = opt.get();
|
||||||
|
|
||||||
protected Result<Appointment> extractEvent(Tag eventTag, Link eventPage) {
|
|
||||||
long id = 0;
|
long id = 0;
|
||||||
|
|
||||||
var titleResult = extractTitle(eventTag);
|
var titleResult = extractTitle(eventTag);
|
||||||
@@ -116,62 +158,103 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return Payload.of(event);
|
return Payload.of(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) {
|
/**
|
||||||
return switch (domResult) {
|
* Dies ist der Tag auf der Seite, der alle weiteren Event-Daten umfasst.
|
||||||
case Payload<Tag> payload -> extractEvent(payload.get(), eventPage);
|
* Im Prinzip kann der Page-Tag auch direkt weitergereicht werden, dann
|
||||||
case Error<Tag> err -> err.transform();
|
* sind die weiteren Suchen aber umfangreicher.
|
||||||
default -> invalidParameter(domResult);
|
* Besser ist es daher, den Tag näher einzugrenzen
|
||||||
};
|
* @param pageResult
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected Result<Tag> extractEventTag(Result<Tag> pageResult){
|
||||||
|
var opt = pageResult.optional();
|
||||||
|
if (opt.isEmpty()) return transform(pageResult);
|
||||||
|
var list = opt.get().find(extractEventTagFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find event tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractEventTag(Result<Tag> pageResult);
|
protected abstract Predicate<Tag> extractEventTagFilter();
|
||||||
|
|
||||||
|
|
||||||
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage);
|
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage);
|
||||||
|
|
||||||
protected List<Link> extractLinks(Tag appointmentTag) {
|
protected List<Link> extractLinks(Tag appointmentTag) {
|
||||||
var links = new ArrayList<Link>();
|
var tag = extractLinksTag(appointmentTag);
|
||||||
|
var opt = tag.optional();
|
||||||
extractLinksTag(appointmentTag) //
|
if (opt.isEmpty()) return List.of();
|
||||||
.map(this::extractLinkAnchors)
|
Tag linksTag = opt.get();
|
||||||
.optional()
|
return linksTag.find(IS_ANCHOR).stream()
|
||||||
.stream()
|
.map(anchor -> {
|
||||||
.flatMap(List::stream)
|
var href = anchor.get(HREF);
|
||||||
.forEach(anchor -> {
|
if (href == null) return null;
|
||||||
var href = anchor.get("href");
|
|
||||||
if (href == null) return;
|
|
||||||
if (!href.contains("://")) href = baseUrl()+href;
|
if (!href.contains("://")) href = baseUrl()+href;
|
||||||
var text = anchor.inner(0).orElse(href);
|
var txt = anchor.strip();
|
||||||
Payload //
|
return BaseImporter.url(Payload.of(href)).optional().map(url -> new Link(url,txt)).orElse(null);
|
||||||
.of(href)
|
})
|
||||||
.map(BaseImporter::url)
|
.filter(Objects::nonNull)
|
||||||
.map(url -> link(url, text))
|
.toList();
|
||||||
.optional()
|
|
||||||
.ifPresent(links::add);
|
|
||||||
});
|
|
||||||
return links;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
|
|
||||||
|
|
||||||
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
||||||
return extractDescriptionTag(eventTag);
|
var list = eventTag.find(extractLinksFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find links tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractLinksFilter();
|
||||||
|
|
||||||
protected Result<String> extractLocation(Tag eventTag) {
|
protected Result<String> extractLocation(Tag eventTag) {
|
||||||
Result<Tag> locationTag = extractLocationTag(eventTag);
|
Result<Tag> locationTag = extractLocationTag(eventTag);
|
||||||
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
||||||
return Payload.of(locationTag.optional().get().toString(2));
|
return Payload.of(locationTag.optional().get().toString(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractLocationTag(Tag eventTag);
|
protected Result<Tag> extractLocationTag(Tag eventTag){
|
||||||
|
var list = eventTag.find(extractLocationFilter());
|
||||||
protected Result<LocalDateTime> extractStart(Tag eventTag) {
|
if (list.isEmpty()) return error("Failed to find location tag");
|
||||||
Result<Tag> startTag = extractStartTag(eventTag);
|
return Payload.of(list.getFirst());
|
||||||
if (startTag.optional().isEmpty()) return transform(startTag);
|
|
||||||
return parseStartDate(startTag.optional().get().strip());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractStartTag(Tag eventTag);
|
protected abstract Predicate<Tag> extractLocationFilter();
|
||||||
|
|
||||||
|
|
||||||
|
protected Result<LocalDateTime> extractStart(Tag eventTag) {
|
||||||
|
Result<LocalDate> date = extractStartDate(eventTag);
|
||||||
|
Result<LocalTime> time = extractStartTime(eventTag);
|
||||||
|
return combine(date,time);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<LocalDate> extractStartDate(Tag eventTag) {
|
||||||
|
Result<Tag> startDateTag = extractStartDateTag(eventTag);
|
||||||
|
var opt = startDateTag.optional();
|
||||||
|
if (opt.isEmpty()) return transform(startDateTag);
|
||||||
|
return parseStartDate(opt.get().strip());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Result<Tag> extractStartDateTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(extractStartDateFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find start date tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractStartDateFilter();
|
||||||
|
|
||||||
|
protected Result<LocalTime> extractStartTime(Tag eventTag) {
|
||||||
|
Result<Tag> startTimeTag = extractStartTimeTag(eventTag);
|
||||||
|
var opt = startTimeTag.optional();
|
||||||
|
if (opt.isEmpty()) return transform(startTimeTag);
|
||||||
|
return parseStartTime(opt.get().strip());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Result<Tag> extractStartTimeTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(extractStartTimeFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find start time tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractStartTimeFilter();
|
||||||
|
|
||||||
|
|
||||||
protected abstract List<String> extractTags(Tag eventTag);
|
protected abstract List<String> extractTags(Tag eventTag);
|
||||||
|
|
||||||
@@ -179,11 +262,16 @@ public abstract class BaseImporter implements Importer {
|
|||||||
Result<Tag> titleTag = extractTitleTag(eventTag);
|
Result<Tag> titleTag = extractTitleTag(eventTag);
|
||||||
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
||||||
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
||||||
return inner.isPresent() ? Payload.of(inner.get()) :
|
return inner.isPresent() ? Payload.of(inner.get()) : error("No title found");
|
||||||
error("No title found");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractTitleTag(Tag eventTag);
|
protected Result<Tag> extractTitleTag(Tag eventTag){
|
||||||
|
var list = eventTag.find(extractTitleFilter());
|
||||||
|
if (list.isEmpty()) return error("Failed to find title tag");
|
||||||
|
return Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract Predicate<Tag> extractTitleFilter();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<Appointment> fetch() {
|
public Stream<Appointment> fetch() {
|
||||||
@@ -209,8 +297,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
|
|
||||||
protected static Result<Link> link(Result<URL> url, String text) {
|
protected static Result<Link> link(Result<URL> url, String text) {
|
||||||
var opt = url.optional();
|
var opt = url.optional();
|
||||||
if (opt.isEmpty()) return transform(url);
|
return opt.isEmpty() ? transform(url) : Payload.of(new Link(opt.get(), text));
|
||||||
return Payload.of(new Link(opt.get(), text));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Result<Appointment> loadEvent(Result<URL> urlResult) {
|
protected Result<Appointment> loadEvent(Result<URL> urlResult) {
|
||||||
@@ -227,44 +314,38 @@ public abstract class BaseImporter implements Importer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected Result<InputStream> open(Result<URL> url) {
|
protected Result<InputStream> open(Result<URL> url) {
|
||||||
switch (url) {
|
var opt = url.optional();
|
||||||
case Payload<URL> payload:
|
if (opt.isEmpty()) return transform(url);
|
||||||
try {
|
try {
|
||||||
return Payload.of(payload.get().openConnection().getInputStream());
|
var conn = (HttpURLConnection) opt.get().openConnection();
|
||||||
|
conn.setRequestProperty("Accept","*/*");
|
||||||
|
conn.setRequestProperty("Host",opt.get().getHost());
|
||||||
|
conn.setRequestProperty("User-Agent","OpenCloudCal/0.1");
|
||||||
|
return Payload.of(conn.getInputStream());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return error(e, "Failed to open %s", payload, e);
|
return error(e, "Failed to open %s", url, e);
|
||||||
}
|
|
||||||
case Error<URL> error:
|
|
||||||
return error.transform();
|
|
||||||
default:
|
|
||||||
return invalidParameter(url);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<LocalDateTime> parseEndDate(String string);
|
}
|
||||||
|
|
||||||
protected abstract Result<LocalDateTime> parseStartDate(String string);
|
protected abstract Result<LocalDate> parseEndDate(String string);
|
||||||
|
protected abstract Result<LocalTime> parseEndTime(String string);
|
||||||
|
|
||||||
|
protected abstract Result<LocalDate> parseStartDate(String string);
|
||||||
|
protected abstract Result<LocalTime> parseStartTime(String string);
|
||||||
|
|
||||||
protected Result<Tag> parseXML(Result<InputStream> inputStream) {
|
protected Result<Tag> parseXML(Result<InputStream> inputStream) {
|
||||||
return switch (inputStream) {
|
var opt = inputStream.optional();
|
||||||
case Payload<InputStream> payload -> XMLParser.parse(payload.get());
|
return opt.isEmpty() ? transform((inputStream)) : XMLParser.parse(opt.get());
|
||||||
case Error<InputStream> error -> error.transform();
|
|
||||||
default -> invalidParameter(inputStream);
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Result<InputStream> preload(Result<InputStream> inputStream) {
|
protected Result<InputStream> preload(Result<InputStream> inputStream) {
|
||||||
switch (inputStream) {
|
var opt = inputStream.optional();
|
||||||
case Payload<InputStream> payload:
|
if (opt.isEmpty()) return transform(inputStream);
|
||||||
try {
|
try {
|
||||||
return Payload.of(XMLParser.preload(payload.get()));
|
return Payload.of(XMLParser.preload(opt.get()));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return error(e, "Failed to buffer data from %s", payload);
|
return error(e, "Failed to buffer data from %s", inputStream);
|
||||||
}
|
|
||||||
case Error<InputStream> error:
|
|
||||||
return error.transform();
|
|
||||||
default:
|
|
||||||
return invalidParameter(inputStream);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,12 +2,17 @@
|
|||||||
package de.srsoftware.cal;
|
package de.srsoftware.cal;
|
||||||
|
|
||||||
import static de.srsoftware.tools.Error.error;
|
import static de.srsoftware.tools.Error.error;
|
||||||
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
|
||||||
import de.srsoftware.cal.api.Coords;
|
import de.srsoftware.cal.api.Coords;
|
||||||
import de.srsoftware.tools.Payload;
|
import de.srsoftware.tools.Payload;
|
||||||
import de.srsoftware.tools.Result;
|
import de.srsoftware.tools.Result;
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.LocalTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class Util {
|
public class Util {
|
||||||
public static final String BEGIN = "BEGIN";
|
public static final String BEGIN = "BEGIN";
|
||||||
@@ -26,8 +31,18 @@ public class Util {
|
|||||||
public static final String VEVENT = "VEVENT";
|
public static final String VEVENT = "VEVENT";
|
||||||
public static final String VCALENDAR = "VCALENDAR";
|
public static final String VCALENDAR = "VCALENDAR";
|
||||||
|
|
||||||
|
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("\\D(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})\\D");
|
||||||
|
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("\\D(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?\\D");
|
||||||
|
|
||||||
private Util(){}
|
private Util(){}
|
||||||
|
|
||||||
|
|
||||||
|
public static Result<LocalDateTime> combine(Result<LocalDate> date, Result<LocalTime> time) {
|
||||||
|
if (date.optional().isEmpty())return transform(date);
|
||||||
|
if (time.optional().isEmpty())return transform(time);
|
||||||
|
return Payload.of(LocalDateTime.of(date.optional().get(),time.optional().get()));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* formats a content line as defined in <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.1">iCalendar spec</a>
|
* formats a content line as defined in <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.1">iCalendar spec</a>
|
||||||
* @param key the content line key
|
* @param key the content line key
|
||||||
@@ -76,6 +91,28 @@ public class Util {
|
|||||||
.replace(":","/");
|
.replace(":","/");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Result<LocalDate> parseGermanDate(String s){
|
||||||
|
var match = GERMAN_DATE_PATTERN.matcher(s);
|
||||||
|
if (match.find()){
|
||||||
|
var day = Integer.parseInt(match.group(1));
|
||||||
|
var month = Integer.parseInt(match.group(2));
|
||||||
|
var year = Integer.parseInt(match.group(3));
|
||||||
|
return Payload.of(LocalDate.of(year,month,day));
|
||||||
|
}
|
||||||
|
return error("Failed to find date");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Result<LocalTime> parseGermanTime(String s){
|
||||||
|
var match = GERMAN_TIME_PATTERN.matcher(s);
|
||||||
|
if (match.find()){
|
||||||
|
var hour = Integer.parseInt(match.group(1));
|
||||||
|
var minute = Integer.parseInt(match.group(2));
|
||||||
|
var sec = match.group(4);
|
||||||
|
var second = sec == null ? 0 : Integer.parseInt(sec);
|
||||||
|
return Payload.of(LocalTime.of(hour,minute,second));
|
||||||
|
}
|
||||||
|
return error("Failed to find date");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* wraps a text (list of vevents in a vcalendar, as described in the <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.4">iCalendar spec</a>
|
* wraps a text (list of vevents in a vcalendar, as described in the <a href="https://datatracker.ietf.org/doc/html/rfc5545#section-3.4">iCalendar spec</a>
|
||||||
|
|||||||
@@ -5,5 +5,5 @@ dependencies {
|
|||||||
implementation(project(":de.srsoftware.cal.base"))
|
implementation(project(":de.srsoftware.cal.base"))
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.3.0")
|
implementation("de.srsoftware:tools.util:1.3.0")
|
||||||
implementation("de.srsoftware:tools.web:1.3.9")
|
implementation("de.srsoftware:tools.web:1.3.10")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,143 @@
|
|||||||
|
/* © SRSoftware 2024 */
|
||||||
|
package de.srsoftware.cal.importer.jena;
|
||||||
|
|
||||||
|
import static de.srsoftware.tools.Error.error;
|
||||||
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
import de.srsoftware.cal.BaseImporter;
|
||||||
|
import de.srsoftware.tools.*;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public abstract class CosmicDawn extends BaseImporter {
|
||||||
|
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d\\d?).(\\d\\d?).(\\d{4}).*(\\d\\d?):(\\d\\d?)");
|
||||||
|
|
||||||
|
public CosmicDawn() throws NoSuchAlgorithmException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String baseUrl() {
|
||||||
|
return "https://www.kuba-jena.de";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(attributeEndsWith("class","event-body-content"));
|
||||||
|
return list.isEmpty() ? error("failed to find <div class=\"…event-body-content\">") : Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<Tag> extractEndTag(Tag eventTag) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
||||||
|
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
||||||
|
List<Tag> list = pageResult.optional().get().find(attributeEquals("class", "inside-article"));
|
||||||
|
return (list.isEmpty()) ? error("Failed to find <div class=\"inside-article\">!") : Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||||
|
var page = programPage.optional();
|
||||||
|
if (page.isEmpty()) return transform(programPage);
|
||||||
|
try {
|
||||||
|
Files.writeString(Path.of("/tmp/test.txt"),page.get().toString(2));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
var list = page.get().find(attributeEquals("class","event_listings_main"));
|
||||||
|
var urlList = list.stream()
|
||||||
|
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||||
|
.map(tag -> tag.get("href"))
|
||||||
|
.toList();
|
||||||
|
return Payload.of(urlList);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<Tag> extractStartTag(Tag eventTag) {
|
||||||
|
var dateTags = eventTag.find(attributeContains("class","event-date-time"));
|
||||||
|
if (dateTags.isEmpty()) return error("Start date not found!");
|
||||||
|
var times = eventTag.find(attributeEquals("class","event_time")).stream()
|
||||||
|
.flatMap(tag -> tag.find(IS_SPAN).stream())
|
||||||
|
.filter(tag -> tag.toString().contains("Begin"))
|
||||||
|
.toList();
|
||||||
|
if (times.isEmpty()) return error("Start time not found!");
|
||||||
|
var div = Tag.of("div").add(dateTags.getFirst()).add(times.getFirst());
|
||||||
|
return Payload.of(div);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> extractTags(Tag eventTag) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(ofType("h1"));
|
||||||
|
return list.isEmpty() ? error("failed to find <h1>") : Payload.of(list.getFirst());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseEndDate(String string) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseStartDate(String date) {
|
||||||
|
var matcher = START_DATE_PATTERN.matcher(date);
|
||||||
|
if (matcher.find()){
|
||||||
|
int day = Integer.parseInt(matcher.group(1));
|
||||||
|
int mon = Integer.parseInt(matcher.group(2));
|
||||||
|
int year= Integer.parseInt(matcher.group(3));
|
||||||
|
int hour = Integer.parseInt(matcher.group(4));
|
||||||
|
int min = Integer.parseInt(matcher.group(5));
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Die Kuba-Seite haut einen Haufen Script mit raus, der dazu führt, dass die Tags nicht richtig geparst werden.
|
||||||
|
* Also schneiden wir den kompletten Header ab...
|
||||||
|
* @param inputStream eingehender InputStream, verpackt in Result
|
||||||
|
* @return ausgehender InputStream, verpackt in Result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Result<InputStream> preload(Result<InputStream> inputStream) {
|
||||||
|
var opt = inputStream.optional();
|
||||||
|
if (opt.isEmpty()) return transform(inputStream);
|
||||||
|
try {
|
||||||
|
var input = opt.get();
|
||||||
|
var bos = new ByteArrayOutputStream();
|
||||||
|
input.transferTo(bos);
|
||||||
|
input.close();
|
||||||
|
String code = bos.toString(UTF_8);
|
||||||
|
var pos = code.indexOf("<body");
|
||||||
|
return Payload.of(new ByteArrayInputStream(code.substring(pos).getBytes(UTF_8)));
|
||||||
|
} catch (IOException e) {
|
||||||
|
return error(e, "Failed to buffer data from %s", inputStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String programURL() {
|
||||||
|
return baseUrl()+"/veranstaltungen/";
|
||||||
|
//return "http://httpbin.org/headers";
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,24 +1,25 @@
|
|||||||
/* © SRSoftware 2024 */
|
/* © SRSoftware 2024 */
|
||||||
package de.srsoftware.cal.importer.jena;
|
package de.srsoftware.cal.importer.jena;
|
||||||
|
|
||||||
import static de.srsoftware.tools.Error.error;
|
import static de.srsoftware.cal.Util.*;
|
||||||
import static de.srsoftware.tools.Result.transform;
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
import static de.srsoftware.tools.Tag.CLASS;
|
||||||
|
import static de.srsoftware.tools.Tag.DIV;
|
||||||
import static de.srsoftware.tools.TagFilter.*;
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
|
|
||||||
import de.srsoftware.cal.BaseImporter;
|
import de.srsoftware.cal.BaseImporter;
|
||||||
import de.srsoftware.cal.api.Coords;
|
import de.srsoftware.cal.api.Coords;
|
||||||
import de.srsoftware.tools.*;
|
import de.srsoftware.tools.*;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDate;
|
||||||
|
import java.time.LocalTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
public class Kassablanca extends BaseImporter {
|
public class Kassablanca extends BaseImporter {
|
||||||
public static final String BASE_URL = "https://www.kassablanca.de";
|
public static final String BASE_URL = "https://www.kassablanca.de";
|
||||||
private static final String APPOINTMENT_TAG_ID = "entry-content";
|
|
||||||
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d+).(\\d+).(\\d+).*Beginn\\s*(\\d+):(\\d+)\\s*Uhr");
|
|
||||||
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
|
|
||||||
private static final Coords COORDS = new Coords(50.92093, 11.57788);
|
private static final Coords COORDS = new Coords(50.92093, 11.57788);
|
||||||
|
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
|
||||||
|
|
||||||
public Kassablanca() throws NoSuchAlgorithmException {
|
public Kassablanca() throws NoSuchAlgorithmException {
|
||||||
super();
|
super();
|
||||||
@@ -29,29 +30,39 @@ public class Kassablanca extends BaseImporter {
|
|||||||
return BASE_URL;
|
return BASE_URL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String description() {
|
||||||
|
return "Importiert Events des Studentenclubs „Kassablanca“ in Jena";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||||
|
return attributeEquals(CLASS,"entry-content");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractDescriptionFilter() {
|
||||||
|
return attributeEquals(CLASS,"se-content");
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||||
return Payload.of(COORDS);
|
return Payload.of(COORDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
protected Predicate<Tag> extractEndDateFilter() {
|
||||||
var list = eventTag.find(attributeHas("class", "se-content"));
|
return null;
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
|
||||||
return error("Failed to find description tag");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractEndTag(Tag eventTag) {
|
protected Predicate<Tag> extractEndTimeFilter() {
|
||||||
return error("end date not supported");
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
protected Predicate<Tag> extractEventTagFilter() {
|
||||||
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
return attributeEquals(CLASS,"entry-content");
|
||||||
var list = pageResult.optional().get().find(attributeEquals("class", APPOINTMENT_TAG_ID));
|
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
|
||||||
return error("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -62,7 +73,7 @@ public class Kassablanca extends BaseImporter {
|
|||||||
.find(attributeHas("class", "eventrow"))
|
.find(attributeHas("class", "eventrow"))
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(t -> t.find(ofType("h3")).stream())
|
.flatMap(t -> t.find(ofType("h3")).stream())
|
||||||
.map(t -> t.find(ofType("a")))
|
.map(t -> t.find(IS_ANCHOR))
|
||||||
.flatMap(List::stream)
|
.flatMap(List::stream)
|
||||||
.map(t -> t.get("href"))
|
.map(t -> t.get("href"))
|
||||||
.toList();
|
.toList();
|
||||||
@@ -70,29 +81,36 @@ public class Kassablanca extends BaseImporter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
|
protected Predicate<Tag> extractLinksFilter() {
|
||||||
if (tagResult.optional().isEmpty()) return transform(tagResult);
|
return attributeEquals(CLASS,"se-container");
|
||||||
var tag = tagResult.optional().get();
|
|
||||||
tag.find(attributeEquals("id", "filterbar")).stream().findAny().ifPresent(Tag::remove); // remove div with unrelated links
|
|
||||||
var anchors = tag.find(withAttribute("href"));
|
|
||||||
return Payload.of(anchors);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
||||||
return Payload.of(eventTag);
|
var top = eventTag.find(attributeEquals(CLASS,"se-container"));
|
||||||
|
var bottom = eventTag.find(attributeEquals(CLASS, "se-content"));
|
||||||
|
var common = Tag.of(DIV).addAll(top).addAll(bottom);
|
||||||
|
return Payload.of(common);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
protected Result<String> extractLocation(Tag eventTag) {
|
||||||
return Payload.of(new Text(LOCATION));
|
return Payload.of(LOCATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractStartTag(Tag eventTag) {
|
protected Predicate<Tag> extractLocationFilter() {
|
||||||
List<Tag> tags = eventTag.find(attributeEquals("class", "se-header"));
|
return null;
|
||||||
if (tags.size() == 1) return Payload.of(tags.getFirst());
|
}
|
||||||
return error("Failed to find event time information");
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartDateFilter() {
|
||||||
|
return attributeEquals(CLASS,"se-header");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartTimeFilter() {
|
||||||
|
return attributeEquals(CLASS,"se-header");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -101,30 +119,28 @@ public class Kassablanca extends BaseImporter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
protected Predicate<Tag> extractTitleFilter() {
|
||||||
var list = eventTag.find(ofType("h1"));
|
return ofType("h1");
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
|
||||||
return error("Failed to find title tag");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<LocalDateTime> parseEndDate(String string) {
|
protected Result<LocalDate> parseEndDate(String string) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<LocalDateTime> parseStartDate(String string) {
|
protected Result<LocalTime> parseEndTime(String string) {
|
||||||
var matcher = START_DATE_PATTERN.matcher(string);
|
return null;
|
||||||
if (matcher.find()) {
|
|
||||||
var day = Integer.parseInt(matcher.group(1));
|
|
||||||
var month = Integer.parseInt(matcher.group(2));
|
|
||||||
var year = Integer.parseInt(matcher.group(3));
|
|
||||||
var hour = Integer.parseInt(matcher.group(4));
|
|
||||||
var minute = Integer.parseInt(matcher.group(5));
|
|
||||||
var date = LocalDateTime.of(year, month, day, hour, minute);
|
|
||||||
return Payload.of(date);
|
|
||||||
}
|
}
|
||||||
return error("Could not recognize start date/time");
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseStartDate(String string) {
|
||||||
|
return parseGermanDate(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalTime> parseStartTime(String string) {
|
||||||
|
return parseGermanTime(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -12,11 +12,12 @@ import de.srsoftware.tools.Payload;
|
|||||||
import de.srsoftware.tools.Result;
|
import de.srsoftware.tools.Result;
|
||||||
import de.srsoftware.tools.Tag;
|
import de.srsoftware.tools.Tag;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.LocalDate;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class Rosenkeller extends BaseImporter {
|
public abstract class Rosenkeller extends BaseImporter {
|
||||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
||||||
private static final String BASE_URL = "https://rosenkeller.org";
|
private static final String BASE_URL = "https://rosenkeller.org";
|
||||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
||||||
@@ -51,7 +52,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
return error("Failed to find description tag");
|
return error("Failed to find description tag");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Result<Tag> extractEndTag(Tag eventTag) {
|
protected Result<Tag> extractEndTag(Tag eventTag) {
|
||||||
return error("extractEndTag(…) not supported");
|
return error("extractEndTag(…) not supported");
|
||||||
}
|
}
|
||||||
@@ -78,19 +79,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
return Payload.of(list);
|
return Payload.of(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
|
|
||||||
if (tagResult.optional().isEmpty()) return transform(tagResult);
|
|
||||||
List<Tag> list = tagResult.optional().get().find(attributeStartsWith("id", "post-")).stream().flatMap(tag -> tag.find(ofType("a")).stream()).toList();
|
|
||||||
return Payload.of(list);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
||||||
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
|
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Result<Tag> extractStartTag(Tag eventTag) {
|
protected Result<Tag> extractStartTag(Tag eventTag) {
|
||||||
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
|
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
|
||||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||||
@@ -110,12 +104,12 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<LocalDateTime> parseEndDate(String text) {
|
protected Result<LocalDate> parseEndDate(String text) {
|
||||||
return error("parseEndDate(…) not supported");
|
return error("parseEndDate(…) not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Result<LocalDateTime> parseStartDate(String text) {
|
protected Result<LocalDate> parseStartDate(String text) {
|
||||||
var match = DATE_PATTERN.matcher(text);
|
var match = DATE_PATTERN.matcher(text);
|
||||||
if (match.find()) {
|
if (match.find()) {
|
||||||
var dayOfMonth = Integer.parseInt(match.group(1));
|
var dayOfMonth = Integer.parseInt(match.group(1));
|
||||||
@@ -127,7 +121,7 @@ private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
|||||||
var now = LocalDateTime.now();
|
var now = LocalDateTime.now();
|
||||||
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
|
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
|
||||||
if (date.isBefore(now)) date = date.plusYears(1);
|
if (date.isBefore(now)) date = date.plusYears(1);
|
||||||
return Payload.of(date);
|
//return Payload.of(date);
|
||||||
}
|
}
|
||||||
return error("Failed to recognize a date in \"%s\"", text);
|
return error("Failed to recognize a date in \"%s\"", text);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user