5 changed files with 436 additions and 278 deletions
@ -0,0 +1,305 @@
@@ -0,0 +1,305 @@
|
||||
/* © SRSoftware 2024 */ |
||||
package de.srsoftware.cal.importer; |
||||
|
||||
import static de.srsoftware.tools.TagFilter.ofType; |
||||
|
||||
import de.srsoftware.cal.api.*; |
||||
import de.srsoftware.tools.*; |
||||
import de.srsoftware.tools.Error; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URI; |
||||
import java.net.URISyntaxException; |
||||
import java.net.URL; |
||||
import java.time.LocalDateTime; |
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
import java.util.Objects; |
||||
import java.util.Optional; |
||||
import java.util.stream.Stream; |
||||
|
||||
public abstract class BaseImporter implements Importer { |
||||
protected abstract String baseUrl(); |
||||
|
||||
@Override |
||||
public String description() { |
||||
return "abstract base class to create other importers on"; |
||||
} |
||||
|
||||
protected List<Attachment> extractAttachments(Tag eventTag) { |
||||
return extractAttachmentsTag(eventTag) //
|
||||
.optional() |
||||
.stream() |
||||
.flatMap(tag -> tag.find(ofType("img")).stream()) |
||||
.map(tag -> tag.get("src")) |
||||
.filter(Objects::nonNull) |
||||
.map(Payload::of) |
||||
.map(this::url) |
||||
.map(this::toAttachment) |
||||
.map(Result::optional) |
||||
.flatMap(Optional::stream) |
||||
.toList(); |
||||
} |
||||
|
||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) { |
||||
return extractDescriptionTag(eventTag); |
||||
} |
||||
|
||||
|
||||
protected Result<String> extractDescription(Tag eventTag){ |
||||
Result<Tag> titleTag = extractDescriptionTag(eventTag); |
||||
if (titleTag.optional().isEmpty()) return transform(titleTag); |
||||
var inner = titleTag.optional().flatMap(tag -> tag.inner(2)); |
||||
if (inner.isPresent()) return Payload.of(inner.get()); |
||||
return Error.of("No description found"); |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractDescriptionTag(Tag eventTag); |
||||
|
||||
protected Result<Coords> extractCoords(Tag eventTag) { |
||||
return Error.of("not implemented"); |
||||
} |
||||
|
||||
protected Result<LocalDateTime> extractEnd(Tag eventTag) { |
||||
Result<Tag> endTag = extractEndTag(eventTag); |
||||
if (endTag.optional().isEmpty()) return transform(endTag); |
||||
return parseEndDate(endTag.optional().get().toString(0)); |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractEndTag(Tag eventTag); |
||||
|
||||
protected Result<Appointment> extractEvent(Tag eventTag, Link eventPage) { |
||||
long id = 0; |
||||
|
||||
var titleResult = extractTitle(eventTag); |
||||
if (titleResult.optional().isEmpty()) return transform(titleResult); |
||||
String title = titleResult.optional().get(); |
||||
|
||||
var descriptionResult = extractDescription(eventTag); |
||||
if (descriptionResult.optional().isEmpty()) return transform(descriptionResult); |
||||
var description = descriptionResult.optional().get(); |
||||
|
||||
var startResult = extractStart(eventTag); |
||||
if (startResult.optional().isEmpty()) return transform(startResult); |
||||
var start = startResult.optional().get(); |
||||
|
||||
var endResult = extractEnd(eventTag); |
||||
var end = endResult.optional().orElse(null); |
||||
|
||||
var locationResult = extractLocation(eventTag); |
||||
if (locationResult.optional().isEmpty()) return transform(locationResult); |
||||
var location = locationResult.optional().get(); |
||||
|
||||
var event = new BaseAppointment(id, title, description, start, end, location) //
|
||||
.add(extractAttachments(eventTag)) |
||||
.addLinks(extractLinks(eventTag)) |
||||
.tags(extractTags(eventTag)); |
||||
|
||||
extractCoords(eventTag).optional().ifPresent(event::coords); |
||||
|
||||
return Payload.of(event); |
||||
} |
||||
|
||||
|
||||
private Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) { |
||||
return switch (domResult) { |
||||
case Payload<Tag> payload -> extractEvent(payload.get(), eventPage); |
||||
case Error<Tag> err -> err.transform(); |
||||
default -> invalidParameter(domResult); |
||||
}; |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractEventTag(Result<Tag> pageResult); |
||||
|
||||
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage); |
||||
|
||||
|
||||
protected List<Link> extractLinks(Tag appointmentTag) { |
||||
var links = new ArrayList<Link>(); |
||||
|
||||
extractLinksTag(appointmentTag) //
|
||||
.map(this::extractLinkAnchors) |
||||
.optional() |
||||
.stream() |
||||
.flatMap(List::stream).forEach(anchor -> { |
||||
var href = anchor.get("href"); |
||||
if (href == null) return; |
||||
if (!href.contains("://")) href = baseUrl() + href; |
||||
var text = anchor.inner(0).orElse(href); |
||||
Payload //
|
||||
.of(href) |
||||
.map(this::url) |
||||
.optional() |
||||
.map(url -> new Link(url, text)) |
||||
.ifPresent(links::add); |
||||
}); |
||||
return links; |
||||
} |
||||
|
||||
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult); |
||||
|
||||
private Result<Tag> extractLinksTag(Tag eventTag) { |
||||
return extractDescriptionTag(eventTag); |
||||
} |
||||
|
||||
protected Result<String> extractLocation(Tag eventTag) { |
||||
Result<Tag> locationTag = extractLocationTag(eventTag); |
||||
if (locationTag.optional().isEmpty()) return transform(locationTag); |
||||
return Payload.of(locationTag.optional().get().toString(2)); |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractLocationTag(Tag eventTag); |
||||
|
||||
|
||||
protected Result<LocalDateTime> extractStart(Tag eventTag) { |
||||
Result<Tag> endTag = extractStartTag(eventTag); |
||||
if (endTag.optional().isEmpty()) return transform(endTag); |
||||
return parseStartDate(endTag.optional().get().toString(0)); |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractStartTag(Tag eventTag); |
||||
|
||||
|
||||
protected abstract List<String> extractTags(Tag eventTag); |
||||
|
||||
protected Result<String> extractTitle(Tag eventTag) { |
||||
Result<Tag> locationTag = extractTitleTag(eventTag); |
||||
if (locationTag.optional().isEmpty()) return transform(locationTag); |
||||
var inner = locationTag.optional().flatMap(tag -> tag.inner(2)); |
||||
if (inner.isPresent()) return Payload.of(inner.get()); |
||||
return Error.of("No title found"); |
||||
} |
||||
|
||||
protected abstract Result<Tag> extractTitleTag(Tag eventTag); |
||||
|
||||
|
||||
@Override |
||||
public Stream<Appointment> fetch() { |
||||
var url = Payload.of(programURL()); |
||||
Stream<Result<String>> stream = url(url) |
||||
.map(this::open) //
|
||||
.map(this::preload) |
||||
.map(this::parseXML) |
||||
.map(this::extractEventUrls) |
||||
.stream(); |
||||
return stream //
|
||||
.map(this::url) |
||||
.map(this::loadEvent) |
||||
.flatMap(result -> result.optional().stream()); |
||||
} |
||||
|
||||
protected static <T> Result<T> invalidParameter(Result<?> result) { |
||||
return Error.format("Invalid parameter: %s", result.getClass().getSimpleName()); |
||||
} |
||||
|
||||
protected Result<Appointment> loadEvent(Result<URL> urlResult) { |
||||
var link = urlResult //
|
||||
.optional().map(url -> new Link(url, "Event-Seite")).orElse(null); |
||||
return urlResult //
|
||||
.map(this::open) |
||||
.map(this::preload) |
||||
.map(this::parseXML) |
||||
.map(this::extractEventTag) |
||||
.map(tagResult -> extractEvent(tagResult, link)); |
||||
} |
||||
|
||||
protected Result<InputStream> open(Result<URL> url) { |
||||
switch (url) { |
||||
case Payload<URL> payload: |
||||
try { |
||||
return Payload.of(payload.get().openConnection().getInputStream()); |
||||
} catch (IOException e) { |
||||
return Error.of("Failed to open %s".formatted(payload), e); |
||||
} |
||||
case Error<URL> error: |
||||
return error.transform(); |
||||
default: |
||||
return invalidParameter(url); |
||||
} |
||||
} |
||||
|
||||
protected abstract Result<LocalDateTime> parseEndDate(String string); |
||||
|
||||
protected abstract Result<LocalDateTime> parseStartDate(String string); |
||||
|
||||
protected Result<Tag> parseXML(Result<InputStream> inputStream) { |
||||
return switch (inputStream) { |
||||
case Payload<InputStream> payload -> XMLParser.parse(payload.get()); |
||||
case Error<InputStream> error -> error.transform(); |
||||
default -> invalidParameter(inputStream); |
||||
}; |
||||
} |
||||
|
||||
protected Result<InputStream> preload(Result<InputStream> inputStream) { |
||||
switch (inputStream) { |
||||
case Payload<InputStream> payload: |
||||
try { |
||||
return Payload.of(XMLParser.preload(payload.get())); |
||||
} catch (IOException e) { |
||||
return Error.of("Failed to buffer data from %s".formatted(payload), e); |
||||
} |
||||
case Error<InputStream> error: |
||||
return error.transform(); |
||||
default: |
||||
return invalidParameter(inputStream); |
||||
} |
||||
} |
||||
|
||||
protected abstract String programURL(); |
||||
|
||||
protected Result<Attachment> toAttachment(Result<URL> urlResult) { |
||||
switch (urlResult) { |
||||
case Payload<URL> payload: |
||||
try { |
||||
var mime = payload.get().openConnection().getContentType(); |
||||
return Payload.of(new Attachment(payload.get(), mime)); |
||||
} catch (Exception e) { |
||||
return Error.format("Failed to read mime type of %s", payload); |
||||
} |
||||
case Error<URL> err: |
||||
return err.transform(); |
||||
default: |
||||
return invalidParameter(urlResult); |
||||
} |
||||
} |
||||
|
||||
protected static Result<Integer> toNumericMonth(String month) { |
||||
month = month.toLowerCase(); |
||||
if (month.startsWith("ja")) return Payload.of(1); |
||||
if (month.startsWith("f")) return Payload.of(2); |
||||
if ("may".equals(month) || "mai".equals(month)) return Payload.of(5); |
||||
if (month.startsWith("m")) return Payload.of(3); |
||||
if (month.startsWith("ap")) return Payload.of(4); |
||||
if (month.startsWith("jun")) return Payload.of(6); |
||||
if (month.startsWith("jul")) return Payload.of(7); |
||||
if (month.startsWith("au")) return Payload.of(8); |
||||
if (month.startsWith("s")) return Payload.of(9); |
||||
if (month.startsWith("o")) return Payload.of(10); |
||||
if (month.startsWith("n")) return Payload.of(11); |
||||
if (month.startsWith("d")) return Payload.of(12); |
||||
return Error.format("Failed to recognize \"%s\" as a month!", month); |
||||
} |
||||
|
||||
|
||||
protected <T> Result<T> transform(Result<?> result) { |
||||
if (result instanceof Error<?> err) return err.transform(); |
||||
return invalidParameter(result); |
||||
} |
||||
|
||||
protected Result<URL> url(Result<String> urlResult) { |
||||
switch (urlResult) { |
||||
case Payload<String> payload: |
||||
var url = payload.get(); |
||||
try { |
||||
return Payload.of(new URI(url).toURL()); |
||||
} catch (MalformedURLException | URISyntaxException e) { |
||||
return de.srsoftware.tools.Error.of("Failed to create URL of %s".formatted(url), e); |
||||
} |
||||
case de.srsoftware.tools.Error<String> err: |
||||
return err.transform(); |
||||
default: |
||||
return invalidParameter(urlResult); |
||||
} |
||||
} |
||||
} |
@ -1,274 +0,0 @@
@@ -1,274 +0,0 @@
|
||||
/* © SRSoftware 2024 */ |
||||
package de.srsoftware.cal.importer; |
||||
|
||||
import static de.srsoftware.tools.Optionals.nullable; |
||||
import static de.srsoftware.tools.TagFilter.*; |
||||
import static java.util.Optional.empty; |
||||
|
||||
import de.srsoftware.cal.api.*; |
||||
import de.srsoftware.tools.*; |
||||
import de.srsoftware.tools.Error; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URI; |
||||
import java.net.URISyntaxException; |
||||
import java.net.URL; |
||||
import java.time.LocalDateTime; |
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
import java.util.Objects; |
||||
import java.util.Optional; |
||||
import java.util.regex.Pattern; |
||||
import java.util.stream.Stream; |
||||
|
||||
/** |
||||
* Importer für Events vom Rosenkeller Jena |
||||
*/ |
||||
public class JenaRosenkeller implements Importer { |
||||
private static final String BASE_URL = "https://rosenkeller.org"; |
||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content"; |
||||
private static final Coords DEFAULT_COORDS = new Coords(50.9294, 11.585); |
||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena"; |
||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?"); |
||||
|
||||
@Override |
||||
public String description() { |
||||
return "Events von der Seite rosenkeller.org importieren"; |
||||
} |
||||
|
||||
|
||||
private static List<Attachment> extractAttachments(Tag appointmentTag) { |
||||
return appointmentTag //
|
||||
.find(ofType("img")) |
||||
.stream() |
||||
.map(tag -> tag.get("src")) |
||||
.filter(Objects::nonNull) |
||||
.map(Payload::of) |
||||
.map(JenaRosenkeller::url) |
||||
.map(JenaRosenkeller::toAttachment) |
||||
.map(Result::optional) |
||||
.flatMap(Optional::stream) |
||||
.toList(); |
||||
} |
||||
|
||||
private static Optional<String> extractDescription(Tag appointmentTag) { |
||||
return appointmentTag.find(attributeHas("class", "tribe-events-single-event-description")).stream().flatMap(tag -> tag.inner(2).stream()).findAny(); |
||||
} |
||||
|
||||
private static List<Link> extractLinks(Tag appointmentTag) { |
||||
var links = new ArrayList<Link>(); |
||||
appointmentTag //
|
||||
.find(attributeStartsWith("id", "post-")) |
||||
.stream() |
||||
.flatMap(tag -> tag.find(ofType("a")).stream()) |
||||
.forEach(anchor -> { |
||||
var href = anchor.get("href"); |
||||
if (href == null) return; |
||||
if (!href.contains("://")) href = BASE_URL + "href"; |
||||
var text = anchor.inner(0).orElse(href); |
||||
Payload.of(href).map(JenaRosenkeller::url).optional().map(url -> new Link(url, text)).ifPresent(links::add); |
||||
}); |
||||
return links; |
||||
} |
||||
|
||||
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) { |
||||
return appointmentTag.find(attributeEquals("class", "tribe-event-date-start")).stream().flatMap(tag -> tag.inner(0).stream()).flatMap(txt -> toDateTime(txt).stream()).findAny(); |
||||
} |
||||
private static Optional<String> extractTitle(Tag appointmentTag) { |
||||
return appointmentTag |
||||
.find(attributeEndsWith("class", "single-event-title")) //
|
||||
.stream() |
||||
.flatMap(tag -> tag.inner(2).stream()) |
||||
.findAny(); |
||||
} |
||||
|
||||
@Override |
||||
public Stream<Appointment> fetch() { |
||||
var url = Payload.of(BASE_URL + "/de/programm"); |
||||
Stream<Result<String>> stream = url(url) |
||||
.map(JenaRosenkeller::open) //
|
||||
.map(JenaRosenkeller::preload) |
||||
.map(JenaRosenkeller::parse) |
||||
.map(JenaRosenkeller::findEventUrls) |
||||
.stream(); |
||||
return stream //
|
||||
.map(JenaRosenkeller::url) |
||||
.map(JenaRosenkeller::loadEvent) |
||||
.flatMap(result -> result.optional().stream()); |
||||
} |
||||
|
||||
private static Result<List<String>> findEventUrls(Result<Tag> tagResult) { |
||||
return switch (tagResult) { |
||||
case Payload<Tag> payload -> { |
||||
List<String> urls = payload // find tag with event-id
|
||||
.get() |
||||
.find(attributeStartsWith("id","event-")) |
||||
.stream() |
||||
.map(t -> t.find(attributeEquals("class", "ect-event-url"))) |
||||
.flatMap(List::stream) |
||||
.map(t -> t.get("href")) |
||||
.toList(); |
||||
yield Payload.of(urls); |
||||
} |
||||
case Error<Tag> error -> error.transform(); |
||||
default -> Error.format("Invalid parameter: %s", tagResult.getClass().getSimpleName()); |
||||
}; |
||||
} |
||||
|
||||
|
||||
private static Result<Tag> getEventDiv(Result<Tag> pageResult) { |
||||
switch (pageResult) { |
||||
case Payload<Tag> payload: |
||||
List<Tag> list = payload.get().find(attributeEquals("id", APPOINTMENT_TAG_ID)); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID); |
||||
case Error<Tag> err: |
||||
return err.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", pageResult.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
|
||||
|
||||
private static Result<Appointment> loadEvent(Result<URL> urlResult) { |
||||
var link = urlResult.optional().map(url -> new Link(url, "Event-Seite")).orElse(null); |
||||
return urlResult //
|
||||
.map(JenaRosenkeller::open) |
||||
.map(JenaRosenkeller::preload) |
||||
.map(JenaRosenkeller::parse) |
||||
.map(JenaRosenkeller::getEventDiv) |
||||
.map(tagResult -> parseEvent(tagResult, link)); |
||||
} |
||||
|
||||
private static Result<InputStream> open(Result<URL> url) { |
||||
switch (url) { |
||||
case Payload<URL> payload: |
||||
try { |
||||
return Payload.of(payload.get().openConnection().getInputStream()); |
||||
} catch (IOException e) { |
||||
return Error.of("Failed to open %s".formatted(payload), e); |
||||
} |
||||
case Error<URL> error: |
||||
return error.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", url.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
|
||||
|
||||
private static Result<Tag> parse(Result<InputStream> inputStream) { |
||||
return switch (inputStream) { |
||||
case Payload<InputStream> payload -> XMLParser.parse(payload.get()); |
||||
case Error<InputStream> error -> error.transform(); |
||||
default -> Error.of("Invalid parameter: %s".formatted(inputStream.getClass().getSimpleName())); |
||||
}; |
||||
} |
||||
|
||||
private static Result<Appointment> parseEvent(Result<Tag> domResult, Link eventPage) { |
||||
switch (domResult) { |
||||
case Payload<Tag> payload: |
||||
var appointmentTag = payload.get(); |
||||
var title = extractTitle(appointmentTag); |
||||
if (title.isEmpty()) return Error.format("No title found at %s", eventPage.url()); |
||||
var description = extractDescription(appointmentTag); |
||||
if (description.isEmpty()) return Error.format("No description found at %s", eventPage.url()); |
||||
var start = extractStart(appointmentTag); |
||||
if (start.isEmpty()) return Error.format("No start date/time found at %s", eventPage.url()); |
||||
var links = extractLinks(appointmentTag); |
||||
var attachments = extractAttachments(appointmentTag); |
||||
var appointment = new BaseAppointment(0, title.get(), description.get(), start.get(), null, DEFAULT_LOCATION).addLinks(links).add(attachments); |
||||
return Payload.of(appointment); |
||||
case Error<Tag> err: |
||||
return err.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", domResult.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
|
||||
|
||||
private static Result<InputStream> preload(Result<InputStream> inputStream) { |
||||
switch (inputStream) { |
||||
case Payload<InputStream> payload: |
||||
try { |
||||
return Payload.of(XMLParser.preload(payload.get())); |
||||
} catch (IOException e) { |
||||
return Error.of("Failed to buffer data from %s".formatted(payload), e); |
||||
} |
||||
case Error<InputStream> error: |
||||
return error.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", inputStream.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
|
||||
|
||||
private static Result<Attachment> toAttachment(Result<URL> urlResult) { |
||||
switch (urlResult) { |
||||
case Payload<URL> payload: |
||||
try { |
||||
var mime = payload.get().openConnection().getContentType(); |
||||
return Payload.of(new Attachment(payload.get(), mime)); |
||||
} catch (Exception e) { |
||||
return Error.format("Failed to read mime type of %s", payload); |
||||
} |
||||
case Error<URL> err: |
||||
return err.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", urlResult.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
|
||||
|
||||
private static Optional<LocalDateTime> toDateTime(String text) { |
||||
var match = DATE_PATTERN.matcher(text); |
||||
if (match.find()) { |
||||
var dayOfMonth = Integer.parseInt(match.group(1)); |
||||
var month = toNumericMonth(match.group(2)); |
||||
if (month.isEmpty()) return empty(); |
||||
|
||||
|
||||
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0")); |
||||
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0")); |
||||
var now = LocalDateTime.now(); |
||||
var date = LocalDateTime.of(now.getYear(), month.get(), dayOfMonth, hour, minute); |
||||
if (date.isBefore(now)) date = date.plusYears(1); |
||||
return Optional.of(date); |
||||
} |
||||
return empty(); |
||||
} |
||||
|
||||
private static Optional<Integer> toNumericMonth(String month) { |
||||
month = month.toLowerCase(); |
||||
if (month.startsWith("ja")) return Optional.of(1); |
||||
if (month.startsWith("f")) return Optional.of(2); |
||||
if ("may".equals(month) || "mai".equals(month)) return Optional.of(5); |
||||
if (month.startsWith("m")) return Optional.of(3); |
||||
if (month.startsWith("ap")) return Optional.of(4); |
||||
if (month.startsWith("jun")) return Optional.of(6); |
||||
if (month.startsWith("jul")) return Optional.of(7); |
||||
if (month.startsWith("au")) return Optional.of(8); |
||||
if (month.startsWith("s")) return Optional.of(9); |
||||
if (month.startsWith("o")) return Optional.of(10); |
||||
if (month.startsWith("n")) return Optional.of(11); |
||||
if (month.startsWith("d")) return Optional.of(12); |
||||
return empty(); |
||||
} |
||||
|
||||
|
||||
private static Result<URL> url(Result<String> urls) { |
||||
switch (urls) { |
||||
case Payload<String> payload: |
||||
var url = payload.get(); |
||||
try { |
||||
return Payload.of(new URI(url).toURL()); |
||||
} catch (MalformedURLException | URISyntaxException e) { |
||||
return Error.of("Failed to create URL of %s".formatted(url), e); |
||||
} |
||||
case Error<String> err: |
||||
return err.transform(); |
||||
default: |
||||
return Error.format("Invalid parameter: %s", urls.getClass().getSimpleName()); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,127 @@
@@ -0,0 +1,127 @@
|
||||
/* © SRSoftware 2024 */ |
||||
package de.srsoftware.cal.importer.jena; |
||||
|
||||
import static de.srsoftware.tools.Optionals.nullable; |
||||
import static de.srsoftware.tools.TagFilter.*; |
||||
|
||||
import de.srsoftware.cal.importer.BaseImporter; |
||||
import de.srsoftware.tools.Error; |
||||
import de.srsoftware.tools.Payload; |
||||
import de.srsoftware.tools.Result; |
||||
import de.srsoftware.tools.Tag; |
||||
import java.time.LocalDateTime; |
||||
import java.util.List; |
||||
import java.util.regex.Pattern; |
||||
|
||||
public class Rosenkeller extends BaseImporter { |
||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content"; |
||||
private static final String BASE_URL = "https://rosenkeller.org"; |
||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?"); |
||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena"; |
||||
|
||||
@Override |
||||
protected String baseUrl() { |
||||
return BASE_URL; |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) { |
||||
return Payload.of(eventTag); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) { |
||||
var opt = eventTag //
|
||||
.find(attributeHas("class", "tribe-events-single-event-description")) |
||||
.stream() |
||||
.findAny(); |
||||
if (opt.isPresent()) return Payload.of(opt.get()); |
||||
return Error.of("Failed to find description tag"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractEndTag(Tag eventTag) { |
||||
return Error.of("extractEndTag(…) not supported"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) { |
||||
if (pageResult.optional().isEmpty()) return transform(pageResult); |
||||
var list = pageResult.optional().get().find(attributeEquals("id", APPOINTMENT_TAG_ID)); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) { |
||||
if (programPage.optional().isEmpty()) return transform(programPage); |
||||
List<String> list = programPage.optional() |
||||
.get() //
|
||||
.find(attributeStartsWith("id", "event-")) |
||||
.stream() |
||||
.map(t -> t.find(attributeEquals("class", "ect-event-url"))) |
||||
.flatMap(List::stream) |
||||
.map(t -> t.get("href")) |
||||
.toList(); |
||||
return Payload.of(list); |
||||
} |
||||
|
||||
@Override |
||||
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) { |
||||
if (tagResult.optional().isEmpty()) return transform(tagResult); |
||||
List<Tag> list = tagResult.optional().get().find(attributeStartsWith("id", "post-")).stream().flatMap(tag -> tag.find(ofType("a")).stream()).toList(); |
||||
return Payload.of(list); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractLocationTag(Tag eventTag) { |
||||
return Payload.of(new Tag("span").content(DEFAULT_LOCATION)); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractStartTag(Tag eventTag) { |
||||
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start")); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.of("Failed to locate start tag"); |
||||
} |
||||
|
||||
@Override |
||||
protected List<String> extractTags(Tag eventTag) { |
||||
return List.of("Rosenkeller", "Jena"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractTitleTag(Tag eventTag) { |
||||
var list = eventTag.find(attributeEndsWith("class", "single-event-title")); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.of("Failed to find title tag"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDateTime> parseEndDate(String text) { |
||||
return Error.of("parseEndDate(…) not supported"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDateTime> parseStartDate(String text) { |
||||
var match = DATE_PATTERN.matcher(text); |
||||
if (match.find()) { |
||||
var dayOfMonth = Integer.parseInt(match.group(1)); |
||||
var month = toNumericMonth(match.group(2)); |
||||
if (month.optional().isEmpty()) return transform(month); |
||||
|
||||
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0")); |
||||
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0")); |
||||
var now = LocalDateTime.now(); |
||||
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute); |
||||
if (date.isBefore(now)) date = date.plusYears(1); |
||||
return Payload.of(date); |
||||
} |
||||
return Error.format("Failed to recognize a date in \"%s\"", text); |
||||
} |
||||
|
||||
@Override |
||||
protected String programURL() { |
||||
return baseUrl() + "/de/programm"; |
||||
} |
||||
} |
Loading…
Reference in new issue