overhauled code for Rosenkeller
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.app;
|
||||
|
||||
import de.srsoftware.cal.importer.JenaRosenkeller;
|
||||
import de.srsoftware.cal.importer.jena.Rosenkeller;
|
||||
|
||||
/**
|
||||
* Test application
|
||||
@@ -15,7 +15,7 @@ public class Application {
|
||||
* @param args default
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
var rosenkeller = new JenaRosenkeller();
|
||||
var rosenkeller = new Rosenkeller();
|
||||
var appointments = rosenkeller.fetch();
|
||||
appointments.forEach(System.err::println);
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ public class BaseAppointment implements Appointment {
|
||||
* @param newTags the tag to add to the appointment
|
||||
* @return the appointment
|
||||
*/
|
||||
public BaseAppointment tag(String... newTags) {
|
||||
public BaseAppointment tags(String... newTags) {
|
||||
Collections.addAll(tags, newTags);
|
||||
return this;
|
||||
}
|
||||
@@ -96,7 +96,7 @@ public class BaseAppointment implements Appointment {
|
||||
* @param newTags the tag to add to the appointment
|
||||
* @return the appointment
|
||||
*/
|
||||
public BaseAppointment tag(Collection<String> newTags) {
|
||||
public BaseAppointment tags(Collection<String> newTags) {
|
||||
tags.addAll(newTags);
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,305 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer;
|
||||
|
||||
import static de.srsoftware.tools.TagFilter.ofType;
|
||||
|
||||
import de.srsoftware.cal.api.*;
|
||||
import de.srsoftware.tools.*;
|
||||
import de.srsoftware.tools.Error;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public abstract class BaseImporter implements Importer {
|
||||
/**
 * Supplies the prefix that {@code extractLinks} puts in front of every {@code href} that contains no {@code "://"}.
 *
 * @return the base URL as text
 */
protected abstract String baseUrl();
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "abstract base class to create other importers on";
|
||||
}
|
||||
|
||||
protected List<Attachment> extractAttachments(Tag eventTag) {
|
||||
return extractAttachmentsTag(eventTag) //
|
||||
.optional()
|
||||
.stream()
|
||||
.flatMap(tag -> tag.find(ofType("img")).stream())
|
||||
.map(tag -> tag.get("src"))
|
||||
.filter(Objects::nonNull)
|
||||
.map(Payload::of)
|
||||
.map(this::url)
|
||||
.map(this::toAttachment)
|
||||
.map(Result::optional)
|
||||
.flatMap(Optional::stream)
|
||||
.toList();
|
||||
}
|
||||
|
||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
|
||||
return extractDescriptionTag(eventTag);
|
||||
}
|
||||
|
||||
|
||||
protected Result<String> extractDescription(Tag eventTag){
|
||||
Result<Tag> titleTag = extractDescriptionTag(eventTag);
|
||||
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
||||
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
||||
if (inner.isPresent()) return Payload.of(inner.get());
|
||||
return Error.of("No description found");
|
||||
}
|
||||
|
||||
/**
 * Locates the tag that holds the event description.
 * Besides {@code extractDescription}, the default implementations of {@code extractAttachmentsTag}
 * and {@code extractLinksTag} also delegate to this method.
 *
 * @param eventTag the tag describing the event
 * @return the description tag, or an error if it cannot be found
 */
protected abstract Result<Tag> extractDescriptionTag(Tag eventTag);
|
||||
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
return Error.of("not implemented");
|
||||
}
|
||||
|
||||
protected Result<LocalDateTime> extractEnd(Tag eventTag) {
|
||||
Result<Tag> endTag = extractEndTag(eventTag);
|
||||
if (endTag.optional().isEmpty()) return transform(endTag);
|
||||
return parseEndDate(endTag.optional().get().toString(0));
|
||||
}
|
||||
|
||||
/**
 * Locates the tag whose {@code toString(0)} text is handed to {@code parseEndDate}.
 * An error result is tolerated by {@code extractEvent}: the appointment is then created with a {@code null} end.
 *
 * @param eventTag the tag describing the event
 * @return the end tag, or an error if there is none
 */
protected abstract Result<Tag> extractEndTag(Tag eventTag);
|
||||
|
||||
protected Result<Appointment> extractEvent(Tag eventTag, Link eventPage) {
|
||||
long id = 0;
|
||||
|
||||
var titleResult = extractTitle(eventTag);
|
||||
if (titleResult.optional().isEmpty()) return transform(titleResult);
|
||||
String title = titleResult.optional().get();
|
||||
|
||||
var descriptionResult = extractDescription(eventTag);
|
||||
if (descriptionResult.optional().isEmpty()) return transform(descriptionResult);
|
||||
var description = descriptionResult.optional().get();
|
||||
|
||||
var startResult = extractStart(eventTag);
|
||||
if (startResult.optional().isEmpty()) return transform(startResult);
|
||||
var start = startResult.optional().get();
|
||||
|
||||
var endResult = extractEnd(eventTag);
|
||||
var end = endResult.optional().orElse(null);
|
||||
|
||||
var locationResult = extractLocation(eventTag);
|
||||
if (locationResult.optional().isEmpty()) return transform(locationResult);
|
||||
var location = locationResult.optional().get();
|
||||
|
||||
var event = new BaseAppointment(id, title, description, start, end, location) //
|
||||
.add(extractAttachments(eventTag))
|
||||
.addLinks(extractLinks(eventTag))
|
||||
.tags(extractTags(eventTag));
|
||||
|
||||
extractCoords(eventTag).optional().ifPresent(event::coords);
|
||||
|
||||
return Payload.of(event);
|
||||
}
|
||||
|
||||
|
||||
private Result<Appointment> extractEvent(Result<Tag> domResult, Link eventPage) {
|
||||
return switch (domResult) {
|
||||
case Payload<Tag> payload -> extractEvent(payload.get(), eventPage);
|
||||
case Error<Tag> err -> err.transform();
|
||||
default -> invalidParameter(domResult);
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Picks the tag describing the event out of a parsed event page.
 * {@code loadEvent} passes the outcome on to {@code extractEvent}.
 *
 * @param pageResult the result of parsing the event page
 * @return the event tag, or an error
 */
protected abstract Result<Tag> extractEventTag(Result<Tag> pageResult);
|
||||
|
||||
/**
 * Collects the addresses of the individual event pages from the parsed program page.
 * {@code fetch()} converts every entry into a URL and loads the event from it.
 *
 * @param programPage the result of parsing the program page
 * @return the list of event page addresses, or an error
 */
protected abstract Result<List<String>> extractEventUrls(Result<Tag> programPage);
|
||||
|
||||
|
||||
protected List<Link> extractLinks(Tag appointmentTag) {
|
||||
var links = new ArrayList<Link>();
|
||||
|
||||
extractLinksTag(appointmentTag) //
|
||||
.map(this::extractLinkAnchors)
|
||||
.optional()
|
||||
.stream()
|
||||
.flatMap(List::stream).forEach(anchor -> {
|
||||
var href = anchor.get("href");
|
||||
if (href == null) return;
|
||||
if (!href.contains("://")) href = baseUrl() + href;
|
||||
var text = anchor.inner(0).orElse(href);
|
||||
Payload //
|
||||
.of(href)
|
||||
.map(this::url)
|
||||
.optional()
|
||||
.map(url -> new Link(url, text))
|
||||
.ifPresent(links::add);
|
||||
});
|
||||
return links;
|
||||
}
|
||||
|
||||
/**
 * Collects the anchor tags out of the tag delivered by {@code extractLinksTag}.
 * {@code extractLinks} reads the {@code href} attribute of every returned tag.
 *
 * @param tagResult the result of locating the tag that contains the links
 * @return the anchors, or an error
 */
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
|
||||
|
||||
private Result<Tag> extractLinksTag(Tag eventTag) {
|
||||
return extractDescriptionTag(eventTag);
|
||||
}
|
||||
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
Result<Tag> locationTag = extractLocationTag(eventTag);
|
||||
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
||||
return Payload.of(locationTag.optional().get().toString(2));
|
||||
}
|
||||
|
||||
/**
 * Locates the tag whose {@code toString(2)} rendering becomes the location of the appointment.
 *
 * @param eventTag the tag describing the event
 * @return the location tag, or an error
 */
protected abstract Result<Tag> extractLocationTag(Tag eventTag);
|
||||
|
||||
|
||||
protected Result<LocalDateTime> extractStart(Tag eventTag) {
|
||||
Result<Tag> endTag = extractStartTag(eventTag);
|
||||
if (endTag.optional().isEmpty()) return transform(endTag);
|
||||
return parseStartDate(endTag.optional().get().toString(0));
|
||||
}
|
||||
|
||||
/**
 * Locates the tag whose {@code toString(0)} text is handed to {@code parseStartDate}.
 *
 * @param eventTag the tag describing the event
 * @return the start tag, or an error
 */
protected abstract Result<Tag> extractStartTag(Tag eventTag);
|
||||
|
||||
|
||||
/**
 * Supplies the tags (keywords) that {@code extractEvent} attaches to the appointment.
 *
 * @param eventTag the tag describing the event
 * @return the tags for the appointment
 */
protected abstract List<String> extractTags(Tag eventTag);
|
||||
|
||||
protected Result<String> extractTitle(Tag eventTag) {
|
||||
Result<Tag> locationTag = extractTitleTag(eventTag);
|
||||
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
||||
var inner = locationTag.optional().flatMap(tag -> tag.inner(2));
|
||||
if (inner.isPresent()) return Payload.of(inner.get());
|
||||
return Error.of("No title found");
|
||||
}
|
||||
|
||||
/**
 * Locates the tag whose {@code inner(2)} content becomes the title of the appointment.
 *
 * @param eventTag the tag describing the event
 * @return the title tag, or an error
 */
protected abstract Result<Tag> extractTitleTag(Tag eventTag);
|
||||
|
||||
|
||||
@Override
|
||||
public Stream<Appointment> fetch() {
|
||||
var url = Payload.of(programURL());
|
||||
Stream<Result<String>> stream = url(url)
|
||||
.map(this::open) //
|
||||
.map(this::preload)
|
||||
.map(this::parseXML)
|
||||
.map(this::extractEventUrls)
|
||||
.stream();
|
||||
return stream //
|
||||
.map(this::url)
|
||||
.map(this::loadEvent)
|
||||
.flatMap(result -> result.optional().stream());
|
||||
}
|
||||
|
||||
protected static <T> Result<T> invalidParameter(Result<?> result) {
|
||||
return Error.format("Invalid parameter: %s", result.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
protected Result<Appointment> loadEvent(Result<URL> urlResult) {
|
||||
var link = urlResult //
|
||||
.optional().map(url -> new Link(url, "Event-Seite")).orElse(null);
|
||||
return urlResult //
|
||||
.map(this::open)
|
||||
.map(this::preload)
|
||||
.map(this::parseXML)
|
||||
.map(this::extractEventTag)
|
||||
.map(tagResult -> extractEvent(tagResult, link));
|
||||
}
|
||||
|
||||
protected Result<InputStream> open(Result<URL> url) {
|
||||
switch (url) {
|
||||
case Payload<URL> payload:
|
||||
try {
|
||||
return Payload.of(payload.get().openConnection().getInputStream());
|
||||
} catch (IOException e) {
|
||||
return Error.of("Failed to open %s".formatted(payload), e);
|
||||
}
|
||||
case Error<URL> error:
|
||||
return error.transform();
|
||||
default:
|
||||
return invalidParameter(url);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses the end of an event from the text of the end tag (see {@code extractEnd}).
 *
 * @param string the text of the end tag
 * @return the end date/time, or an error
 */
protected abstract Result<LocalDateTime> parseEndDate(String string);
|
||||
|
||||
/**
 * Parses the start of an event from the text of the start tag (see {@code extractStart}).
 *
 * @param string the text of the start tag
 * @return the start date/time, or an error
 */
protected abstract Result<LocalDateTime> parseStartDate(String string);
|
||||
|
||||
protected Result<Tag> parseXML(Result<InputStream> inputStream) {
|
||||
return switch (inputStream) {
|
||||
case Payload<InputStream> payload -> XMLParser.parse(payload.get());
|
||||
case Error<InputStream> error -> error.transform();
|
||||
default -> invalidParameter(inputStream);
|
||||
};
|
||||
}
|
||||
|
||||
protected Result<InputStream> preload(Result<InputStream> inputStream) {
|
||||
switch (inputStream) {
|
||||
case Payload<InputStream> payload:
|
||||
try {
|
||||
return Payload.of(XMLParser.preload(payload.get()));
|
||||
} catch (IOException e) {
|
||||
return Error.of("Failed to buffer data from %s".formatted(payload), e);
|
||||
}
|
||||
case Error<InputStream> error:
|
||||
return error.transform();
|
||||
default:
|
||||
return invalidParameter(inputStream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Supplies the address of the program page, which is the starting point of {@code fetch()}.
 *
 * @return the address of the program page as text
 */
protected abstract String programURL();
|
||||
|
||||
protected Result<Attachment> toAttachment(Result<URL> urlResult) {
|
||||
switch (urlResult) {
|
||||
case Payload<URL> payload:
|
||||
try {
|
||||
var mime = payload.get().openConnection().getContentType();
|
||||
return Payload.of(new Attachment(payload.get(), mime));
|
||||
} catch (Exception e) {
|
||||
return Error.format("Failed to read mime type of %s", payload);
|
||||
}
|
||||
case Error<URL> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return invalidParameter(urlResult);
|
||||
}
|
||||
}
|
||||
|
||||
protected static Result<Integer> toNumericMonth(String month) {
|
||||
month = month.toLowerCase();
|
||||
if (month.startsWith("ja")) return Payload.of(1);
|
||||
if (month.startsWith("f")) return Payload.of(2);
|
||||
if ("may".equals(month) || "mai".equals(month)) return Payload.of(5);
|
||||
if (month.startsWith("m")) return Payload.of(3);
|
||||
if (month.startsWith("ap")) return Payload.of(4);
|
||||
if (month.startsWith("jun")) return Payload.of(6);
|
||||
if (month.startsWith("jul")) return Payload.of(7);
|
||||
if (month.startsWith("au")) return Payload.of(8);
|
||||
if (month.startsWith("s")) return Payload.of(9);
|
||||
if (month.startsWith("o")) return Payload.of(10);
|
||||
if (month.startsWith("n")) return Payload.of(11);
|
||||
if (month.startsWith("d")) return Payload.of(12);
|
||||
return Error.format("Failed to recognize \"%s\" as a month!", month);
|
||||
}
|
||||
|
||||
|
||||
protected <T> Result<T> transform(Result<?> result) {
|
||||
if (result instanceof Error<?> err) return err.transform();
|
||||
return invalidParameter(result);
|
||||
}
|
||||
|
||||
protected Result<URL> url(Result<String> urlResult) {
|
||||
switch (urlResult) {
|
||||
case Payload<String> payload:
|
||||
var url = payload.get();
|
||||
try {
|
||||
return Payload.of(new URI(url).toURL());
|
||||
} catch (MalformedURLException | URISyntaxException e) {
|
||||
return de.srsoftware.tools.Error.of("Failed to create URL of %s".formatted(url), e);
|
||||
}
|
||||
case de.srsoftware.tools.Error<String> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return invalidParameter(urlResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,274 +0,0 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer;
|
||||
|
||||
import static de.srsoftware.tools.Optionals.nullable;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
import static java.util.Optional.empty;
|
||||
|
||||
import de.srsoftware.cal.api.*;
|
||||
import de.srsoftware.tools.*;
|
||||
import de.srsoftware.tools.Error;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Importer für Events vom Rosenkeller Jena
|
||||
*/
|
||||
public class JenaRosenkeller implements Importer {
|
||||
private static final String BASE_URL = "https://rosenkeller.org";
|
||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
||||
private static final Coords DEFAULT_COORDS = new Coords(50.9294, 11.585);
|
||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Events von der Seite rosenkeller.org importieren";
|
||||
}
|
||||
|
||||
|
||||
private static List<Attachment> extractAttachments(Tag appointmentTag) {
|
||||
return appointmentTag //
|
||||
.find(ofType("img"))
|
||||
.stream()
|
||||
.map(tag -> tag.get("src"))
|
||||
.filter(Objects::nonNull)
|
||||
.map(Payload::of)
|
||||
.map(JenaRosenkeller::url)
|
||||
.map(JenaRosenkeller::toAttachment)
|
||||
.map(Result::optional)
|
||||
.flatMap(Optional::stream)
|
||||
.toList();
|
||||
}
|
||||
|
||||
private static Optional<String> extractDescription(Tag appointmentTag) {
|
||||
return appointmentTag.find(attributeHas("class", "tribe-events-single-event-description")).stream().flatMap(tag -> tag.inner(2).stream()).findAny();
|
||||
}
|
||||
|
||||
private static List<Link> extractLinks(Tag appointmentTag) {
|
||||
var links = new ArrayList<Link>();
|
||||
appointmentTag //
|
||||
.find(attributeStartsWith("id", "post-"))
|
||||
.stream()
|
||||
.flatMap(tag -> tag.find(ofType("a")).stream())
|
||||
.forEach(anchor -> {
|
||||
var href = anchor.get("href");
|
||||
if (href == null) return;
|
||||
if (!href.contains("://")) href = BASE_URL + "href";
|
||||
var text = anchor.inner(0).orElse(href);
|
||||
Payload.of(href).map(JenaRosenkeller::url).optional().map(url -> new Link(url, text)).ifPresent(links::add);
|
||||
});
|
||||
return links;
|
||||
}
|
||||
|
||||
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) {
|
||||
return appointmentTag.find(attributeEquals("class", "tribe-event-date-start")).stream().flatMap(tag -> tag.inner(0).stream()).flatMap(txt -> toDateTime(txt).stream()).findAny();
|
||||
}
|
||||
private static Optional<String> extractTitle(Tag appointmentTag) {
|
||||
return appointmentTag
|
||||
.find(attributeEndsWith("class", "single-event-title")) //
|
||||
.stream()
|
||||
.flatMap(tag -> tag.inner(2).stream())
|
||||
.findAny();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Appointment> fetch() {
|
||||
var url = Payload.of(BASE_URL + "/de/programm");
|
||||
Stream<Result<String>> stream = url(url)
|
||||
.map(JenaRosenkeller::open) //
|
||||
.map(JenaRosenkeller::preload)
|
||||
.map(JenaRosenkeller::parse)
|
||||
.map(JenaRosenkeller::findEventUrls)
|
||||
.stream();
|
||||
return stream //
|
||||
.map(JenaRosenkeller::url)
|
||||
.map(JenaRosenkeller::loadEvent)
|
||||
.flatMap(result -> result.optional().stream());
|
||||
}
|
||||
|
||||
private static Result<List<String>> findEventUrls(Result<Tag> tagResult) {
|
||||
return switch (tagResult) {
|
||||
case Payload<Tag> payload -> {
|
||||
List<String> urls = payload // find tag with event-id
|
||||
.get()
|
||||
.find(attributeStartsWith("id","event-"))
|
||||
.stream()
|
||||
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
|
||||
.flatMap(List::stream)
|
||||
.map(t -> t.get("href"))
|
||||
.toList();
|
||||
yield Payload.of(urls);
|
||||
}
|
||||
case Error<Tag> error -> error.transform();
|
||||
default -> Error.format("Invalid parameter: %s", tagResult.getClass().getSimpleName());
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
private static Result<Tag> getEventDiv(Result<Tag> pageResult) {
|
||||
switch (pageResult) {
|
||||
case Payload<Tag> payload:
|
||||
List<Tag> list = payload.get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
|
||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
||||
case Error<Tag> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", pageResult.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Result<Appointment> loadEvent(Result<URL> urlResult) {
|
||||
var link = urlResult.optional().map(url -> new Link(url, "Event-Seite")).orElse(null);
|
||||
return urlResult //
|
||||
.map(JenaRosenkeller::open)
|
||||
.map(JenaRosenkeller::preload)
|
||||
.map(JenaRosenkeller::parse)
|
||||
.map(JenaRosenkeller::getEventDiv)
|
||||
.map(tagResult -> parseEvent(tagResult, link));
|
||||
}
|
||||
|
||||
private static Result<InputStream> open(Result<URL> url) {
|
||||
switch (url) {
|
||||
case Payload<URL> payload:
|
||||
try {
|
||||
return Payload.of(payload.get().openConnection().getInputStream());
|
||||
} catch (IOException e) {
|
||||
return Error.of("Failed to open %s".formatted(payload), e);
|
||||
}
|
||||
case Error<URL> error:
|
||||
return error.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", url.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Result<Tag> parse(Result<InputStream> inputStream) {
|
||||
return switch (inputStream) {
|
||||
case Payload<InputStream> payload -> XMLParser.parse(payload.get());
|
||||
case Error<InputStream> error -> error.transform();
|
||||
default -> Error.of("Invalid parameter: %s".formatted(inputStream.getClass().getSimpleName()));
|
||||
};
|
||||
}
|
||||
|
||||
private static Result<Appointment> parseEvent(Result<Tag> domResult, Link eventPage) {
|
||||
switch (domResult) {
|
||||
case Payload<Tag> payload:
|
||||
var appointmentTag = payload.get();
|
||||
var title = extractTitle(appointmentTag);
|
||||
if (title.isEmpty()) return Error.format("No title found at %s", eventPage.url());
|
||||
var description = extractDescription(appointmentTag);
|
||||
if (description.isEmpty()) return Error.format("No description found at %s", eventPage.url());
|
||||
var start = extractStart(appointmentTag);
|
||||
if (start.isEmpty()) return Error.format("No start date/time found at %s", eventPage.url());
|
||||
var links = extractLinks(appointmentTag);
|
||||
var attachments = extractAttachments(appointmentTag);
|
||||
var appointment = new BaseAppointment(0, title.get(), description.get(), start.get(), null, DEFAULT_LOCATION).addLinks(links).add(attachments);
|
||||
return Payload.of(appointment);
|
||||
case Error<Tag> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", domResult.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Result<InputStream> preload(Result<InputStream> inputStream) {
|
||||
switch (inputStream) {
|
||||
case Payload<InputStream> payload:
|
||||
try {
|
||||
return Payload.of(XMLParser.preload(payload.get()));
|
||||
} catch (IOException e) {
|
||||
return Error.of("Failed to buffer data from %s".formatted(payload), e);
|
||||
}
|
||||
case Error<InputStream> error:
|
||||
return error.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", inputStream.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Result<Attachment> toAttachment(Result<URL> urlResult) {
|
||||
switch (urlResult) {
|
||||
case Payload<URL> payload:
|
||||
try {
|
||||
var mime = payload.get().openConnection().getContentType();
|
||||
return Payload.of(new Attachment(payload.get(), mime));
|
||||
} catch (Exception e) {
|
||||
return Error.format("Failed to read mime type of %s", payload);
|
||||
}
|
||||
case Error<URL> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", urlResult.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Optional<LocalDateTime> toDateTime(String text) {
|
||||
var match = DATE_PATTERN.matcher(text);
|
||||
if (match.find()) {
|
||||
var dayOfMonth = Integer.parseInt(match.group(1));
|
||||
var month = toNumericMonth(match.group(2));
|
||||
if (month.isEmpty()) return empty();
|
||||
|
||||
|
||||
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
|
||||
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
|
||||
var now = LocalDateTime.now();
|
||||
var date = LocalDateTime.of(now.getYear(), month.get(), dayOfMonth, hour, minute);
|
||||
if (date.isBefore(now)) date = date.plusYears(1);
|
||||
return Optional.of(date);
|
||||
}
|
||||
return empty();
|
||||
}
|
||||
|
||||
private static Optional<Integer> toNumericMonth(String month) {
|
||||
month = month.toLowerCase();
|
||||
if (month.startsWith("ja")) return Optional.of(1);
|
||||
if (month.startsWith("f")) return Optional.of(2);
|
||||
if ("may".equals(month) || "mai".equals(month)) return Optional.of(5);
|
||||
if (month.startsWith("m")) return Optional.of(3);
|
||||
if (month.startsWith("ap")) return Optional.of(4);
|
||||
if (month.startsWith("jun")) return Optional.of(6);
|
||||
if (month.startsWith("jul")) return Optional.of(7);
|
||||
if (month.startsWith("au")) return Optional.of(8);
|
||||
if (month.startsWith("s")) return Optional.of(9);
|
||||
if (month.startsWith("o")) return Optional.of(10);
|
||||
if (month.startsWith("n")) return Optional.of(11);
|
||||
if (month.startsWith("d")) return Optional.of(12);
|
||||
return empty();
|
||||
}
|
||||
|
||||
|
||||
private static Result<URL> url(Result<String> urls) {
|
||||
switch (urls) {
|
||||
case Payload<String> payload:
|
||||
var url = payload.get();
|
||||
try {
|
||||
return Payload.of(new URI(url).toURL());
|
||||
} catch (MalformedURLException | URISyntaxException e) {
|
||||
return Error.of("Failed to create URL of %s".formatted(url), e);
|
||||
}
|
||||
case Error<String> err:
|
||||
return err.transform();
|
||||
default:
|
||||
return Error.format("Invalid parameter: %s", urls.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.tools.Optionals.nullable;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
import de.srsoftware.cal.importer.BaseImporter;
|
||||
import de.srsoftware.tools.Error;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class Rosenkeller extends BaseImporter {
|
||||
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
||||
private static final String BASE_URL = "https://rosenkeller.org";
|
||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return BASE_URL;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractAttachmentsTag(Tag eventTag) {
|
||||
return Payload.of(eventTag);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
||||
var opt = eventTag //
|
||||
.find(attributeHas("class", "tribe-events-single-event-description"))
|
||||
.stream()
|
||||
.findAny();
|
||||
if (opt.isPresent()) return Payload.of(opt.get());
|
||||
return Error.of("Failed to find description tag");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractEndTag(Tag eventTag) {
|
||||
return Error.of("extractEndTag(…) not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
||||
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
||||
var list = pageResult.optional().get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
|
||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
if (programPage.optional().isEmpty()) return transform(programPage);
|
||||
List<String> list = programPage.optional()
|
||||
.get() //
|
||||
.find(attributeStartsWith("id", "event-"))
|
||||
.stream()
|
||||
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
|
||||
.flatMap(List::stream)
|
||||
.map(t -> t.get("href"))
|
||||
.toList();
|
||||
return Payload.of(list);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
|
||||
if (tagResult.optional().isEmpty()) return transform(tagResult);
|
||||
List<Tag> list = tagResult.optional().get().find(attributeStartsWith("id", "post-")).stream().flatMap(tag -> tag.find(ofType("a")).stream()).toList();
|
||||
return Payload.of(list);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
||||
return Payload.of(new Tag("span").content(DEFAULT_LOCATION));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractStartTag(Tag eventTag) {
|
||||
List<Tag> list = eventTag.find(attributeEquals("class", "tribe-event-date-start"));
|
||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||
return Error.of("Failed to locate start tag");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
return List.of("Rosenkeller", "Jena");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
||||
var list = eventTag.find(attributeEndsWith("class", "single-event-title"));
|
||||
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||
return Error.of("Failed to find title tag");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDateTime> parseEndDate(String text) {
|
||||
return Error.of("parseEndDate(…) not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDateTime> parseStartDate(String text) {
|
||||
var match = DATE_PATTERN.matcher(text);
|
||||
if (match.find()) {
|
||||
var dayOfMonth = Integer.parseInt(match.group(1));
|
||||
var month = toNumericMonth(match.group(2));
|
||||
if (month.optional().isEmpty()) return transform(month);
|
||||
|
||||
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
|
||||
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
|
||||
var now = LocalDateTime.now();
|
||||
var date = LocalDateTime.of(now.getYear(), month.optional().get(), dayOfMonth, hour, minute);
|
||||
if (date.isBefore(now)) date = date.plusYears(1);
|
||||
return Payload.of(date);
|
||||
}
|
||||
return Error.format("Failed to recognize a date in \"%s\"", text);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl() + "/de/programm";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user