|
|
@ -16,12 +16,18 @@ import java.net.URI; |
|
|
|
import java.net.URISyntaxException; |
|
|
|
import java.net.URISyntaxException; |
|
|
|
import java.net.URL; |
|
|
|
import java.net.URL; |
|
|
|
import java.time.LocalDateTime; |
|
|
|
import java.time.LocalDateTime; |
|
|
|
|
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.List; |
|
|
|
import java.util.List; |
|
|
|
|
|
|
|
import java.util.Objects; |
|
|
|
import java.util.Optional; |
|
|
|
import java.util.Optional; |
|
|
|
import java.util.regex.Pattern; |
|
|
|
import java.util.regex.Pattern; |
|
|
|
import java.util.stream.Stream; |
|
|
|
import java.util.stream.Stream; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* Importer für Events vom Rosenkeller Jena |
|
|
|
|
|
|
|
*/ |
|
|
|
public class JenaRosenkeller implements Importer { |
|
|
|
public class JenaRosenkeller implements Importer { |
|
|
|
|
|
|
|
private static final String BASE_URL = "https://rosenkeller.org"; |
|
|
|
private static final String APPOINTMENT_TAG_ID = "tribe-events-content"; |
|
|
|
private static final String APPOINTMENT_TAG_ID = "tribe-events-content"; |
|
|
|
private static final Coords DEFAULT_COORDS = new Coords(50.9294, 11.585); |
|
|
|
private static final Coords DEFAULT_COORDS = new Coords(50.9294, 11.585); |
|
|
|
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena"; |
|
|
|
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena"; |
|
|
@ -33,8 +39,8 @@ public class JenaRosenkeller implements Importer { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
@Override |
|
|
|
public Stream<Result<Appointment>> fetch() throws IOException { |
|
|
|
public Stream<Appointment> fetch() { |
|
|
|
var url = Payload.of("https://rosenkeller.org/de/programm"); |
|
|
|
var url = Payload.of(BASE_URL + "/de/programm"); |
|
|
|
Stream<Result<String>> stream = url(url) |
|
|
|
Stream<Result<String>> stream = url(url) |
|
|
|
.map(JenaRosenkeller::open) //
|
|
|
|
.map(JenaRosenkeller::open) //
|
|
|
|
.map(JenaRosenkeller::preload) |
|
|
|
.map(JenaRosenkeller::preload) |
|
|
@ -43,11 +49,18 @@ public class JenaRosenkeller implements Importer { |
|
|
|
.stream(); |
|
|
|
.stream(); |
|
|
|
return stream //
|
|
|
|
return stream //
|
|
|
|
.map(JenaRosenkeller::url) |
|
|
|
.map(JenaRosenkeller::url) |
|
|
|
|
|
|
|
.map(JenaRosenkeller::loadEvent) |
|
|
|
|
|
|
|
.flatMap(result -> result.optional().stream()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static Result<Appointment> loadEvent(Result<URL> urlResult) { |
|
|
|
|
|
|
|
var link = urlResult.optional().map(url -> new Link(url, "Event-Seite")).orElse(null); |
|
|
|
|
|
|
|
return urlResult //
|
|
|
|
.map(JenaRosenkeller::open) |
|
|
|
.map(JenaRosenkeller::open) |
|
|
|
.map(JenaRosenkeller::preload) |
|
|
|
.map(JenaRosenkeller::preload) |
|
|
|
.map(JenaRosenkeller::parse) |
|
|
|
.map(JenaRosenkeller::parse) |
|
|
|
.map(JenaRosenkeller::getEventDiv) |
|
|
|
.map(JenaRosenkeller::getEventDiv) |
|
|
|
.map(JenaRosenkeller::loadEvent); |
|
|
|
.map(tagResult -> parseEvent(tagResult, link)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static Result<Tag> getEventDiv(Result<Tag> pageResult) { |
|
|
|
private static Result<Tag> getEventDiv(Result<Tag> pageResult) { |
|
|
@ -119,14 +132,20 @@ public class JenaRosenkeller implements Importer { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static Result<Appointment> loadEvent(Result<Tag> domResult) { |
|
|
|
private static Result<Appointment> parseEvent(Result<Tag> domResult, Link eventPage) { |
|
|
|
switch (domResult) { |
|
|
|
switch (domResult) { |
|
|
|
case Payload<Tag> payload: |
|
|
|
case Payload<Tag> payload: |
|
|
|
var appointmentTag = payload.get(); |
|
|
|
var appointmentTag = payload.get(); |
|
|
|
var title = extractTitle(appointmentTag); |
|
|
|
var title = extractTitle(appointmentTag); |
|
|
|
|
|
|
|
if (title.isEmpty()) return Error.format("No title found at %s", eventPage.url()); |
|
|
|
var description = extractDescription(appointmentTag); |
|
|
|
var description = extractDescription(appointmentTag); |
|
|
|
|
|
|
|
if (description.isEmpty()) return Error.format("No description found at %s", eventPage.url()); |
|
|
|
var start = extractStart(appointmentTag); |
|
|
|
var start = extractStart(appointmentTag); |
|
|
|
return Error.of("Could not find appointment title"); |
|
|
|
if (start.isEmpty()) return Error.format("No start date/time found at %s", eventPage.url()); |
|
|
|
|
|
|
|
var links = extractLinks(appointmentTag); |
|
|
|
|
|
|
|
var attachments = extractAttachments(appointmentTag); |
|
|
|
|
|
|
|
var appointment = new BaseAppointment(0, title.get(), description.get(), start.get(), null, DEFAULT_LOCATION).addLinks(links).add(attachments); |
|
|
|
|
|
|
|
return Payload.of(appointment); |
|
|
|
case Error<Tag> err: |
|
|
|
case Error<Tag> err: |
|
|
|
return err.transform(); |
|
|
|
return err.transform(); |
|
|
|
default: |
|
|
|
default: |
|
|
@ -134,6 +153,60 @@ public class JenaRosenkeller implements Importer { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static List<Attachment> extractAttachments(Tag appointmentTag) { |
|
|
|
|
|
|
|
return appointmentTag //
|
|
|
|
|
|
|
|
.find(ofType("img")) |
|
|
|
|
|
|
|
.stream() |
|
|
|
|
|
|
|
.map(tag -> tag.get("src")) |
|
|
|
|
|
|
|
.filter(Objects::nonNull) |
|
|
|
|
|
|
|
.map(Payload::of) |
|
|
|
|
|
|
|
.map(JenaRosenkeller::url) |
|
|
|
|
|
|
|
.map(JenaRosenkeller::toAttachment) |
|
|
|
|
|
|
|
.map(Result::optional) |
|
|
|
|
|
|
|
.flatMap(Optional::stream) |
|
|
|
|
|
|
|
.toList(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static Result<Attachment> toAttachment(Result<URL> urlResult) { |
|
|
|
|
|
|
|
switch (urlResult) { |
|
|
|
|
|
|
|
case Payload<URL> payload: |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
var mime = payload.get().openConnection().getContentType(); |
|
|
|
|
|
|
|
return Payload.of(new Attachment(payload.get(), mime)); |
|
|
|
|
|
|
|
} catch (Exception e) { |
|
|
|
|
|
|
|
return Error.format("Failed to read mime type of %s", payload); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
case Error<URL> err: |
|
|
|
|
|
|
|
return err.transform(); |
|
|
|
|
|
|
|
default: |
|
|
|
|
|
|
|
return Error.format("Invalid parameter: %s", urlResult.getClass().getSimpleName()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static List<Link> extractLinks(Tag appointmentTag) { |
|
|
|
|
|
|
|
var links = new ArrayList<Link>(); |
|
|
|
|
|
|
|
appointmentTag //
|
|
|
|
|
|
|
|
.find(attributeStartsWith("id", "post-")) |
|
|
|
|
|
|
|
.stream() |
|
|
|
|
|
|
|
.flatMap(tag -> tag.find(ofType("a")).stream()) |
|
|
|
|
|
|
|
.forEach(anchor -> { |
|
|
|
|
|
|
|
var href = anchor.get("href"); |
|
|
|
|
|
|
|
if (href == null) return; |
|
|
|
|
|
|
|
if (!href.contains("://")) href = BASE_URL + "href"; |
|
|
|
|
|
|
|
var text = anchor.inner(0).orElse(href); |
|
|
|
|
|
|
|
Payload.of(href).map(JenaRosenkeller::url).optional().map(url -> new Link(url, text)).ifPresent(links::add); |
|
|
|
|
|
|
|
}); |
|
|
|
|
|
|
|
return links; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static Result<Link> toLink(Result<URL> urlResult, Optional<String> description) { |
|
|
|
|
|
|
|
return switch (urlResult) { |
|
|
|
|
|
|
|
case Payload<URL> payload -> Payload.of(new Link(payload.get(),description.orElse(payload.toString()))); |
|
|
|
|
|
|
|
case Error<URL> err -> err.transform(); |
|
|
|
|
|
|
|
default -> Error.format("Invalid parameter: %s", urlResult.getClass().getSimpleName()); |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) { |
|
|
|
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) { |
|
|
|
return appointmentTag.find(attributeEquals("class", "tribe-event-date-start")).stream().flatMap(tag -> tag.inner(0).stream()).flatMap(txt -> toDateTime(txt).stream()).findAny(); |
|
|
|
return appointmentTag.find(attributeEquals("class", "tribe-event-date-start")).stream().flatMap(tag -> tag.inner(0).stream()).flatMap(txt -> toDateTime(txt).stream()).findAny(); |
|
|
|
} |
|
|
|
} |
|
|
@ -185,29 +258,6 @@ public class JenaRosenkeller implements Importer { |
|
|
|
.findAny(); |
|
|
|
.findAny(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static Optional<Appointment> nope(URL url) { |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
var input = url.openConnection().getInputStream(); |
|
|
|
|
|
|
|
input = XMLParser.preload(input); |
|
|
|
|
|
|
|
var result = XMLParser.parse(input); |
|
|
|
|
|
|
|
input.close(); |
|
|
|
|
|
|
|
if (result instanceof Payload<Tag> payload) { |
|
|
|
|
|
|
|
var tag = payload.get(); |
|
|
|
|
|
|
|
tag.find(attributeEndsWith("class", "single-event-title")) //
|
|
|
|
|
|
|
|
.stream() |
|
|
|
|
|
|
|
.map(Tag::children) |
|
|
|
|
|
|
|
.filter(not(List::isEmpty)) |
|
|
|
|
|
|
|
.map(List::getFirst) |
|
|
|
|
|
|
|
.map(Tag::toString) |
|
|
|
|
|
|
|
.forEach(System.out::println); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return empty(); |
|
|
|
|
|
|
|
} catch (IOException e) { |
|
|
|
|
|
|
|
e.printStackTrace(); |
|
|
|
|
|
|
|
return empty(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private static Result<URL> url(Result<String> urls) { |
|
|
|
private static Result<URL> url(Result<String> urls) { |
|
|
|
switch (urls) { |
|
|
|
switch (urls) { |
|
|
|
case Payload<String> payload: |
|
|
|
case Payload<String> payload: |
|
|
|