working on event extraction from rosenkeller
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -2,18 +2,9 @@
|
|||||||
package de.srsoftware.cal.api;
|
package de.srsoftware.cal.api;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* cartesian coordinates
|
* geographic coordinates (longitude and latitude)
|
||||||
|
* @param longitude the longitude
|
||||||
|
* @param latitude the latitude
|
||||||
*/
|
*/
|
||||||
public interface Coords {
|
public record Coords(double longitude, double latitude) {
|
||||||
/**
|
|
||||||
* the longitude
|
|
||||||
* @return the longitude
|
|
||||||
*/
|
|
||||||
double longitude();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* the latitude
|
|
||||||
* @return the latitude
|
|
||||||
*/
|
|
||||||
double latitude();
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,5 +4,5 @@ dependencies {
|
|||||||
implementation(project(":de.srsoftware.cal.api"))
|
implementation(project(":de.srsoftware.cal.api"))
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.1.1")
|
implementation("de.srsoftware:tools.util:1.1.1")
|
||||||
implementation("de.srsoftware:tools.web:1.2.2")
|
implementation("de.srsoftware:tools.web:1.3.2")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,44 @@
|
|||||||
|
/* © SRSoftware 2024 */
|
||||||
|
package de.srsoftware.cal.importer;
|
||||||
|
|
||||||
|
import static java.util.Optional.empty;
|
||||||
|
|
||||||
|
import de.srsoftware.cal.api.Appointment;
|
||||||
|
import de.srsoftware.cal.api.Attachment;
|
||||||
|
import de.srsoftware.cal.api.Coords;
|
||||||
|
import de.srsoftware.cal.api.Link;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public abstract class BaseAppointment implements Appointment {
|
||||||
|
@Override
|
||||||
|
public Set<Attachment> attachments() {
|
||||||
|
return Set.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Coords> coords() {
|
||||||
|
return empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<LocalDateTime> end() {
|
||||||
|
return empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<String> tags() {
|
||||||
|
return Set.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "%s (%s)".formatted(title(), BaseAppointment.class.getSimpleName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Link> urls() {
|
||||||
|
return Set.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,15 +1,14 @@
|
|||||||
/* © SRSoftware 2024 */
|
/* © SRSoftware 2024 */
|
||||||
package de.srsoftware.cal.importer;
|
package de.srsoftware.cal.importer;
|
||||||
|
|
||||||
|
import static de.srsoftware.tools.Optionals.nullable;
|
||||||
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
import static java.util.Optional.empty;
|
import static java.util.Optional.empty;
|
||||||
import static java.util.function.Predicate.not;
|
import static java.util.function.Predicate.not;
|
||||||
|
|
||||||
import de.srsoftware.cal.api.*;
|
import de.srsoftware.cal.api.*;
|
||||||
|
import de.srsoftware.tools.*;
|
||||||
import de.srsoftware.tools.Error;
|
import de.srsoftware.tools.Error;
|
||||||
import de.srsoftware.tools.Payload;
|
|
||||||
import de.srsoftware.tools.Result;
|
|
||||||
import de.srsoftware.tools.Tag;
|
|
||||||
import de.srsoftware.tools.XMLParser;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
@@ -19,66 +18,14 @@ import java.net.URL;
|
|||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public class JenaRosenkeller implements Importer {
|
public class JenaRosenkeller implements Importer {
|
||||||
private static class EmptyAppointment implements Appointment {
|
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
|
||||||
@Override
|
private static final Coords DEFAULT_COORDS = new Coords(50.9294, 11.585);
|
||||||
public Set<Attachment> attachments() {
|
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||||
return Set.of();
|
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<Coords> coords() {
|
|
||||||
return empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String description() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<LocalDateTime> end() {
|
|
||||||
return empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long id() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String location() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public LocalDateTime start() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Set<String> tags() {
|
|
||||||
return Set.of();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String title() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "%s (%s)".formatted(title(), EmptyAppointment.class.getSimpleName());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Set<Link> urls() {
|
|
||||||
return Set.of();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String description() {
|
public String description() {
|
||||||
@@ -99,17 +46,31 @@ public class JenaRosenkeller implements Importer {
|
|||||||
.map(JenaRosenkeller::open)
|
.map(JenaRosenkeller::open)
|
||||||
.map(JenaRosenkeller::preload)
|
.map(JenaRosenkeller::preload)
|
||||||
.map(JenaRosenkeller::parse)
|
.map(JenaRosenkeller::parse)
|
||||||
|
.map(JenaRosenkeller::getEventDiv)
|
||||||
.map(JenaRosenkeller::loadEvent);
|
.map(JenaRosenkeller::loadEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Result<Tag> getEventDiv(Result<Tag> pageResult) {
|
||||||
|
switch (pageResult) {
|
||||||
|
case Payload<Tag> payload:
|
||||||
|
List<Tag> list = payload.get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
|
||||||
|
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||||
|
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
||||||
|
case Error<Tag> err:
|
||||||
|
return err.transform();
|
||||||
|
default:
|
||||||
|
return Error.format("Invalid parameter: %s", pageResult.getClass().getSimpleName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static Result<List<String>> findEventUrls(Result<Tag> tagResult) {
|
private static Result<List<String>> findEventUrls(Result<Tag> tagResult) {
|
||||||
return switch (tagResult) {
|
return switch (tagResult) {
|
||||||
case Payload<Tag> payload -> {
|
case Payload<Tag> payload -> {
|
||||||
List<String> urls = payload // find tag with event-id
|
List<String> urls = payload // find tag with event-id
|
||||||
.get()
|
.get()
|
||||||
.find("id", val -> val.startsWith("event-"))
|
.find(attributeStartsWith("id","event-"))
|
||||||
.stream()
|
.stream()
|
||||||
.map(t -> t.find("class", "ect-event-url"::equals))
|
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
|
||||||
.flatMap(List::stream)
|
.flatMap(List::stream)
|
||||||
.map(t -> t.get("href"))
|
.map(t -> t.get("href"))
|
||||||
.toList();
|
.toList();
|
||||||
@@ -161,23 +122,10 @@ public class JenaRosenkeller implements Importer {
|
|||||||
private static Result<Appointment> loadEvent(Result<Tag> domResult) {
|
private static Result<Appointment> loadEvent(Result<Tag> domResult) {
|
||||||
switch (domResult) {
|
switch (domResult) {
|
||||||
case Payload<Tag> payload:
|
case Payload<Tag> payload:
|
||||||
var tag = payload.get();
|
var appointmentTag = payload.get();
|
||||||
Optional<String> title = tag.find("class", s -> s.endsWith("single-event-title")) //
|
var title = extractTitle(appointmentTag);
|
||||||
.stream()
|
var description = extractDescription(appointmentTag);
|
||||||
.map(Tag::children)
|
var start = extractStart(appointmentTag);
|
||||||
.filter(not(List::isEmpty))
|
|
||||||
.map(List::getFirst)
|
|
||||||
.map(Tag::toString)
|
|
||||||
.findAny();
|
|
||||||
if (title.isPresent()) {
|
|
||||||
var appointment = new EmptyAppointment() {
|
|
||||||
@Override
|
|
||||||
public String title() {
|
|
||||||
return title.get();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
return Payload.of(appointment);
|
|
||||||
}
|
|
||||||
return Error.of("Could not find appointment title");
|
return Error.of("Could not find appointment title");
|
||||||
case Error<Tag> err:
|
case Error<Tag> err:
|
||||||
return err.transform();
|
return err.transform();
|
||||||
@@ -186,6 +134,57 @@ public class JenaRosenkeller implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) {
|
||||||
|
return appointmentTag.find(attributeEquals("class", "tribe-event-date-start")).stream().flatMap(tag -> tag.inner(0).stream()).flatMap(txt -> toDateTime(txt).stream()).findAny();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Optional<LocalDateTime> toDateTime(String text) {
|
||||||
|
var match = DATE_PATTERN.matcher(text);
|
||||||
|
if (match.find()) {
|
||||||
|
var dayOfMonth = Integer.parseInt(match.group(1));
|
||||||
|
var month = toNumericMonth(match.group(2));
|
||||||
|
if (month.isEmpty()) return empty();
|
||||||
|
|
||||||
|
|
||||||
|
var hour = Integer.parseInt(nullable(match.group(4)).orElse("0"));
|
||||||
|
var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));
|
||||||
|
var now = LocalDateTime.now();
|
||||||
|
var date = LocalDateTime.of(now.getYear(), month.get(), dayOfMonth, hour, minute);
|
||||||
|
if (date.isBefore(now)) date = date.plusYears(1);
|
||||||
|
return Optional.of(date);
|
||||||
|
}
|
||||||
|
return empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Optional<Integer> toNumericMonth(String month) {
|
||||||
|
month = month.toLowerCase();
|
||||||
|
if (month.startsWith("ja")) return Optional.of(1);
|
||||||
|
if (month.startsWith("f")) return Optional.of(2);
|
||||||
|
if ("may".equals(month) || "mai".equals(month)) return Optional.of(5);
|
||||||
|
if (month.startsWith("m")) return Optional.of(3);
|
||||||
|
if (month.startsWith("ap")) return Optional.of(4);
|
||||||
|
if (month.startsWith("jun")) return Optional.of(6);
|
||||||
|
if (month.startsWith("jul")) return Optional.of(7);
|
||||||
|
if (month.startsWith("au")) return Optional.of(8);
|
||||||
|
if (month.startsWith("s")) return Optional.of(9);
|
||||||
|
if (month.startsWith("o")) return Optional.of(10);
|
||||||
|
if (month.startsWith("n")) return Optional.of(11);
|
||||||
|
if (month.startsWith("d")) return Optional.of(12);
|
||||||
|
return empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Optional<String> extractDescription(Tag appointmentTag) {
|
||||||
|
return appointmentTag.find(attributeHas("class", "tribe-events-single-event-description")).stream().flatMap(tag -> tag.inner(2).stream()).findAny();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Optional<String> extractTitle(Tag appointmentTag) {
|
||||||
|
return appointmentTag
|
||||||
|
.find(attributeEndsWith("class", "single-event-title")) //
|
||||||
|
.stream()
|
||||||
|
.flatMap(tag -> tag.inner(2).stream())
|
||||||
|
.findAny();
|
||||||
|
}
|
||||||
|
|
||||||
private static Optional<Appointment> nope(URL url) {
|
private static Optional<Appointment> nope(URL url) {
|
||||||
try {
|
try {
|
||||||
var input = url.openConnection().getInputStream();
|
var input = url.openConnection().getInputStream();
|
||||||
@@ -194,7 +193,7 @@ public class JenaRosenkeller implements Importer {
|
|||||||
input.close();
|
input.close();
|
||||||
if (result instanceof Payload<Tag> payload) {
|
if (result instanceof Payload<Tag> payload) {
|
||||||
var tag = payload.get();
|
var tag = payload.get();
|
||||||
tag.find("class", s -> s.endsWith("single-event-title")) //
|
tag.find(attributeEndsWith("class", "single-event-title")) //
|
||||||
.stream()
|
.stream()
|
||||||
.map(Tag::children)
|
.map(Tag::children)
|
||||||
.filter(not(List::isEmpty))
|
.filter(not(List::isEmpty))
|
||||||
|
|||||||
Reference in New Issue
Block a user