Browse Source

working on event extraction from rosenkeller

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
d7967b2d95
  1. 17
      de.srsoftware.cal.api/src/main/java/de/srsoftware/cal/api/Coords.java
  2. 2
      de.srsoftware.cal.importer/build.gradle.kts
  3. 44
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseAppointment.java
  4. 161
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/JenaRosenkeller.java

17
de.srsoftware.cal.api/src/main/java/de/srsoftware/cal/api/Coords.java

@ -2,18 +2,9 @@ @@ -2,18 +2,9 @@
package de.srsoftware.cal.api;
/**
* cartesian coordinates
* geographic coords
* @param longitude the longitude
* @param latitude the latitude
*/
public interface Coords {
/**
* the longitude
* @return the longitude
*/
double longitude();
/**
* the latitude
* @return the latitude
*/
double latitude();
public record Coords(double longitude, double latitude) {
}

2
de.srsoftware.cal.importer/build.gradle.kts

@ -4,5 +4,5 @@ dependencies { @@ -4,5 +4,5 @@ dependencies {
implementation(project(":de.srsoftware.cal.api"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.1.1")
implementation("de.srsoftware:tools.web:1.2.2")
implementation("de.srsoftware:tools.web:1.3.2")
}

44
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseAppointment.java

@ -0,0 +1,44 @@ @@ -0,0 +1,44 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer;
import static java.util.Optional.empty;
import de.srsoftware.cal.api.Appointment;
import de.srsoftware.cal.api.Attachment;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.cal.api.Link;
import java.time.LocalDateTime;
import java.util.Optional;
import java.util.Set;
/**
 * Skeleton {@link Appointment} that answers every optional aspect with "nothing":
 * empty sets for attachments, tags and urls, and an empty Optional for coords and end.
 * Whatever else the Appointment interface demands is left to the concrete subclass.
 */
public abstract class BaseAppointment implements Appointment {

    // ---- set-valued properties: always empty ----

    /** @return an empty, immutable set */
    @Override
    public Set<Attachment> attachments() {
        return Set.of();
    }

    /** @return an empty, immutable set */
    @Override
    public Set<String> tags() {
        return Set.of();
    }

    /** @return an empty, immutable set */
    @Override
    public Set<Link> urls() {
        return Set.of();
    }

    // ---- optional properties: always absent ----

    /** @return an empty Optional */
    @Override
    public Optional<Coords> coords() {
        return Optional.empty();
    }

    /** @return an empty Optional */
    @Override
    public Optional<LocalDateTime> end() {
        return Optional.empty();
    }

    /**
     * @return the title followed by the simple name of this base class in parentheses
     */
    @Override
    public String toString() {
        var typeName = BaseAppointment.class.getSimpleName();
        return String.format("%s (%s)", title(), typeName);
    }
}

161
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/JenaRosenkeller.java

@ -1,15 +1,14 @@ @@ -1,15 +1,14 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer;
import static de.srsoftware.tools.Optionals.nullable;
import static de.srsoftware.tools.TagFilter.*;
import static java.util.Optional.empty;
import static java.util.function.Predicate.not;
import de.srsoftware.cal.api.*;
import de.srsoftware.tools.*;
import de.srsoftware.tools.Error;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.XMLParser;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
@ -19,66 +18,14 @@ import java.net.URL; @@ -19,66 +18,14 @@ import java.net.URL;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Stream;
public class JenaRosenkeller implements Importer {
/**
 * Placeholder {@link Appointment} without any content: reference-typed scalar
 * properties are {@code null}, the id is 0, collections are empty and optional
 * properties are absent. Callers override the accessors they can actually fill.
 */
private static class EmptyAppointment implements Appointment {

    // ---- scalar properties ----

    /** @return always 0 */
    @Override
    public long id() {
        return 0;
    }

    /** @return always {@code null} */
    @Override
    public String title() {
        return null;
    }

    /** @return always {@code null} */
    @Override
    public String description() {
        return null;
    }

    /** @return always {@code null} */
    @Override
    public String location() {
        return null;
    }

    /** @return always {@code null} */
    @Override
    public LocalDateTime start() {
        return null;
    }

    // ---- optional properties ----

    /** @return an empty Optional */
    @Override
    public Optional<LocalDateTime> end() {
        return Optional.empty();
    }

    /** @return an empty Optional */
    @Override
    public Optional<Coords> coords() {
        return Optional.empty();
    }

    // ---- set-valued properties ----

    /** @return an empty, immutable set */
    @Override
    public Set<Attachment> attachments() {
        return Set.of();
    }

    /** @return an empty, immutable set */
    @Override
    public Set<String> tags() {
        return Set.of();
    }

    /** @return an empty, immutable set */
    @Override
    public Set<Link> urls() {
        return Set.of();
    }

    /** @return the title followed by the simple name of this class in parentheses */
    @Override
    public String toString() {
        var typeName = EmptyAppointment.class.getSimpleName();
        return String.format("%s (%s)", title(), typeName);
    }
}
/** Value of the {@code id} attribute that getEventDiv searches for in a parsed page. */
private static final String APPOINTMENT_TAG_ID = "tribe-events-content";
/**
 * Coordinates belonging to {@link #DEFAULT_LOCATION}.
 * The Coords record takes (longitude, latitude) in that order: Jena lies at roughly
 * 11.585° E / 50.9294° N, so the longitude has to be passed first.
 */
private static final Coords DEFAULT_COORDS = new Coords(11.585, 50.9294);
/** Postal address of the venue. */
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
/**
 * Matches "&lt;day&gt; &lt;month word&gt;" optionally followed by non-word characters and "&lt;hour&gt;:&lt;minute&gt;".
 * Groups: 1 = day of month, 2 = month word, 4 = hour, 5 = minute (4 and 5 are null without a time part).
 */
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
@Override
public String description() {
@ -99,17 +46,31 @@ public class JenaRosenkeller implements Importer { @@ -99,17 +46,31 @@ public class JenaRosenkeller implements Importer {
.map(JenaRosenkeller::open)
.map(JenaRosenkeller::preload)
.map(JenaRosenkeller::parse)
.map(JenaRosenkeller::getEventDiv)
.map(JenaRosenkeller::loadEvent);
}
/**
 * Picks the one tag whose {@code id} attribute equals {@code APPOINTMENT_TAG_ID} out of a parsed page.
 *
 * @param pageResult outcome of the preceding parsing step
 * @return a payload holding the tag when exactly one tag matches; an error when the number
 *         of matches differs from one; the transformed incoming error when {@code pageResult}
 *         already is an error; an "Invalid parameter" error for any other kind of result
 */
private static Result<Tag> getEventDiv(Result<Tag> pageResult) {
    return switch (pageResult) {
        case Payload<Tag> page -> {
            List<Tag> matches = page.get().find(attributeEquals("id", APPOINTMENT_TAG_ID));
            // zero matches and several matches are both reported as "not found"
            if (matches.size() != 1) yield Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
            yield Payload.of(matches.getFirst());
        }
        case Error<Tag> failure -> failure.transform();
        default -> Error.format("Invalid parameter: %s", pageResult.getClass().getSimpleName());
    };
}
private static Result<List<String>> findEventUrls(Result<Tag> tagResult) {
return switch (tagResult) {
case Payload<Tag> payload -> {
List<String> urls = payload // find tag with event-id
.get()
.find("id", val -> val.startsWith("event-"))
.find(attributeStartsWith("id","event-"))
.stream()
.map(t -> t.find("class", "ect-event-url"::equals))
.map(t -> t.find(attributeEquals("class", "ect-event-url")))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
@ -161,23 +122,10 @@ public class JenaRosenkeller implements Importer { @@ -161,23 +122,10 @@ public class JenaRosenkeller implements Importer {
private static Result<Appointment> loadEvent(Result<Tag> domResult) {
switch (domResult) {
case Payload<Tag> payload:
var tag = payload.get();
Optional<String> title = tag.find("class", s -> s.endsWith("single-event-title")) //
.stream()
.map(Tag::children)
.filter(not(List::isEmpty))
.map(List::getFirst)
.map(Tag::toString)
.findAny();
if (title.isPresent()) {
var appointment = new EmptyAppointment() {
@Override
public String title() {
return title.get();
}
};
return Payload.of(appointment);
}
var appointmentTag = payload.get();
var title = extractTitle(appointmentTag);
var description = extractDescription(appointmentTag);
var start = extractStart(appointmentTag);
return Error.of("Could not find appointment title");
case Error<Tag> err:
return err.transform();
@ -186,6 +134,57 @@ public class JenaRosenkeller implements Importer { @@ -186,6 +134,57 @@ public class JenaRosenkeller implements Importer {
}
}
/**
 * Looks for the start date of an event below the given tag.
 * Every tag whose class equals "tribe-event-date-start" contributes its {@code inner(0)}
 * text (NOTE(review): exact meaning of the inner() argument is defined by Tag - confirm there);
 * any text that toDateTime can parse yields the result.
 *
 * @param appointmentTag the tag wrapping the event
 * @return a parsed start, or empty if no candidate text could be parsed
 */
private static Optional<LocalDateTime> extractStart(Tag appointmentTag) {
    var startTags = appointmentTag.find(attributeEquals("class", "tribe-event-date-start"));
    return startTags
        .stream()
        .flatMap(startTag -> startTag.inner(0).stream())
        .map(JenaRosenkeller::toDateTime)
        .flatMap(Optional::stream)
        .findAny();
}
/**
 * Converts the first occurrence of "&lt;day&gt; &lt;month word&gt;[ ... &lt;hour&gt;:&lt;minute&gt;]" in a text to a date-time.
 * <p>
 * The text carries no year, so the current year is assumed. If the resulting calendar day
 * lies before today, the date is moved to the following year. The comparison is done on
 * the day only: an event dated today (with or without a time that has already passed)
 * stays in the current year instead of being pushed twelve months ahead.
 * A missing time part is read as 00:00.
 *
 * @param text free text that may contain a date
 * @return the parsed date-time, or empty if the text contains no date, the month word is
 *         unknown, or the numbers do not form a valid date/time (e.g. day 31 in February, hour 25)
 */
private static Optional<LocalDateTime> toDateTime(String text) {
    var match = DATE_PATTERN.matcher(text);
    if (!match.find()) return empty();

    var month = toNumericMonth(match.group(2));
    if (month.isEmpty()) return empty();

    try {
        var dayOfMonth = Integer.parseInt(match.group(1));
        // groups 4 and 5 are null when the text has no time part
        var hour   = Integer.parseInt(nullable(match.group(4)).orElse("0"));
        var minute = Integer.parseInt(nullable(match.group(5)).orElse("0"));

        var now  = LocalDateTime.now();
        var date = LocalDateTime.of(now.getYear(), month.get(), dayOfMonth, hour, minute);
        if (date.toLocalDate().isBefore(now.toLocalDate())) date = date.plusYears(1);
        return Optional.of(date);
    } catch (NumberFormatException | java.time.DateTimeException invalid) {
        // digit runs too long for an int, or values outside the calendar/clock range:
        // the text is not a usable date, which this method reports as "empty"
        return empty();
    }
}
/**
 * Maps a month name, or a sufficiently long prefix of one, to its number (1 = January).
 * Matching is case-insensitive and independent of the JVM's default locale; both the
 * spellings "may" and "mai" are recognised for month 5.
 *
 * @param month the month word, may be {@code null}
 * @return the month number 1..12, or empty if {@code month} is {@code null} or not recognised
 */
private static Optional<Integer> toNumericMonth(String month) {
    if (month == null) return empty();
    // Locale.ROOT: with a Turkish default locale "MAI" would lower-case to "maı" and be taken for March
    var name = month.toLowerCase(java.util.Locale.ROOT);
    if (name.startsWith("ja")) return Optional.of(1);
    if (name.startsWith("f")) return Optional.of(2);
    // must be tested before the generic "m" prefix, which stands for March
    if ("may".equals(name) || "mai".equals(name)) return Optional.of(5);
    if (name.startsWith("m")) return Optional.of(3);
    if (name.startsWith("ap")) return Optional.of(4);
    if (name.startsWith("jun")) return Optional.of(6);
    if (name.startsWith("jul")) return Optional.of(7);
    if (name.startsWith("au")) return Optional.of(8);
    if (name.startsWith("s")) return Optional.of(9);
    if (name.startsWith("o")) return Optional.of(10);
    if (name.startsWith("n")) return Optional.of(11);
    if (name.startsWith("d")) return Optional.of(12);
    return empty();
}
/**
 * Looks for the description text of an event below the given tag.
 * Candidates are the tags matched by {@code attributeHas("class", "tribe-events-single-event-description")};
 * the result is taken from their {@code inner(2)} content
 * (NOTE(review): exact meaning of the inner() argument is defined by Tag - confirm there).
 *
 * @param appointmentTag the tag wrapping the event
 * @return any description text found, or empty if there is none
 */
private static Optional<String> extractDescription(Tag appointmentTag) {
    var descriptionTags = appointmentTag.find(attributeHas("class", "tribe-events-single-event-description"));
    return descriptionTags
        .stream()
        .flatMap(descriptionTag -> descriptionTag.inner(2).stream())
        .findAny();
}
/**
 * Looks for the title of an event below the given tag.
 * Candidates are the tags whose class attribute ends with "single-event-title";
 * the result is taken from their {@code inner(2)} content
 * (NOTE(review): exact meaning of the inner() argument is defined by Tag - confirm there).
 *
 * @param appointmentTag the tag wrapping the event
 * @return any title text found, or empty if there is none
 */
private static Optional<String> extractTitle(Tag appointmentTag) {
    var titleTags = appointmentTag.find(attributeEndsWith("class", "single-event-title"));
    return titleTags.stream().flatMap(titleTag -> titleTag.inner(2).stream()).findAny();
}
private static Optional<Appointment> nope(URL url) {
try {
var input = url.openConnection().getInputStream();
@ -194,7 +193,7 @@ public class JenaRosenkeller implements Importer { @@ -194,7 +193,7 @@ public class JenaRosenkeller implements Importer {
input.close();
if (result instanceof Payload<Tag> payload) {
var tag = payload.get();
tag.find("class", s -> s.endsWith("single-event-title")) //
tag.find(attributeEndsWith("class", "single-event-title")) //
.stream()
.map(Tag::children)
.filter(not(List::isEmpty))

Loading…
Cancel
Save