Browse Source

added importer for FHaus and Psychochor

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
2491e4fbf0
  1. 2
      de.srsoftware.cal.app/build.gradle.kts
  2. 2
      de.srsoftware.cal.base/build.gradle.kts
  3. 17
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java
  4. 47
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/SinglePageImporter.java
  5. 4
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java
  6. 2
      de.srsoftware.cal.importer/build.gradle.kts
  7. 7
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/CosmicDawn.java
  8. 170
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/FHaus.java
  9. 180
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Psychochor.java
  10. 14
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

2
de.srsoftware.cal.app/build.gradle.kts

@ -14,6 +14,6 @@ dependencies { @@ -14,6 +14,6 @@ dependencies {
implementation("de.srsoftware:tools.logging:1.0.3")
implementation("de.srsoftware:tools.plugin:1.0.1")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("de.srsoftware:tools.web:1.3.12")
implementation("com.mysql:mysql-connector-j:9.1.0")
}

2
de.srsoftware.cal.base/build.gradle.kts

@ -5,6 +5,6 @@ dependencies { @@ -5,6 +5,6 @@ dependencies {
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("de.srsoftware:tools.web:1.3.12")
implementation("org.json:json:20240303")
}

17
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java

@ -13,6 +13,8 @@ import de.srsoftware.tools.*; @@ -13,6 +13,8 @@ import de.srsoftware.tools.*;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
@ -25,10 +27,11 @@ import java.util.function.Predicate; @@ -25,10 +27,11 @@ import java.util.function.Predicate;
import java.util.stream.Stream;
public abstract class BaseImporter implements Importer {
public static final System.Logger LOG = System.getLogger(BaseImporter.class.getSimpleName());
private static final String SHA256 = "SHA-256";
private final MessageDigest digest;
protected BaseImporter() throws NoSuchAlgorithmException {
public BaseImporter() throws NoSuchAlgorithmException {
digest = MessageDigest.getInstance(SHA256);
}
@ -66,7 +69,7 @@ public abstract class BaseImporter implements Importer { @@ -66,7 +69,7 @@ public abstract class BaseImporter implements Importer {
Result<Tag> descriptionTag = extractDescriptionTag(eventTag);
if (descriptionTag.optional().isEmpty()) return transform(descriptionTag);
Tag tag = descriptionTag.optional().get();
tag.find(t -> t.is("iframe")).forEach(Tag::remove);
tag.find(ofType("iframe")).forEach(Tag::remove);
var inner = tag.inner(2);
return inner.isPresent() ? Payload.of(inner.get()) : error("No description found");
}
@ -126,6 +129,9 @@ public abstract class BaseImporter implements Importer { @@ -126,6 +129,9 @@ public abstract class BaseImporter implements Importer {
long id = 0;
// wird vor extractDescription ausgeführt, da extractDescription das DOM verändert
var coords = extractCoords(eventTag);
var titleResult = extractTitle(eventTag);
if (titleResult.optional().isEmpty()) return transform(titleResult);
String title = titleResult.optional().get();
@ -151,7 +157,7 @@ public abstract class BaseImporter implements Importer { @@ -151,7 +157,7 @@ public abstract class BaseImporter implements Importer {
.addLinks(eventPage)
.tags(extractTags(eventTag));
extractCoords(eventTag).optional().ifPresent(event::coords);
coords.optional().ifPresent(event::coords);
return Payload.of(event);
}
@ -205,7 +211,7 @@ public abstract class BaseImporter implements Importer { @@ -205,7 +211,7 @@ public abstract class BaseImporter implements Importer {
protected Result<String> extractLocation(Tag eventTag) {
Result<Tag> locationTag = extractLocationTag(eventTag);
if (locationTag.optional().isEmpty()) return transform(locationTag);
return Payload.of(locationTag.optional().get().toString(2));
return Payload.of(locationTag.optional().get().strip());
}
protected Result<Tag> extractLocationTag(Tag eventTag){
@ -257,7 +263,7 @@ public abstract class BaseImporter implements Importer { @@ -257,7 +263,7 @@ public abstract class BaseImporter implements Importer {
protected abstract List<String> extractTags(Tag eventTag);
protected Result<String> extractTitle(Tag eventTag) {
Result<Tag> titleTag = extractTitleTag(eventTag);
Result<Tag> titleTag = extractTitleTag(eventTag);
if (titleTag.optional().isEmpty()) return transform(titleTag);
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
return inner.isPresent() ? Payload.of(inner.get().trim()) : error("No title found");
@ -283,6 +289,7 @@ public abstract class BaseImporter implements Importer { @@ -283,6 +289,7 @@ public abstract class BaseImporter implements Importer {
return urls //
.map(Util::url)
.map(this::loadEvent)
.peek(res -> { if (res.optional().isEmpty()) LOG.log(System.Logger.Level.WARNING,res); })
.flatMap(result -> result.optional().stream());
}

47
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/SinglePageImporter.java

@ -0,0 +1,47 @@ @@ -0,0 +1,47 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal;
import static de.srsoftware.cal.Util.url;
import de.srsoftware.cal.api.Appointment;
import de.srsoftware.cal.api.Link;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.stream.Stream;
/**
 * Base class for importers that build their appointments from event tags found
 * directly on the program page ({@link #programURL()}), instead of collecting
 * event URLs first.
 */
public abstract class SinglePageImporter extends BaseImporter {
	/**
	 * Delegates to the {@link BaseImporter} constructor.
	 *
	 * @throws NoSuchAlgorithmException propagated from the super constructor
	 */
	public SinglePageImporter() throws NoSuchAlgorithmException {
		super();
	}

	/**
	 * Loads the program page, lets the subclass pick the event tags from it and
	 * converts every tag into an appointment. Results that carry no appointment
	 * are logged as warnings and dropped from the returned stream.
	 *
	 * @return the appointments that could be extracted from the program page
	 */
	@Override
	public Stream<Appointment> fetch() {
		var programPage = Payload.of(programURL());
		// every event of this importer links back to the program page itself;
		// stays null when the program URL cannot be converted
		var eventLink = programPage  //
		                    .map(Util::url)
		                    .optional()
		                    .map(url -> new Link(url, "Event-Seite"))
		                    .orElse(null);
		Stream<Result<Tag>> eventTags = url(programPage)
		                                    .map(this::open)
		                                    .map(this::preload)
		                                    .map(this::parseXML)
		                                    .map(this::extractEventTags)
		                                    .stream();
		return eventTags.map(tagResult -> extractEvent(tagResult, eventLink))
		    .peek(res -> {
			    if (res.optional().isEmpty()) LOG.log(System.Logger.Level.WARNING, res);
		    })
		    .flatMap(res -> res.optional().stream());
	}

	/**
	 * Selects the tags describing the individual events from the parsed program page.
	 *
	 * @param tagResult the parsed program page, or the failure that occurred while loading it
	 * @return the list of event tags, or a failure result
	 */
	protected abstract Result<List<Tag>> extractEventTags(Result<Tag> tagResult);

	/**
	 * Not used by this class' {@link #fetch()} implementation, which works on the
	 * program page only.
	 *
	 * @param programPage ignored
	 * @return an empty list of URLs (never {@code null}, so an accidental caller
	 *         cannot run into a NullPointerException)
	 */
	@Override
	protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
		return Payload.of(List.<String>of());
	}
}

4
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java

@ -11,9 +11,7 @@ import de.srsoftware.cal.api.Coords; @@ -11,9 +11,7 @@ import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.time.LocalDate;
import java.time.LocalDateTime;
@ -190,7 +188,7 @@ public class Util { @@ -190,7 +188,7 @@ public class Util {
var url = urlResult.optional().get();
try {
return Payload.of(new URI(url).toURL());
} catch (MalformedURLException | URISyntaxException e) {
} catch (Exception e) {
return error(e, "Failed to create URL of %s", url);
}
}

2
de.srsoftware.cal.importer/build.gradle.kts

@ -5,5 +5,5 @@ dependencies { @@ -5,5 +5,5 @@ dependencies {
implementation(project(":de.srsoftware.cal.base"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.3.0")
implementation("de.srsoftware:tools.web:1.3.11")
implementation("de.srsoftware:tools.web:1.3.12")
}

7
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/CosmicDawn.java

@ -16,8 +16,6 @@ import java.io.ByteArrayInputStream; @@ -16,8 +16,6 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
@ -85,11 +83,6 @@ private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg @@ -85,11 +83,6 @@ private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var page = programPage.optional();
if (page.isEmpty()) return transform(programPage);
try {
Files.writeString(Path.of("/tmp/test.txt"),page.get().toString(2));
} catch (IOException e) {
throw new RuntimeException(e);
}
var list = page.get().find(attributeEquals("class","event_listings_main"));
var urlList = list.stream()
.flatMap(tag -> tag.find(IS_ANCHOR).stream())

170
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/FHaus.java

@ -0,0 +1,170 @@ @@ -0,0 +1,170 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
public class FHaus extends BaseImporter {
// TODO: verwendet den gleichen Typ Kalender wie der Rosenkeller – evtl. kann man diese beiden in einer Abstrakten Superklasse zusammenführen
public static final Coords DEFAULT_COORDS = new Coords(50.9293, 11.58228);
public static final String DEFAULT_LOCATION = "F-Haus, Krautgasse 14, 07743 Jena";
public FHaus() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.f-haus.de";
}
@Override
public String description() {
return "Importer für Events des Jenaer F-Haus";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals(ID,"tribe-events-content");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeContains(CLASS,"single-event-description");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
return Payload.of(DEFAULT_COORDS);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"date")));
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"time")));
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return attributeEquals(ID,"tribe-events-content");
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var opt = programPage.optional();
if (opt.isEmpty()) return transform(programPage);
List<String> urls = opt.get().find(attributeEquals(CLASS,"tribe-events-calendar-list"))
.stream().flatMap(tag -> tag.find(IS_ANCHOR).stream())
.map(tag -> tag.get(HREF))
.filter(Objects::nonNull)
.filter(url -> url.contains("/event/"))
.map(url -> url.contains("://") ? url : baseUrl()+url)
.distinct()
.toList();
return Payload.of(urls);
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeContains(CLASS,"single-event-description");
}
@Override
protected Result<String> extractLocation(Tag eventTag) {
return Payload.of(DEFAULT_LOCATION);
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return null;
}
protected Result<LocalDate> extractStartDate(Tag eventTag) {
Result<Tag> startDateTag = extractStartDateTag(eventTag);
var opt = startDateTag.optional();
if (opt.isEmpty()) return transform(startDateTag);
return parseStartDate(opt.get().get(TITLE));
}
protected Result<Tag> extractStartDateTag(Tag eventTag) {
var list = eventTag.find(extractStartDateFilter());
if (list.isEmpty()) {
return error("Failed to find start date tag");
}
return Payload.of(list.getFirst());
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"date")));
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"time")));
}
@Override
protected List<String> extractTags(Tag eventTag) {
return List.of("F-Haus", "Jena");
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return ofType("h1");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return parseGermanTime(string);
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
return parseGermanTime(string);
}
@Override
protected String programURL() {
return baseUrl()+"/cms/events";
}
}

180
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Psychochor.java

@ -0,0 +1,180 @@ @@ -0,0 +1,180 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.ID;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.SinglePageImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
public class Psychochor extends SinglePageImporter {
private static final Pattern DATE_TIME_PATTERN = Pattern.compile("(\\d{4}).*(\\d\\d+)\\W*([a-zA-Z]+)\\W*(\\d\\d?):(\\d\\d?)");
private static final Pattern LATITUDE = Pattern.compile("!3d(-?\\d+\\.\\d{1,5})");
private static final Pattern LONGITUDE = Pattern.compile("!2d(-?\\d+\\.\\d{1,5})");
public Psychochor() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.psycho-chor.de";
}
@Override
public String description() {
return "Importer für Events des Jenaer Psychochors";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
var list = eventTag.find(ofType("iframe"));
if (list.isEmpty()) return error("No iframe found");
return list.stream().map(iframe -> iframe.get("data-src-cmplz"))
.filter(Objects::nonNull)
.map(this::parseCoords).findAny()
.orElseGet(() -> error("No coordinates found!"));
}
private Result<Coords> parseCoords(String s) {
var latitude = LATITUDE.matcher(s);
var longitude = LONGITUDE.matcher(s);
if (latitude.find() && longitude.find()){
double lat = Double.parseDouble(latitude.group(1));
double lon = Double.parseDouble(longitude.group(1));
return Payload.of(new Coords(lat,lon));
}
return error("Failed to parse coords from %s",s);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return null;
}
@Override
protected Result<List<Tag>> extractEventTags(Result<Tag> tagResult) {
var opt = tagResult.optional();
if (opt.isEmpty()) return transform(tagResult);
List<Tag> eventTags = opt.get().find(attributeEquals(ID, "evcal_list")).stream()
.flatMap(tag -> tag.find(attributeStartsWith(ID, "event_")).stream())
.toList();
return Payload.of(eventTags);
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return attributeContains(CLASS,"evcal_location");
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeContains(CLASS,"evo_start");
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeContains(CLASS,"evo_start");
}
@Override
protected List<String> extractTags(Tag eventTag) {
var eventTags = new ArrayList<String>();
eventTags.add("Psychochor");
eventTags.add("Jena");
eventTag.find(attributeEquals("data-filter","event_type")).stream().map(Tag::strip).forEach(eventTags::add);
return eventTags;
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return attributeHas(CLASS,"evcal_event_title");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return null;
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
var matcher = DATE_TIME_PATTERN.matcher(string);
if (matcher.find()){
int year = Integer.parseInt(matcher.group(1));
var res = Util.toNumericMonth(matcher.group(3));
if (res.optional().isEmpty()) return transform(res);
int month = res.optional().get();
int day = Integer.parseInt(matcher.group(2));
return Payload.of(LocalDate.of(year,month,day));
}
return error("Failed to parse date from %s",string);
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
var matcher = DATE_TIME_PATTERN.matcher(string);
if (matcher.find()){
int hour = Integer.parseInt(matcher.group(4));
int min = Integer.parseInt(matcher.group(5));
return Payload.of(LocalTime.of(hour,min));
}
return error("Failed to parse date from %s",string);
}
@Override
protected String programURL() {
return baseUrl()+"/de/events";
}
}

14
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

@ -1,18 +1,17 @@ @@ -1,18 +1,17 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.TagFilter;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
@ -21,6 +20,9 @@ import java.util.List; @@ -21,6 +20,9 @@ import java.util.List;
import java.util.function.Predicate;
public class Rosenkeller extends BaseImporter {
// TODO: verwendet den gleichen Typ Kalender wie das F-Haus – evtl. kann man diese beiden in einer Abstrakten Superklasse zusammenführen
private static final String BASE_URL = "https://rosenkeller.org";
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
private static final Coords COORDS = new Coords(50.92945, 11.58491);
@ -40,7 +42,7 @@ public class Rosenkeller extends BaseImporter { @@ -40,7 +42,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return TagFilter.attributeEquals(ID,"tribe-events-content");
return attributeEquals(ID,"tribe-events-content");
}
@Override
@ -72,7 +74,7 @@ public class Rosenkeller extends BaseImporter { @@ -72,7 +74,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Predicate<Tag> extractEventTagFilter() {
return TagFilter.attributeEquals(ID,"tribe-events-content");
return attributeEquals(ID,"tribe-events-content");
}
@Override
@ -150,7 +152,7 @@ public class Rosenkeller extends BaseImporter { @@ -150,7 +152,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Result<LocalTime> parseEndTime(String string) {
return Util.parseGermanTime(string);
return parseGermanTime(string);
}
@Override
@ -164,7 +166,7 @@ public class Rosenkeller extends BaseImporter { @@ -164,7 +166,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Result<LocalTime> parseStartTime(String string) {
return Util.parseGermanTime(string);
return parseGermanTime(string);
}
@Override

Loading…
Cancel
Save