Added importers for FHaus and Psychochor

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-02 12:28:14 +01:00
parent 3f80b13d8e
commit 2491e4fbf0
10 changed files with 421 additions and 24 deletions

View File

@@ -16,8 +16,6 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
@@ -85,11 +83,6 @@ private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var page = programPage.optional();
if (page.isEmpty()) return transform(programPage);
try {
Files.writeString(Path.of("/tmp/test.txt"),page.get().toString(2));
} catch (IOException e) {
throw new RuntimeException(e);
}
var list = page.get().find(attributeEquals("class","event_listings_main"));
var urlList = list.stream()
.flatMap(tag -> tag.find(IS_ANCHOR).stream())

View File

@@ -0,0 +1,170 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
public class FHaus extends BaseImporter {
// TODO: verwendet den gleichen Typ Kalender wie der Rosenkeller evtl. kann man diese beiden in einer Abstrakten Superklasse zusammenführen
public static final Coords DEFAULT_COORDS = new Coords(50.9293, 11.58228);
public static final String DEFAULT_LOCATION = "F-Haus, Krautgasse 14, 07743 Jena";
public FHaus() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.f-haus.de";
}
@Override
public String description() {
return "Importer für Events des Jenaer F-Haus";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals(ID,"tribe-events-content");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeContains(CLASS,"single-event-description");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
return Payload.of(DEFAULT_COORDS);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"date")));
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"time")));
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return attributeEquals(ID,"tribe-events-content");
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var opt = programPage.optional();
if (opt.isEmpty()) return transform(programPage);
List<String> urls = opt.get().find(attributeEquals(CLASS,"tribe-events-calendar-list"))
.stream().flatMap(tag -> tag.find(IS_ANCHOR).stream())
.map(tag -> tag.get(HREF))
.filter(Objects::nonNull)
.filter(url -> url.contains("/event/"))
.map(url -> url.contains("://") ? url : baseUrl()+url)
.distinct()
.toList();
return Payload.of(urls);
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeContains(CLASS,"single-event-description");
}
@Override
protected Result<String> extractLocation(Tag eventTag) {
return Payload.of(DEFAULT_LOCATION);
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return null;
}
protected Result<LocalDate> extractStartDate(Tag eventTag) {
Result<Tag> startDateTag = extractStartDateTag(eventTag);
var opt = startDateTag.optional();
if (opt.isEmpty()) return transform(startDateTag);
return parseStartDate(opt.get().get(TITLE));
}
protected Result<Tag> extractStartDateTag(Tag eventTag) {
var list = eventTag.find(extractStartDateFilter());
if (list.isEmpty()) {
return error("Failed to find start date tag");
}
return Payload.of(list.getFirst());
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"date")));
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"time")));
}
@Override
protected List<String> extractTags(Tag eventTag) {
return List.of("F-Haus", "Jena");
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return ofType("h1");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return parseGermanTime(string);
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
try {
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
} catch (Exception e){
return error(e,"Failed to parse date: %s",string);
}
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
return parseGermanTime(string);
}
@Override
protected String programURL() {
return baseUrl()+"/cms/events";
}
}

View File

@@ -0,0 +1,180 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.ID;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.SinglePageImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
public class Psychochor extends SinglePageImporter {
private static final Pattern DATE_TIME_PATTERN = Pattern.compile("(\\d{4}).*(\\d\\d+)\\W*([a-zA-Z]+)\\W*(\\d\\d?):(\\d\\d?)");
private static final Pattern LATITUDE = Pattern.compile("!3d(-?\\d+\\.\\d{1,5})");
private static final Pattern LONGITUDE = Pattern.compile("!2d(-?\\d+\\.\\d{1,5})");
public Psychochor() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.psycho-chor.de";
}
@Override
public String description() {
return "Importer für Events des Jenaer Psychochors";
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
var list = eventTag.find(ofType("iframe"));
if (list.isEmpty()) return error("No iframe found");
return list.stream().map(iframe -> iframe.get("data-src-cmplz"))
.filter(Objects::nonNull)
.map(this::parseCoords).findAny()
.orElseGet(() -> error("No coordinates found!"));
}
private Result<Coords> parseCoords(String s) {
var latitude = LATITUDE.matcher(s);
var longitude = LONGITUDE.matcher(s);
if (latitude.find() && longitude.find()){
double lat = Double.parseDouble(latitude.group(1));
double lon = Double.parseDouble(longitude.group(1));
return Payload.of(new Coords(lat,lon));
}
return error("Failed to parse coords from %s",s);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return null;
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return null;
}
@Override
protected Result<List<Tag>> extractEventTags(Result<Tag> tagResult) {
var opt = tagResult.optional();
if (opt.isEmpty()) return transform(tagResult);
List<Tag> eventTags = opt.get().find(attributeEquals(ID, "evcal_list")).stream()
.flatMap(tag -> tag.find(attributeStartsWith(ID, "event_")).stream())
.toList();
return Payload.of(eventTags);
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return attributeContains(CLASS,"evcal_location");
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeContains(CLASS,"evo_start");
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeContains(CLASS,"evo_start");
}
@Override
protected List<String> extractTags(Tag eventTag) {
var eventTags = new ArrayList<String>();
eventTags.add("Psychochor");
eventTags.add("Jena");
eventTag.find(attributeEquals("data-filter","event_type")).stream().map(Tag::strip).forEach(eventTags::add);
return eventTags;
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return attributeHas(CLASS,"evcal_event_title");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return null;
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
var matcher = DATE_TIME_PATTERN.matcher(string);
if (matcher.find()){
int year = Integer.parseInt(matcher.group(1));
var res = Util.toNumericMonth(matcher.group(3));
if (res.optional().isEmpty()) return transform(res);
int month = res.optional().get();
int day = Integer.parseInt(matcher.group(2));
return Payload.of(LocalDate.of(year,month,day));
}
return error("Failed to parse date from %s",string);
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
var matcher = DATE_TIME_PATTERN.matcher(string);
if (matcher.find()){
int hour = Integer.parseInt(matcher.group(4));
int min = Integer.parseInt(matcher.group(5));
return Payload.of(LocalTime.of(hour,min));
}
return error("Failed to parse date from %s",string);
}
@Override
protected String programURL() {
return baseUrl()+"/de/events";
}
}

View File

@@ -1,18 +1,17 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.TagFilter;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
@@ -21,6 +20,9 @@ import java.util.List;
import java.util.function.Predicate;
public class Rosenkeller extends BaseImporter {
// TODO: uses the same type of calendar as the F-Haus; the two could possibly be merged into a common abstract superclass
private static final String BASE_URL = "https://rosenkeller.org";
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
private static final Coords COORDS = new Coords(50.92945, 11.58491);
@@ -40,7 +42,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return TagFilter.attributeEquals(ID,"tribe-events-content");
return attributeEquals(ID,"tribe-events-content");
}
@Override
@@ -72,7 +74,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Predicate<Tag> extractEventTagFilter() {
return TagFilter.attributeEquals(ID,"tribe-events-content");
return attributeEquals(ID,"tribe-events-content");
}
@Override
@@ -150,7 +152,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Result<LocalTime> parseEndTime(String string) {
return Util.parseGermanTime(string);
return parseGermanTime(string);
}
@Override
@@ -164,7 +166,7 @@ public class Rosenkeller extends BaseImporter {
@Override
protected Result<LocalTime> parseStartTime(String string) {
return Util.parseGermanTime(string);
return parseGermanTime(string);
}
@Override