added importer for FHaus and Psychochor
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -16,8 +16,6 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
@@ -85,11 +83,6 @@ private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
var page = programPage.optional();
|
||||
if (page.isEmpty()) return transform(programPage);
|
||||
try {
|
||||
Files.writeString(Path.of("/tmp/test.txt"),page.get().toString(2));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
var list = page.get().find(attributeEquals("class","event_listings_main"));
|
||||
var urlList = list.stream()
|
||||
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.*;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class FHaus extends BaseImporter {
|
||||
|
||||
// TODO: verwendet den gleichen Typ Kalender wie der Rosenkeller – evtl. kann man diese beiden in einer Abstrakten Superklasse zusammenführen
|
||||
|
||||
public static final Coords DEFAULT_COORDS = new Coords(50.9293, 11.58228);
|
||||
public static final String DEFAULT_LOCATION = "F-Haus, Krautgasse 14, 07743 Jena";
|
||||
public FHaus() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return "https://www.f-haus.de";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Importer für Events des Jenaer F-Haus";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return attributeEquals(ID,"tribe-events-content");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeContains(CLASS,"single-event-description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
return Payload.of(DEFAULT_COORDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"date")));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-end").and(attributeContains(CLASS,"time")));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return attributeEquals(ID,"tribe-events-content");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
var opt = programPage.optional();
|
||||
if (opt.isEmpty()) return transform(programPage);
|
||||
List<String> urls = opt.get().find(attributeEquals(CLASS,"tribe-events-calendar-list"))
|
||||
.stream().flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||
.map(tag -> tag.get(HREF))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(url -> url.contains("/event/"))
|
||||
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
||||
.distinct()
|
||||
.toList();
|
||||
return Payload.of(urls);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return attributeContains(CLASS,"single-event-description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
return Payload.of(DEFAULT_LOCATION);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
protected Result<LocalDate> extractStartDate(Tag eventTag) {
|
||||
Result<Tag> startDateTag = extractStartDateTag(eventTag);
|
||||
var opt = startDateTag.optional();
|
||||
if (opt.isEmpty()) return transform(startDateTag);
|
||||
return parseStartDate(opt.get().get(TITLE));
|
||||
}
|
||||
|
||||
protected Result<Tag> extractStartDateTag(Tag eventTag) {
|
||||
var list = eventTag.find(extractStartDateFilter());
|
||||
if (list.isEmpty()) {
|
||||
return error("Failed to find start date tag");
|
||||
}
|
||||
return Payload.of(list.getFirst());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"date")));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return attributeHas(CLASS,"tribe-events-abbr").and(attributeContains(CLASS,"tribe-events-start").and(attributeContains(CLASS,"time")));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
return List.of("F-Haus", "Jena");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return ofType("h1");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseEndDate(String string) {
|
||||
try {
|
||||
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
|
||||
} catch (Exception e){
|
||||
return error(e,"Failed to parse date: %s",string);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return parseGermanTime(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
try {
|
||||
return Payload.of(LocalDate.parse(string, DateTimeFormatter.ISO_DATE));
|
||||
} catch (Exception e){
|
||||
return error(e,"Failed to parse date: %s",string);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return parseGermanTime(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl()+"/cms/events";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,180 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.CLASS;
|
||||
import static de.srsoftware.tools.Tag.ID;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
import de.srsoftware.cal.SinglePageImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class Psychochor extends SinglePageImporter {
|
||||
|
||||
private static final Pattern DATE_TIME_PATTERN = Pattern.compile("(\\d{4}).*(\\d\\d+)\\W*([a-zA-Z]+)\\W*(\\d\\d?):(\\d\\d?)");
|
||||
private static final Pattern LATITUDE = Pattern.compile("!3d(-?\\d+\\.\\d{1,5})");
|
||||
private static final Pattern LONGITUDE = Pattern.compile("!2d(-?\\d+\\.\\d{1,5})");
|
||||
|
||||
public Psychochor() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return "https://www.psycho-chor.de";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Importer für Events des Jenaer Psychochors";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
var list = eventTag.find(ofType("iframe"));
|
||||
if (list.isEmpty()) return error("No iframe found");
|
||||
return list.stream().map(iframe -> iframe.get("data-src-cmplz"))
|
||||
.filter(Objects::nonNull)
|
||||
.map(this::parseCoords).findAny()
|
||||
.orElseGet(() -> error("No coordinates found!"));
|
||||
}
|
||||
|
||||
private Result<Coords> parseCoords(String s) {
|
||||
var latitude = LATITUDE.matcher(s);
|
||||
var longitude = LONGITUDE.matcher(s);
|
||||
if (latitude.find() && longitude.find()){
|
||||
double lat = Double.parseDouble(latitude.group(1));
|
||||
double lon = Double.parseDouble(longitude.group(1));
|
||||
return Payload.of(new Coords(lat,lon));
|
||||
}
|
||||
return error("Failed to parse coords from %s",s);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<Tag>> extractEventTags(Result<Tag> tagResult) {
|
||||
var opt = tagResult.optional();
|
||||
if (opt.isEmpty()) return transform(tagResult);
|
||||
|
||||
List<Tag> eventTags = opt.get().find(attributeEquals(ID, "evcal_list")).stream()
|
||||
.flatMap(tag -> tag.find(attributeStartsWith(ID, "event_")).stream())
|
||||
.toList();
|
||||
return Payload.of(eventTags);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return attributeContains(CLASS,"evcal_location");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return attributeContains(CLASS,"evo_start");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return attributeContains(CLASS,"evo_start");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
var eventTags = new ArrayList<String>();
|
||||
eventTags.add("Psychochor");
|
||||
eventTags.add("Jena");
|
||||
eventTag.find(attributeEquals("data-filter","event_type")).stream().map(Tag::strip).forEach(eventTags::add);
|
||||
return eventTags;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return attributeHas(CLASS,"evcal_event_title");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseEndDate(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
var matcher = DATE_TIME_PATTERN.matcher(string);
|
||||
if (matcher.find()){
|
||||
int year = Integer.parseInt(matcher.group(1));
|
||||
var res = Util.toNumericMonth(matcher.group(3));
|
||||
if (res.optional().isEmpty()) return transform(res);
|
||||
int month = res.optional().get();
|
||||
int day = Integer.parseInt(matcher.group(2));
|
||||
return Payload.of(LocalDate.of(year,month,day));
|
||||
}
|
||||
return error("Failed to parse date from %s",string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
var matcher = DATE_TIME_PATTERN.matcher(string);
|
||||
if (matcher.find()){
|
||||
int hour = Integer.parseInt(matcher.group(4));
|
||||
int min = Integer.parseInt(matcher.group(5));
|
||||
return Payload.of(LocalTime.of(hour,min));
|
||||
}
|
||||
return error("Failed to parse date from %s",string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl()+"/de/events";
|
||||
}
|
||||
}
|
||||
@@ -1,18 +1,17 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.*;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
import de.srsoftware.tools.TagFilter;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
@@ -21,6 +20,9 @@ import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class Rosenkeller extends BaseImporter {
|
||||
|
||||
// TODO: verwendet den gleichen Typ Kalender wie das F-Haus – evtl. kann man diese beiden in einer Abstrakten Superklasse zusammenführen
|
||||
|
||||
private static final String BASE_URL = "https://rosenkeller.org";
|
||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||
private static final Coords COORDS = new Coords(50.92945, 11.58491);
|
||||
@@ -40,7 +42,7 @@ public class Rosenkeller extends BaseImporter {
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return TagFilter.attributeEquals(ID,"tribe-events-content");
|
||||
return attributeEquals(ID,"tribe-events-content");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -72,7 +74,7 @@ public class Rosenkeller extends BaseImporter {
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return TagFilter.attributeEquals(ID,"tribe-events-content");
|
||||
return attributeEquals(ID,"tribe-events-content");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -150,7 +152,7 @@ public class Rosenkeller extends BaseImporter {
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return Util.parseGermanTime(string);
|
||||
return parseGermanTime(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -164,7 +166,7 @@ public class Rosenkeller extends BaseImporter {
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return Util.parseGermanTime(string);
|
||||
return parseGermanTime(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user