added importer for CafeWagner

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-02 00:52:21 +01:00
parent e0dde9aa9e
commit 3f80b13d8e
9 changed files with 270 additions and 59 deletions

View File

@@ -0,0 +1,197 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.cal.Util.extractBackgroundImage;
import static de.srsoftware.cal.Util.toNumericMonth;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.emptyIfNull;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Attachment;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.*;
import java.security.NoSuchAlgorithmException;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Importer for events published on the Café Wagner (Jena) website.
 *
 * <p>The program page ({@link #programURL()}) is scanned for links inside the
 * {@code event-calendar} element; each linked page is then parsed using the filters and
 * parsers defined here. Events are mapped to one of two fixed venues: the regular
 * Café Wagner address, or the "MVZ" venue when the location text on the page mentions it.
 *
 * <p>No end date or end time is extracted: the corresponding filters and parsers return
 * {@code null}. NOTE(review): this assumes {@code BaseImporter} tolerates {@code null}
 * for these hooks — confirm against the base class.
 */
public class CafeWagner extends BaseImporter {
    /**
     * Matches dates such as {@code "7. März 2025"}: day, month name, four-digit year.
     * UNICODE_CHARACTER_CLASS is required so that {@code \w} also matches umlauts;
     * without it, month names like "März" are never recognized.
     */
    private static final Pattern DATE_FORMAT = Pattern.compile("(\\d\\d?)\\.\\s+(\\w+)\\s+(\\d{4})", Pattern.UNICODE_CHARACTER_CLASS);
    private static final String  LOCATION_MVZ     = "MVZ_Wagner, Kochstraße 2a, 07745 Jena";
    private static final String  DEFAULT_LOCATION = "Café Wagner, Wagnergasse 26, 07743 Jena";
    private static final Coords  MVZ_COORDS       = new Coords(50.92532, 11.57909);
    private static final Coords  DEFAULT_COORDS   = new Coords(50.93121, 11.58023);

    /**
     * Creates the importer.
     *
     * @throws NoSuchAlgorithmException propagated from the {@code BaseImporter} constructor
     */
    public CafeWagner() throws NoSuchAlgorithmException {
        super();
    }

    /** @return the root URL of the Café Wagner website, used to resolve relative links */
    @Override
    protected String baseUrl() {
        return "https://www.cafewagner.de";
    }

    /** @return a short (German) description of this importer */
    @Override
    public String description() {
        return "Importiert Events des Café Wagner in Jena";
    }

    /**
     * Collects the attachments found by the base implementation plus the background images
     * of all elements whose class attribute contains {@code image}.
     *
     * @param eventTag the tag enclosing the event
     * @return an unmodifiable, duplicate-free list of attachments in encounter order
     */
    @Override
    protected List<Attachment> extractAttachments(Tag eventTag) {
        // LinkedHashSet removes duplicates while keeping a deterministic order
        var attachments = new LinkedHashSet<>(super.extractAttachments(eventTag));
        eventTag.find(attributeHas(CLASS, "image")).stream()
            .map(tag -> extractBackgroundImage(tag, baseUrl()))
            .map(Util::url)
            .map(Util::toAttachment)
            .map(Result::optional)
            .flatMap(Optional::stream) // silently skip images that could not be converted
            .forEach(attachments::add);
        return List.copyOf(attachments);
    }

    /** @return filter selecting the {@code main} element as the attachment container */
    @Override
    protected Predicate<Tag> extractAttachmentsFilter() {
        return ofType("main");
    }

    /** @return filter selecting elements whose class attribute contains {@code text-component} */
    @Override
    protected Predicate<Tag> extractDescriptionFilter() {
        return attributeHas(CLASS, "text-component");
    }

    /**
     * Wraps all description fragments of the event in a single {@code div}.
     *
     * @param eventTag the tag enclosing the event
     * @return the wrapping tag, or an error result if no description fragment was found
     */
    @Override
    protected Result<Tag> extractDescriptionTag(Tag eventTag) {
        var fragments = eventTag.find(extractDescriptionFilter());
        if (fragments.isEmpty()) return error("Failed to find description tag");
        return Payload.of(new Tag(DIV).addAll(fragments));
    }

    /**
     * Determines the venue coordinates from the raw location text of the page.
     *
     * @param eventTag the tag enclosing the event
     * @return the MVZ coordinates if the location text mentions "mvz", otherwise (including
     *         when no location could be extracted) the Café Wagner coordinates
     */
    @Override
    protected Result<Coords> extractCoords(Tag eventTag) {
        // deliberately uses the base implementation: this class' extractLocation
        // already replaces the raw text with a fixed address
        var rawLocation = super.extractLocation(eventTag);
        if (rawLocation instanceof Payload<String> payload && isMvz(payload.get())) {
            return Payload.of(MVZ_COORDS);
        }
        return Payload.of(DEFAULT_COORDS);
    }

    /** @return {@code null}; no end date is extracted by this importer */
    @Override
    protected Predicate<Tag> extractEndDateFilter() {
        return null;
    }

    /** @return {@code null}; no end time is extracted by this importer */
    @Override
    protected Predicate<Tag> extractEndTimeFilter() {
        return null;
    }

    /** @return filter selecting the {@code main} element as the event container */
    @Override
    protected Predicate<Tag> extractEventTagFilter() {
        return ofType("main");
    }

    /**
     * Extracts the distinct event links from the calendar on the program page.
     *
     * @param programPage the parsed program page, or an error result
     * @return the list of event URLs (hrefs without a scheme are prefixed with
     *         {@link #baseUrl()}), the transformed input error, or an error if the page
     *         contains no element with class {@code event-calendar}
     */
    @Override
    protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
        var page = programPage.optional();
        if (page.isEmpty()) return transform(programPage);
        var calendars = page.get().find(attributeEquals(CLASS, "event-calendar"));
        if (calendars.isEmpty()) return error("calendar div not found");
        var urls = calendars.getFirst().find(IS_ANCHOR).stream()
            .map(anchor -> anchor.get(HREF))
            .filter(Objects::nonNull)
            .distinct()
            .map(href -> href.contains("://") ? href : baseUrl() + href)
            .toList();
        return Payload.of(urls);
    }

    /** @return filter selecting the {@code main} element as the link container */
    @Override
    protected Predicate<Tag> extractLinksFilter() {
        return ofType("main");
    }

    /**
     * Maps the raw location text of the page to one of the two known venue addresses.
     *
     * @param eventTag the tag enclosing the event
     * @return the MVZ address if the location text mentions "mvz", the Café Wagner address
     *         for any other location text, or the unchanged base result if it is no payload
     */
    @Override
    protected Result<String> extractLocation(Tag eventTag) {
        var rawLocation = super.extractLocation(eventTag);
        if (rawLocation instanceof Payload<String> payload) {
            return Payload.of(isMvz(payload.get()) ? LOCATION_MVZ : DEFAULT_LOCATION);
        }
        return rawLocation;
    }

    /** @return filter selecting elements whose class attribute contains {@code event-location} */
    @Override
    protected Predicate<Tag> extractLocationFilter() {
        return attributeHas(CLASS, "event-location");
    }

    /** @return filter selecting {@code time} elements with a non-empty {@code datetime} attribute */
    @Override
    protected Predicate<Tag> extractStartDateFilter() {
        return timeTagWithDatetime();
    }

    /** @return filter selecting {@code time} elements with a non-empty {@code datetime} attribute */
    @Override
    protected Predicate<Tag> extractStartTimeFilter() {
        return timeTagWithDatetime();
    }

    /**
     * Collects the tags of an event: the fixed tags "CafeWagner" and "Jena" plus the text of
     * every {@code span} inside elements whose class attribute equals {@code tag}.
     *
     * @param eventTag the tag enclosing the event
     * @return an unmodifiable, duplicate-free list of non-empty tags in encounter order
     */
    @Override
    protected List<String> extractTags(Tag eventTag) {
        // LinkedHashSet removes duplicates while keeping a deterministic order
        var tags = new LinkedHashSet<String>();
        tags.add("CafeWagner");
        tags.add("Jena");
        eventTag.find(attributeEquals(CLASS, "tag")).stream()
            .flatMap(tag -> tag.find(ofType("span")).stream())
            .map(Tag::strip)
            .map(String::trim)
            .filter(text -> !text.isEmpty()) // an empty span must not produce an empty tag
            .forEach(tags::add);
        return List.copyOf(tags);
    }

    /** @return filter selecting the {@code h1} element as the event title */
    @Override
    protected Predicate<Tag> extractTitleFilter() {
        return ofType("h1");
    }

    /** @return {@code null}; no end date is parsed by this importer */
    @Override
    protected Result<LocalDate> parseEndDate(String string) {
        return null;
    }

    /** @return {@code null}; no end time is parsed by this importer */
    @Override
    protected Result<LocalTime> parseEndTime(String string) {
        return null;
    }

    /**
     * Parses a date of the form {@code "<day>. <month name> <year>"} found anywhere in the text.
     *
     * @param string the text containing the date
     * @return the parsed date, or an error result if no date pattern is found, the month
     *         name cannot be converted, or the values do not form a valid calendar date
     */
    @Override
    protected Result<LocalDate> parseStartDate(String string) {
        var matcher = DATE_FORMAT.matcher(string);
        if (!matcher.find()) return error("Failed to recognize date in %s", string);

        var month       = toNumericMonth(matcher.group(2));
        var monthNumber = month.optional();
        if (monthNumber.isEmpty()) return transform(month);

        var day  = Integer.parseInt(matcher.group(1));
        var year = Integer.parseInt(matcher.group(3));
        try {
            return Payload.of(LocalDate.of(year, monthNumber.get(), day));
        } catch (DateTimeException e) {
            // e.g. "31. Februar 2025": report as error instead of aborting the import
            return error("Invalid date in %s", string);
        }
    }

    /**
     * Parses the start time by delegating to {@link Util#parseGermanTime}.
     *
     * @param string the text containing the time
     * @return the result of {@code Util.parseGermanTime}
     */
    @Override
    protected Result<LocalTime> parseStartTime(String string) {
        return Util.parseGermanTime(string);
    }

    /** @return the URL of the page listing the events ({@link #baseUrl()} + {@code "/de"}) */
    @Override
    protected String programURL() {
        return baseUrl() + "/de";
    }

    /** Tells whether a raw location text refers to the MVZ venue (case-insensitive). */
    private static boolean isMvz(String location) {
        return location.toLowerCase(Locale.ROOT).contains("mvz");
    }

    /** Builds the filter shared by start date and start time extraction. */
    private static Predicate<Tag> timeTagWithDatetime() {
        return ofType("time").and(tag -> !emptyIfNull(tag.get("datetime")).isEmpty());
    }
}

View File

@@ -10,7 +10,6 @@ import static de.srsoftware.tools.TagFilter.*;
import static java.nio.charset.StandardCharsets.UTF_8;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.*;
import java.io.ByteArrayInputStream;
@@ -22,7 +21,6 @@ import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.DuplicateFormatFlagsException;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Pattern;

View File

@@ -13,9 +13,6 @@ import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import de.srsoftware.tools.TagFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;