added importer for CafeWagner
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.cal.Util.extractBackgroundImage;
|
||||
import static de.srsoftware.cal.Util.toNumericMonth;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Optionals.emptyIfNull;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.*;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Attachment;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.*;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.*;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class CafeWagner extends BaseImporter {
|
||||
private static final Pattern DATE_FORMAT = Pattern.compile("(\\d\\d?)\\.\\s+(\\w+)\\s+(\\d{4})");
|
||||
private static final String LOCATION_MVZ = "MVZ_Wagner, Kochstraße 2a, 07745 Jena";
|
||||
private static final String DEFAULT_LOCATION = "Café Wagner, Wagnergasse 26, 07743 Jena";
|
||||
private static final Coords MVZ_COORDS = new Coords(50.92532, 11.57909);
|
||||
private static final Coords DEFAULT_COORDS = new Coords(50.93121, 11.58023);
|
||||
|
||||
public CafeWagner() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return "https://www.cafewagner.de";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Importiert Events des Café Wagner in Jena";
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected List<Attachment> extractAttachments(Tag eventTag) {
|
||||
var combined = new HashSet<>(super.extractAttachments(eventTag));
|
||||
eventTag.find(attributeHas(CLASS,"image")).stream()
|
||||
.map(tag -> extractBackgroundImage(tag,baseUrl()))
|
||||
.map(Util::url)
|
||||
.map(Util::toAttachment)
|
||||
.map(Result::optional)
|
||||
.flatMap(Optional::stream)
|
||||
.forEach(combined::add);
|
||||
return List.copyOf(combined);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return ofType("main");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeHas(CLASS,"text-component");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractDescriptionTag(Tag eventTag){
|
||||
var list = eventTag.find(extractDescriptionFilter());
|
||||
if (list.isEmpty()) return error("Failed to find description tag");
|
||||
return Payload.of(new Tag(DIV).addAll(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
var res = super.extractLocation(eventTag);
|
||||
if (res instanceof Payload<String> payload){
|
||||
var location = payload.get().toLowerCase();
|
||||
return Payload.of(location.contains("mvz") ? MVZ_COORDS : DEFAULT_COORDS);
|
||||
}
|
||||
return Payload.of(DEFAULT_COORDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return ofType("main");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
var opt = programPage.optional();
|
||||
if (opt.isEmpty()) return transform(programPage);
|
||||
var list = opt.get().find(attributeEquals(CLASS,"event-calendar"));
|
||||
if (list.isEmpty())return error("calendar div not found");
|
||||
var calendar = list.getFirst();
|
||||
var urls = calendar.find(IS_ANCHOR).stream()
|
||||
.map(anchor -> anchor.get(HREF))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
||||
.toList();
|
||||
return Payload.of(urls);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return ofType("main");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
var res = super.extractLocation(eventTag);
|
||||
if (res instanceof Payload<String> payload){
|
||||
var location = payload.get().toLowerCase();
|
||||
return Payload.of(location.contains("mvz") ? LOCATION_MVZ : DEFAULT_LOCATION);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return attributeHas(CLASS,"event-location");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return ofType("time").and(tag -> !emptyIfNull(tag.get("datetime")).isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return ofType("time").and(tag -> !emptyIfNull(tag.get("datetime")).isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
var tags = new HashSet<String>();
|
||||
tags.add("CafeWagner");
|
||||
tags.add("Jena");
|
||||
eventTag.find(attributeEquals(CLASS,"tag")).stream()
|
||||
.flatMap(tag -> tag.find(ofType("span")).stream())
|
||||
.map(Tag::strip)
|
||||
.map(String::trim)
|
||||
.forEach(tags::add);
|
||||
return List.copyOf(tags);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return ofType("h1");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseEndDate(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
var matcher = DATE_FORMAT.matcher(string);
|
||||
if (matcher.find()){
|
||||
var day = Integer.parseInt(matcher.group(1));
|
||||
var month = toNumericMonth(matcher.group(2));
|
||||
if (month.optional().isEmpty()) return transform(month);
|
||||
var year = Integer.parseInt(matcher.group(3));
|
||||
return Payload.of(LocalDate.of(year,month.optional().get(),day));
|
||||
}
|
||||
return error("Failed to recognize date in %s",string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return Util.parseGermanTime(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl()+"/de";
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,6 @@ import static de.srsoftware.tools.TagFilter.*;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.*;
|
||||
import java.io.ByteArrayInputStream;
|
||||
@@ -22,7 +21,6 @@ import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.DuplicateFormatFlagsException;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@@ -13,9 +13,6 @@ import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
import de.srsoftware.tools.TagFilter;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
|
||||
Reference in New Issue
Block a user