fixed minor bugs, added import for From-Hell in Erfurt
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -287,7 +287,7 @@ public class AutoImporter implements Runnable, ClassListener {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void classAdded(Class<?> aClass) {
|
public void classAdded(Class<?> aClass) {
|
||||||
if (Importer.class.isAssignableFrom(aClass) && aClass.getSimpleName().contains("Ebu")) try {
|
if (Importer.class.isAssignableFrom(aClass))) try {
|
||||||
var instance = aClass.getDeclaredConstructor().newInstance();
|
var instance = aClass.getDeclaredConstructor().newInstance();
|
||||||
importers.add((Importer) instance);
|
importers.add((Importer) instance);
|
||||||
lastImport = null;
|
lastImport = null;
|
||||||
|
|||||||
@@ -278,7 +278,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
var url = Payload.of(programURL());
|
var url = Payload.of(programURL());
|
||||||
Stream<Result<String>> urls = url(url)
|
Stream<Result<String>> urls = url(url)
|
||||||
.map(Util::open) //
|
.map(Util::open) //
|
||||||
.map(Util::preload)
|
.map(this::preload)
|
||||||
.map(Util::parseXML)
|
.map(Util::parseXML)
|
||||||
.map(this::extractEventUrls)
|
.map(this::extractEventUrls)
|
||||||
.stream();
|
.stream();
|
||||||
@@ -305,7 +305,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
.orElse(null);
|
.orElse(null);
|
||||||
return urlResult //
|
return urlResult //
|
||||||
.map(Util::open)
|
.map(Util::open)
|
||||||
.map(Util::preload)
|
.map(this::preload)
|
||||||
.map(Util::parseXML)
|
.map(Util::parseXML)
|
||||||
.map(this::extractEventTag)
|
.map(this::extractEventTag)
|
||||||
.map(tagResult -> extractEvent(tagResult, link));
|
.map(tagResult -> extractEvent(tagResult, link));
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ public class Util {
|
|||||||
public static final String VCALENDAR = "VCALENDAR";
|
public static final String VCALENDAR = "VCALENDAR";
|
||||||
|
|
||||||
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("^\\D*(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})\\D");
|
public static final Pattern GERMAN_DATE_PATTERN = Pattern.compile("^\\D*(\\d\\d?)\\.(\\d\\d?)\\.(\\d{4})\\D");
|
||||||
public static final Pattern GERMAN_DATE_PATTERN_LONG = Pattern.compile("(\\d\\d?)\\.\\s*(\\w+)\\s+(\\d{4})\\D");
|
public static final Pattern GERMAN_DATE_PATTERN_LONG = Pattern.compile("(\\d\\d?)\\.?\\s*(\\w+)\\s+(\\d{4})\\D");
|
||||||
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?\\D");
|
public static final Pattern GERMAN_TIME_PATTERN = Pattern.compile("(\\d\\d?):(\\d\\d?)(:(\\d\\d?))?\\D");
|
||||||
private static final Pattern BG_IMAGE_URL = Pattern.compile("background(-image)?:\\surl\\('?([^)]+)'?\\)");
|
private static final Pattern BG_IMAGE_URL = Pattern.compile("background(-image)?:\\surl\\('?([^)]+)'?\\)");
|
||||||
private static final System.Logger LOG = System.getLogger(Util.class.getSimpleName());
|
private static final System.Logger LOG = System.getLogger(Util.class.getSimpleName());
|
||||||
|
|||||||
@@ -286,14 +286,14 @@ public class MariaDB implements Database {
|
|||||||
try {
|
try {
|
||||||
extractCoords(results.getString(COORDS)).optional().ifPresent(appointment::coords);
|
extractCoords(results.getString(COORDS)).optional().ifPresent(appointment::coords);
|
||||||
} catch (SQLException e) {
|
} catch (SQLException e) {
|
||||||
LOG.log(WARNING, "Failed to read coordinates from database!");
|
LOG.log(TRACE, "Result set did not cointain coords!");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
var tags = nullIfEmpty(results.getString("tags"));
|
var tags = nullIfEmpty(results.getString("tags"));
|
||||||
if (tags != null) appointment.tags(tags.split(","));
|
if (tags != null) appointment.tags(tags.split(","));
|
||||||
} catch (SQLException e) {
|
} catch (SQLException e) {
|
||||||
LOG.log(WARNING, "Failed to read tags from database!");
|
LOG.log(TRACE, "Result set did not cointain tags!");
|
||||||
}
|
}
|
||||||
return Payload.of(appointment);
|
return Payload.of(appointment);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,219 @@
|
|||||||
|
package de.srsoftware.cal.importer.erfurt;
|
||||||
|
|
||||||
|
import de.srsoftware.cal.BaseImporter;
|
||||||
|
import de.srsoftware.cal.Util;
|
||||||
|
import de.srsoftware.cal.api.Attachment;
|
||||||
|
import de.srsoftware.cal.api.Coords;
|
||||||
|
import de.srsoftware.cal.api.Link;
|
||||||
|
import de.srsoftware.tools.Payload;
|
||||||
|
import de.srsoftware.tools.Result;
|
||||||
|
import de.srsoftware.tools.Strings;
|
||||||
|
import de.srsoftware.tools.Tag;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.LocalTime;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
|
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||||
|
import static de.srsoftware.cal.Util.parseLongGermanDate;
|
||||||
|
import static de.srsoftware.tools.Error.error;
|
||||||
|
import static de.srsoftware.tools.Optionals.nullIfEmpty;
|
||||||
|
import static de.srsoftware.tools.Result.transform;
|
||||||
|
import static de.srsoftware.tools.Tag.*;
|
||||||
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
public class FromHell extends BaseImporter {
|
||||||
|
|
||||||
|
private static final Coords DEFAULT_COORDS = new Coords(50.97372, 10.9541);
|
||||||
|
private static final String DEFAULT_LOCATION = "Club From Hell, Flughafenstraße 41, 99092 Erfurt / Bindersleben";
|
||||||
|
public FromHell() throws NoSuchAlgorithmException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String baseUrl() {
|
||||||
|
return "https://www.clubfromhell.de";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String description() {
|
||||||
|
return "Importer für Events des Club „From Hell“ in Erfurt";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<Attachment> extractAttachments(Tag eventTag) {
|
||||||
|
return super.extractAttachments(eventTag).stream()
|
||||||
|
.filter(att -> !att.url().toString().contains("/images/geo/"))
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||||
|
return attributeHas(CLASS,"pane-content");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractDescriptionFilter() {
|
||||||
|
return attributeHas(CLASS,"views-field-body");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||||
|
var res = super.extractLocation(eventTag);
|
||||||
|
if (res.optional().isEmpty()) return transform(res);
|
||||||
|
var location = res.optional().get().trim();
|
||||||
|
var lower = location.toLowerCase();
|
||||||
|
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_COORDS);
|
||||||
|
return error("Unknown location: %s → cannot bind coordinates!",location);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractEndDateFilter() {
|
||||||
|
return attributeHas(CLASS,"date-display-end");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractEndTimeFilter() {
|
||||||
|
return attributeHas(CLASS,"date-display-end");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractEventTagFilter() {
|
||||||
|
return attributeHas(CLASS,"pane-events-uebersicht-alle");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||||
|
var opt = programPage.optional();
|
||||||
|
if (opt.isEmpty()) return transform(programPage);
|
||||||
|
var list = opt.get().find(attributeContains(CLASS,"pane-events-uebersicht-alle-liste")).stream()
|
||||||
|
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||||
|
.map(tag -> tag.get(HREF))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.map(link -> link.contains("://") ? link : baseUrl()+link)
|
||||||
|
.distinct()
|
||||||
|
.toList();
|
||||||
|
return Payload.of(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<Link> extractLinks(Tag appointmentTag) {
|
||||||
|
return super.extractLinks(appointmentTag).stream()
|
||||||
|
.filter(link -> !link.url().toString().contains("/genre/"))
|
||||||
|
.filter(link -> !link.url().toString().contains("/maps/"))
|
||||||
|
.filter(link -> nullIfEmpty(link.desciption()) != null)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractLinksFilter() {
|
||||||
|
return attributeHas(CLASS,"pane-content");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<String> extractLocation(Tag eventTag) {
|
||||||
|
var res = super.extractLocation(eventTag);
|
||||||
|
if (res.optional().isEmpty()) return transform(res);
|
||||||
|
var location = res.optional().get().trim();
|
||||||
|
var lower = location.toLowerCase();
|
||||||
|
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_LOCATION);
|
||||||
|
return Payload.of(location);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractLocationFilter() {
|
||||||
|
return IS_ANCHOR.and(tag -> tag.parent().map(p -> p.get(ID)).filter(Objects::nonNull).map(s->s.contains("field_ort_short")).orElse(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartDateFilter() {
|
||||||
|
return attributeHas(CLASS,"date-display-start");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractStartTimeFilter() {
|
||||||
|
return attributeHas(CLASS,"date-display-start");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> extractTags(Tag eventTag) {
|
||||||
|
var tags = new HashSet<String>();
|
||||||
|
tags.add("FromHell");
|
||||||
|
tags.add("Erfurt");
|
||||||
|
eventTag.find(attributeContains(CLASS,"views-field-field-genre")).stream()
|
||||||
|
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||||
|
.map(Tag::strip)
|
||||||
|
.map(Strings::camelCase)
|
||||||
|
.forEach(tags::add);
|
||||||
|
return List.copyOf(tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result<String> extractTitle(Tag eventTag) {
|
||||||
|
Result<Tag> titleTag = extractTitleTag(eventTag);
|
||||||
|
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
||||||
|
var inner = titleTag.optional().map(Tag::strip);
|
||||||
|
return inner.isPresent() ? Payload.of(inner.get().trim()) : error("No title found");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Predicate<Tag> extractTitleFilter() {
|
||||||
|
return ofType("h2");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseEndDate(String string) {
|
||||||
|
return parseLongGermanDate(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalTime> parseEndTime(String string) {
|
||||||
|
return parseGermanTime(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDate> parseStartDate(String string) {
|
||||||
|
return parseLongGermanDate(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalTime> parseStartTime(String string) {
|
||||||
|
return parseGermanTime(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Die FromHell-Seite haut einen fefekten Header raus, der den Parser lahmlegt
|
||||||
|
* Also schneiden wir den kompletten header ab...
|
||||||
|
* @param inputStream eingehender InputStream, verpackt in Result
|
||||||
|
* @return ausgehender InputStream, verpackt in Result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Result<InputStream> preload(Result<InputStream> inputStream) {
|
||||||
|
var opt = inputStream.optional();
|
||||||
|
if (opt.isEmpty()) return transform(inputStream);
|
||||||
|
try {
|
||||||
|
var input = opt.get();
|
||||||
|
var bos = new ByteArrayOutputStream();
|
||||||
|
input.transferTo(bos);
|
||||||
|
input.close();
|
||||||
|
String code = bos.toString(UTF_8);
|
||||||
|
var pos = code.indexOf("<body");
|
||||||
|
return Payload.of(new ByteArrayInputStream(code.substring(pos).getBytes(UTF_8)));
|
||||||
|
} catch (IOException e) {
|
||||||
|
return error(e, "Failed to buffer data from %s", inputStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String programURL() {
|
||||||
|
return baseUrl()+"/events/uebersicht-alle_liste.html";
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user