fixed minor bugs, added import for From-Hell in Erfurt

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-03 11:41:07 +01:00
parent 0d1215558e
commit 0102ba9f11
5 changed files with 225 additions and 6 deletions

View File

@@ -0,0 +1,219 @@
package de.srsoftware.cal.importer.erfurt;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Attachment;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.cal.api.Link;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Strings;
import de.srsoftware.tools.Tag;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.cal.Util.parseLongGermanDate;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.nullIfEmpty;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
import static java.nio.charset.StandardCharsets.UTF_8;
public class FromHell extends BaseImporter {
private static final Coords DEFAULT_COORDS = new Coords(50.97372, 10.9541);
private static final String DEFAULT_LOCATION = "Club From Hell, Flughafenstraße 41, 99092 Erfurt / Bindersleben";
public FromHell() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return "https://www.clubfromhell.de";
}
@Override
public String description() {
return "Importer für Events des Club „From Hell“ in Erfurt";
}
@Override
protected List<Attachment> extractAttachments(Tag eventTag) {
return super.extractAttachments(eventTag).stream()
.filter(att -> !att.url().toString().contains("/images/geo/"))
.toList();
}
@Override
protected Predicate<Tag> extractAttachmentsFilter() {
return attributeHas(CLASS,"pane-content");
}
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeHas(CLASS,"views-field-body");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
var res = super.extractLocation(eventTag);
if (res.optional().isEmpty()) return transform(res);
var location = res.optional().get().trim();
var lower = location.toLowerCase();
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_COORDS);
return error("Unknown location: %s → cannot bind coordinates!",location);
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return attributeHas(CLASS,"date-display-end");
}
@Override
protected Predicate<Tag> extractEndTimeFilter() {
return attributeHas(CLASS,"date-display-end");
}
@Override
protected Predicate<Tag> extractEventTagFilter() {
return attributeHas(CLASS,"pane-events-uebersicht-alle");
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
var opt = programPage.optional();
if (opt.isEmpty()) return transform(programPage);
var list = opt.get().find(attributeContains(CLASS,"pane-events-uebersicht-alle-liste")).stream()
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
.map(tag -> tag.get(HREF))
.filter(Objects::nonNull)
.map(link -> link.contains("://") ? link : baseUrl()+link)
.distinct()
.toList();
return Payload.of(list);
}
@Override
protected List<Link> extractLinks(Tag appointmentTag) {
return super.extractLinks(appointmentTag).stream()
.filter(link -> !link.url().toString().contains("/genre/"))
.filter(link -> !link.url().toString().contains("/maps/"))
.filter(link -> nullIfEmpty(link.desciption()) != null)
.toList();
}
@Override
protected Predicate<Tag> extractLinksFilter() {
return attributeHas(CLASS,"pane-content");
}
@Override
protected Result<String> extractLocation(Tag eventTag) {
var res = super.extractLocation(eventTag);
if (res.optional().isEmpty()) return transform(res);
var location = res.optional().get().trim();
var lower = location.toLowerCase();
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_LOCATION);
return Payload.of(location);
}
@Override
protected Predicate<Tag> extractLocationFilter() {
return IS_ANCHOR.and(tag -> tag.parent().map(p -> p.get(ID)).filter(Objects::nonNull).map(s->s.contains("field_ort_short")).orElse(false));
}
@Override
protected Predicate<Tag> extractStartDateFilter() {
return attributeHas(CLASS,"date-display-start");
}
@Override
protected Predicate<Tag> extractStartTimeFilter() {
return attributeHas(CLASS,"date-display-start");
}
@Override
protected List<String> extractTags(Tag eventTag) {
var tags = new HashSet<String>();
tags.add("FromHell");
tags.add("Erfurt");
eventTag.find(attributeContains(CLASS,"views-field-field-genre")).stream()
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
.map(Tag::strip)
.map(Strings::camelCase)
.forEach(tags::add);
return List.copyOf(tags);
}
protected Result<String> extractTitle(Tag eventTag) {
Result<Tag> titleTag = extractTitleTag(eventTag);
if (titleTag.optional().isEmpty()) return transform(titleTag);
var inner = titleTag.optional().map(Tag::strip);
return inner.isPresent() ? Payload.of(inner.get().trim()) : error("No title found");
}
@Override
protected Predicate<Tag> extractTitleFilter() {
return ofType("h2");
}
@Override
protected Result<LocalDate> parseEndDate(String string) {
return parseLongGermanDate(string);
}
@Override
protected Result<LocalTime> parseEndTime(String string) {
return parseGermanTime(string);
}
@Override
protected Result<LocalDate> parseStartDate(String string) {
return parseLongGermanDate(string);
}
@Override
protected Result<LocalTime> parseStartTime(String string) {
return parseGermanTime(string);
}
/**
* Die FromHell-Seite haut einen fefekten Header raus, der den Parser lahmlegt
* Also schneiden wir den kompletten header ab...
* @param inputStream eingehender InputStream, verpackt in Result
* @return ausgehender InputStream, verpackt in Result
*/
@Override
protected Result<InputStream> preload(Result<InputStream> inputStream) {
var opt = inputStream.optional();
if (opt.isEmpty()) return transform(inputStream);
try {
var input = opt.get();
var bos = new ByteArrayOutputStream();
input.transferTo(bos);
input.close();
String code = bos.toString(UTF_8);
var pos = code.indexOf("<body");
return Payload.of(new ByteArrayInputStream(code.substring(pos).getBytes(UTF_8)));
} catch (IOException e) {
return error(e, "Failed to buffer data from %s", inputStream);
}
}
@Override
protected String programURL() {
return baseUrl()+"/events/uebersicht-alle_liste.html";
}
}