minor tweak to Wotufa importer, added AtParty importer

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
2025-01-02 19:29:34 +01:00
parent 87c67f4aee
commit c561be4b9c
6 changed files with 256 additions and 9 deletions

View File

@@ -0,0 +1,229 @@
package de.srsoftware.cal.importer.gera;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Attachment;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import static de.srsoftware.cal.Util.*;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.emptyIfNull;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.CLASS;
import static de.srsoftware.tools.Tag.HREF;
import static de.srsoftware.tools.TagFilter.*;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Map.of;
/**
 * Importer for events listed at at-party.de.
 *
 * <p>Event pages are located by scanning the program page for links containing
 * {@code /events/}. Location text found on an event page is matched against a
 * small table of known venues to obtain a full address and coordinates.
 */
public class AtParty extends BaseImporter {
	private static final Coords COORDS_HOFWIESENPARK = new Coords(50.87802, 12.07067);
	private static final Coords COORDS_QUI = new Coords(50.87932, 12.07853);
	private static final Coords COORDS_MUSIC_HALL = new Coords(50.87135, 12.07504);
	private static final Coords COORDS_HBF = new Coords(50.88335, 12.07767);

	/**
	 * A venue that can be recognized by a keyword inside the location text.
	 *
	 * @param keyword lower-case fragment searched for in the lower-cased location text
	 * @param address full address reported for the venue
	 * @param coords  coordinates reported for the venue
	 */
	private record Venue(String keyword, String address, Coords coords) {}

	/**
	 * Known venues. Order matters: the first entry whose keyword is contained
	 * in the location text wins.
	 */
	private static final List<Venue> VENUES = List.of(
		new Venue("seven", "Club Seven, Bahnhofsplatz 6, 07545 Gera", COORDS_HBF),
		new Venue("music hall", "Music Hall Gera, Heinrichstraße 49, 07545 Gera", COORDS_MUSIC_HALL),
		new Venue("partyhaus", "Partyhaus, De-Smit-Str. 2, 07545 Gera", COORDS_QUI),
		new Venue("qui", "Club QUI Gera, De-Smit-Str. 2, 07545 Gera", COORDS_QUI),
		new Venue("hofwiesenpark", "Hofwiesenpark, 07545 Gera", COORDS_HOFWIESENPARK)
	);

	/**
	 * Creates the importer.
	 *
	 * @throws NoSuchAlgorithmException propagated from the {@link BaseImporter} constructor
	 */
	public AtParty() throws NoSuchAlgorithmException {
		super();
	}

	/** @return the root URL of the at-party.de site */
	@Override
	protected String baseUrl() {
		return "https://www.at-party.de";
	}

	/** @return a short human-readable description of this importer */
	@Override
	public String description() {
		return "Importer for Events listed at at-party.de";
	}

	/**
	 * Collects the attachments found by the base class and adds the background
	 * images of all elements whose class attribute has {@code slider-area}.
	 * Images that cannot be turned into an attachment are skipped.
	 *
	 * @param eventTag tag containing the event
	 * @return immutable list of distinct attachments (iteration order is unspecified)
	 */
	@Override
	protected List<Attachment> extractAttachments(Tag eventTag) {
		var combined = new HashSet<>(super.extractAttachments(eventTag));
		// note: stripping the query string (see dropQuery) is currently disabled
		eventTag.find(attributeHas(CLASS, "slider-area")).stream()
			.map(tag -> extractBackgroundImage(tag, baseUrl()))
			.map(Util::url)
			.map(Util::toAttachment)
			.map(Result::optional)
			.flatMap(Optional::stream)
			.forEach(combined::add);
		return List.copyOf(combined);
	}

	/**
	 * Removes everything from the first {@code ?} onwards from the wrapped string.
	 * Currently not used by the attachment pipeline.
	 *
	 * @param s result wrapping a URL-like string
	 * @return {@code s} itself if it carries no value, otherwise a payload with the query part removed
	 */
	private static Result<String> dropQuery(Result<String> s) {
		var opt = s.optional();
		if (opt.isEmpty()) return s;
		return Payload.of(opt.get().replaceAll("\\?.*", ""));
	}

	/** @return filter selecting tags whose class attribute has {@code event-details} */
	@Override
	protected Predicate<Tag> extractAttachmentsFilter() {
		return attributeHas(CLASS, "event-details");
	}

	/** @return filter selecting tags whose class attribute has {@code event-details} */
	@Override
	protected Predicate<Tag> extractDescriptionFilter() {
		return attributeHas(CLASS, "event-details");
	}

	/**
	 * Maps the raw location text (as extracted by the base class) to the
	 * coordinates of a known venue.
	 *
	 * @param eventTag tag containing the event
	 * @return coordinates of the matched venue, the transformed base-class result
	 *         if no location text was found, or an error for an unknown location
	 */
	@Override
	protected Result<Coords> extractCoords(Tag eventTag) {
		var res = super.extractLocation(eventTag);
		var opt = res.optional();
		if (opt.isEmpty()) return transform(res);
		var loc = opt.get();
		var venue = knownVenue(loc);
		if (venue.isPresent()) return Payload.of(venue.get().coords());
		return error("unknown location: %s", loc);
	}

	/**
	 * @return {@code null}
	 *         NOTE(review): presumably the base class treats a null filter as "no end date" - confirm
	 */
	@Override
	protected Predicate<Tag> extractEndDateFilter() {
		return null;
	}

	/**
	 * @return {@code null}
	 *         NOTE(review): presumably the base class treats a null filter as "no end time" - confirm
	 */
	@Override
	protected Predicate<Tag> extractEndTimeFilter() {
		return null;
	}

	/** @return filter selecting tags whose class attribute equals {@code main-content} */
	@Override
	protected Predicate<Tag> extractEventTagFilter() {
		return attributeEquals(CLASS, "main-content");
	}

	/**
	 * Collects the distinct URLs of all anchors on the program page whose
	 * {@code href} contains {@code /events/}. Links without a scheme are
	 * prefixed with {@link #baseUrl()}.
	 *
	 * @param programPage parsed program page, wrapped in a result
	 * @return list of event URLs, or the transformed input if it carries no page
	 */
	@Override
	protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
		var opt = programPage.optional();
		if (opt.isEmpty()) return transform(programPage);
		var list = opt.get().find(IS_ANCHOR)
			.stream().map(tag -> tag.get(HREF))
			.filter(Objects::nonNull)
			.filter(link -> link.contains("/events/"))
			.map(url -> url.contains("://") ? url : baseUrl() + url)
			.distinct()
			.toList();
		return Payload.of(list);
	}

	/** @return filter selecting tags whose class attribute has {@code event-details} */
	@Override
	protected Predicate<Tag> extractLinksFilter() {
		return attributeHas(CLASS, "event-details");
	}

	/**
	 * Maps the raw location text (as extracted by the base class) to the full
	 * address of a known venue. Unknown locations are logged and reported as
	 * {@code "Gera"}.
	 *
	 * @param eventTag tag containing the event
	 * @return address of the matched venue, {@code "Gera"} as fallback, or the
	 *         transformed base-class result if no location text was found
	 */
	@Override
	protected Result<String> extractLocation(Tag eventTag) {
		var res = super.extractLocation(eventTag);
		var opt = res.optional();
		if (opt.isEmpty()) return transform(res);
		var loc = opt.get();
		var venue = knownVenue(loc);
		if (venue.isPresent()) return Payload.of(venue.get().address());
		LOG.log(System.Logger.Level.ERROR, "unknown location: {0}", loc);
		return Payload.of("Gera");
	}

	/** @return filter selecting {@code h1} tags */
	@Override
	protected Predicate<Tag> extractLocationFilter() {
		return ofType("h1");
	}

	/** @return filter selecting tags whose class attribute equals {@code theme-gradient} */
	@Override
	protected Predicate<Tag> extractStartDateFilter() {
		return attributeEquals(CLASS, "theme-gradient");
	}

	/** @return filter selecting {@code h4} tags whose stripped content contains {@code "Uhr"} */
	@Override
	protected Predicate<Tag> extractStartTimeFilter() {
		return ofType("h4").and(tag -> tag.strip().contains("Uhr"));
	}

	/**
	 * @param eventTag tag containing the event (not inspected)
	 * @return the fixed tag list {@code ["Gera"]}
	 */
	@Override
	protected List<String> extractTags(Tag eventTag) {
		return List.of("Gera");
	}

	/**
	 * Takes the title extracted by the base class and keeps only the part
	 * before the first {@code |}, trimmed.
	 *
	 * @param eventTag tag containing the event
	 * @return the shortened title, or the base-class result if it carries no value
	 */
	@Override
	protected Result<String> extractTitle(Tag eventTag) {
		var res = super.extractTitle(eventTag);
		var opt = res.optional();
		if (opt.isEmpty()) return res;
		var title = opt.get();
		// indexOf instead of split("\\|")[0]: split yields an empty array for input
		// consisting only of '|' characters, which would make [0] throw
		var bar = title.indexOf('|');
		return Payload.of((bar < 0 ? title : title.substring(0, bar)).trim());
	}

	/** @return filter selecting {@code h1} tags */
	@Override
	protected Predicate<Tag> extractTitleFilter() {
		return ofType("h1");
	}

	/**
	 * Looks up the first known venue whose keyword occurs in the given text.
	 * Lower-casing uses {@link Locale#ROOT} so matching does not depend on the
	 * default locale of the JVM.
	 *
	 * @param location raw location text
	 * @return the matching venue, or empty if none matches
	 */
	private static Optional<Venue> knownVenue(String location) {
		var lower = location.toLowerCase(Locale.ROOT);
		return VENUES.stream().filter(venue -> lower.contains(venue.keyword())).findFirst();
	}

	/**
	 * @param string ignored
	 * @return {@code null}
	 *         NOTE(review): presumably the base class accepts null as "no end date" - confirm
	 */
	@Override
	protected Result<LocalDate> parseEndDate(String string) {
		return null;
	}

	/**
	 * @param string ignored
	 * @return {@code null}
	 *         NOTE(review): presumably the base class accepts null as "no end time" - confirm
	 */
	@Override
	protected Result<LocalTime> parseEndTime(String string) {
		return null;
	}

	/**
	 * @param string date text taken from the event page
	 * @return result of {@code parseLongGermanDate} for the given text
	 */
	@Override
	protected Result<LocalDate> parseStartDate(String string) {
		return parseLongGermanDate(string);
	}

	/**
	 * @param string time text taken from the event page
	 * @return result of {@code parseGermanTime} for the given text
	 */
	@Override
	protected Result<LocalTime> parseStartTime(String string) {
		return parseGermanTime(string);
	}

	/**
	 * The at-party site emits broken meta tags, so everything before the
	 * {@code <body} tag is cut off before the document is processed further.
	 * If no {@code <body} tag is present, the document is passed on unchanged.
	 * The incoming stream is fully read and closed.
	 *
	 * @param inputStream incoming InputStream, wrapped in a Result
	 * @return InputStream over the (possibly shortened) document, wrapped in a Result;
	 *         the transformed input if it carries no stream; an error if reading fails
	 */
	@Override
	protected Result<InputStream> preload(Result<InputStream> inputStream) {
		var opt = inputStream.optional();
		if (opt.isEmpty()) return transform(inputStream);
		// try-with-resources closes the source stream even if reading fails
		try (var input = opt.get()) {
			var code = new String(input.readAllBytes(), UTF_8);
			var pos = code.indexOf("<body");
			// indexOf yields -1 when there is no body tag; substring(-1) would throw
			var body = pos < 0 ? code : code.substring(pos);
			return Payload.of(new ByteArrayInputStream(body.getBytes(UTF_8)));
		} catch (IOException e) {
			return error(e, "Failed to buffer data from %s", inputStream);
		}
	}

	/** @return URL of the page listing all events */
	@Override
	protected String programURL() {
		return baseUrl() + "/veranstaltungen";
	}
}

View File

@@ -97,7 +97,9 @@ public class Wotufa extends BaseImporter {
Result<Tag> locationTag = extractLocationTag(eventTag);
if (locationTag.optional().isEmpty()) return transform(locationTag);
var loc = locationTag.optional().get().strip();
return Payload.of("Neustadt an der Orla".equals(loc) ? DEFAULT_LOCATION : loc);
var wotufa = loc.equals("Neustadt an der Orla") || loc.toLowerCase().contains("wotufa");
return Payload.of(wotufa ? DEFAULT_LOCATION : loc);
}
@Override