5 changed files with 225 additions and 6 deletions
@ -0,0 +1,219 @@
@@ -0,0 +1,219 @@
|
||||
package de.srsoftware.cal.importer.erfurt; |
||||
|
||||
import de.srsoftware.cal.BaseImporter; |
||||
import de.srsoftware.cal.Util; |
||||
import de.srsoftware.cal.api.Attachment; |
||||
import de.srsoftware.cal.api.Coords; |
||||
import de.srsoftware.cal.api.Link; |
||||
import de.srsoftware.tools.Payload; |
||||
import de.srsoftware.tools.Result; |
||||
import de.srsoftware.tools.Strings; |
||||
import de.srsoftware.tools.Tag; |
||||
|
||||
import java.io.ByteArrayInputStream; |
||||
import java.io.ByteArrayOutputStream; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.security.NoSuchAlgorithmException; |
||||
import java.time.LocalDate; |
||||
import java.time.LocalTime; |
||||
import java.util.HashSet; |
||||
import java.util.List; |
||||
import java.util.Objects; |
||||
import java.util.function.Predicate; |
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanTime; |
||||
import static de.srsoftware.cal.Util.parseLongGermanDate; |
||||
import static de.srsoftware.tools.Error.error; |
||||
import static de.srsoftware.tools.Optionals.nullIfEmpty; |
||||
import static de.srsoftware.tools.Result.transform; |
||||
import static de.srsoftware.tools.Tag.*; |
||||
import static de.srsoftware.tools.TagFilter.*; |
||||
import static java.nio.charset.StandardCharsets.UTF_8; |
||||
|
||||
public class FromHell extends BaseImporter { |
||||
|
||||
private static final Coords DEFAULT_COORDS = new Coords(50.97372, 10.9541); |
||||
private static final String DEFAULT_LOCATION = "Club From Hell, Flughafenstraße 41, 99092 Erfurt / Bindersleben"; |
||||
public FromHell() throws NoSuchAlgorithmException { |
||||
super(); |
||||
} |
||||
|
||||
@Override |
||||
protected String baseUrl() { |
||||
return "https://www.clubfromhell.de"; |
||||
} |
||||
|
||||
@Override |
||||
public String description() { |
||||
return "Importer für Events des Club „From Hell“ in Erfurt"; |
||||
} |
||||
|
||||
@Override |
||||
protected List<Attachment> extractAttachments(Tag eventTag) { |
||||
return super.extractAttachments(eventTag).stream() |
||||
.filter(att -> !att.url().toString().contains("/images/geo/")) |
||||
.toList(); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractAttachmentsFilter() { |
||||
return attributeHas(CLASS,"pane-content"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractDescriptionFilter() { |
||||
return attributeHas(CLASS,"views-field-body"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Coords> extractCoords(Tag eventTag) { |
||||
var res = super.extractLocation(eventTag); |
||||
if (res.optional().isEmpty()) return transform(res); |
||||
var location = res.optional().get().trim(); |
||||
var lower = location.toLowerCase(); |
||||
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_COORDS); |
||||
return error("Unknown location: %s → cannot bind coordinates!",location); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEndDateFilter() { |
||||
return attributeHas(CLASS,"date-display-end"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEndTimeFilter() { |
||||
return attributeHas(CLASS,"date-display-end"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEventTagFilter() { |
||||
return attributeHas(CLASS,"pane-events-uebersicht-alle"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) { |
||||
var opt = programPage.optional(); |
||||
if (opt.isEmpty()) return transform(programPage); |
||||
var list = opt.get().find(attributeContains(CLASS,"pane-events-uebersicht-alle-liste")).stream() |
||||
.flatMap(tag -> tag.find(IS_ANCHOR).stream()) |
||||
.map(tag -> tag.get(HREF)) |
||||
.filter(Objects::nonNull) |
||||
.map(link -> link.contains("://") ? link : baseUrl()+link) |
||||
.distinct() |
||||
.toList(); |
||||
return Payload.of(list); |
||||
} |
||||
|
||||
@Override |
||||
protected List<Link> extractLinks(Tag appointmentTag) { |
||||
return super.extractLinks(appointmentTag).stream() |
||||
.filter(link -> !link.url().toString().contains("/genre/")) |
||||
.filter(link -> !link.url().toString().contains("/maps/")) |
||||
.filter(link -> nullIfEmpty(link.desciption()) != null) |
||||
.toList(); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractLinksFilter() { |
||||
return attributeHas(CLASS,"pane-content"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<String> extractLocation(Tag eventTag) { |
||||
var res = super.extractLocation(eventTag); |
||||
if (res.optional().isEmpty()) return transform(res); |
||||
var location = res.optional().get().trim(); |
||||
var lower = location.toLowerCase(); |
||||
if (lower.startsWith("club from hell")) return Payload.of(DEFAULT_LOCATION); |
||||
return Payload.of(location); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractLocationFilter() { |
||||
return IS_ANCHOR.and(tag -> tag.parent().map(p -> p.get(ID)).filter(Objects::nonNull).map(s->s.contains("field_ort_short")).orElse(false)); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractStartDateFilter() { |
||||
return attributeHas(CLASS,"date-display-start"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractStartTimeFilter() { |
||||
return attributeHas(CLASS,"date-display-start"); |
||||
} |
||||
|
||||
@Override |
||||
protected List<String> extractTags(Tag eventTag) { |
||||
var tags = new HashSet<String>(); |
||||
tags.add("FromHell"); |
||||
tags.add("Erfurt"); |
||||
eventTag.find(attributeContains(CLASS,"views-field-field-genre")).stream() |
||||
.flatMap(tag -> tag.find(IS_ANCHOR).stream()) |
||||
.map(Tag::strip) |
||||
.map(Strings::camelCase) |
||||
.forEach(tags::add); |
||||
return List.copyOf(tags); |
||||
} |
||||
|
||||
protected Result<String> extractTitle(Tag eventTag) { |
||||
Result<Tag> titleTag = extractTitleTag(eventTag); |
||||
if (titleTag.optional().isEmpty()) return transform(titleTag); |
||||
var inner = titleTag.optional().map(Tag::strip); |
||||
return inner.isPresent() ? Payload.of(inner.get().trim()) : error("No title found"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractTitleFilter() { |
||||
return ofType("h2"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDate> parseEndDate(String string) { |
||||
return parseLongGermanDate(string); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalTime> parseEndTime(String string) { |
||||
return parseGermanTime(string); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDate> parseStartDate(String string) { |
||||
return parseLongGermanDate(string); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalTime> parseStartTime(String string) { |
||||
return parseGermanTime(string); |
||||
} |
||||
|
||||
/** |
||||
* Die FromHell-Seite haut einen fefekten Header raus, der den Parser lahmlegt |
||||
* Also schneiden wir den kompletten header ab... |
||||
* @param inputStream eingehender InputStream, verpackt in Result |
||||
* @return ausgehender InputStream, verpackt in Result |
||||
*/ |
||||
@Override |
||||
protected Result<InputStream> preload(Result<InputStream> inputStream) { |
||||
var opt = inputStream.optional(); |
||||
if (opt.isEmpty()) return transform(inputStream); |
||||
try { |
||||
var input = opt.get(); |
||||
var bos = new ByteArrayOutputStream(); |
||||
input.transferTo(bos); |
||||
input.close(); |
||||
String code = bos.toString(UTF_8); |
||||
var pos = code.indexOf("<body"); |
||||
return Payload.of(new ByteArrayInputStream(code.substring(pos).getBytes(UTF_8))); |
||||
} catch (IOException e) { |
||||
return error(e, "Failed to buffer data from %s", inputStream); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
protected String programURL() { |
||||
return baseUrl()+"/events/uebersicht-alle_liste.html"; |
||||
} |
||||
} |
Loading…
Reference in new issue