implemented importer für Bandhaus Leipzig
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal;
|
||||
|
||||
import static de.srsoftware.cal.Util.combine;
|
||||
import static de.srsoftware.cal.Util.url;
|
||||
import static de.srsoftware.cal.Util.*;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Optionals.nullIfEmpty;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.HREF;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
@@ -13,6 +13,8 @@ import de.srsoftware.tools.*;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
@@ -40,18 +42,18 @@ public abstract class BaseImporter implements Importer {
|
||||
|
||||
protected List<Attachment> extractAttachments(Tag eventTag) {
|
||||
return extractAttachmentsTag(eventTag) //
|
||||
.optional()
|
||||
.stream()
|
||||
.flatMap(tag -> tag.find(IS_IMAGE).stream())
|
||||
.map(tag -> tag.get("src"))
|
||||
.filter(Objects::nonNull)
|
||||
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
||||
.map(Payload::of)
|
||||
.map(Util::url)
|
||||
.map(Util::toAttachment)
|
||||
.map(Result::optional)
|
||||
.flatMap(Optional::stream)
|
||||
.toList();
|
||||
.optional()
|
||||
.stream()
|
||||
.flatMap(tag -> tag.find(IS_IMAGE).stream())
|
||||
.map(tag -> tag.get("src"))
|
||||
.filter(Objects::nonNull)
|
||||
.map(url -> url.contains("://") ? url : baseUrl()+url)
|
||||
.map(Payload::of)
|
||||
.map(Util::url)
|
||||
.map(Util::toAttachment)
|
||||
.map(Result::optional)
|
||||
.flatMap(Optional::stream)
|
||||
.toList();
|
||||
}
|
||||
|
||||
protected abstract Predicate<Tag> extractAttachmentsFilter();
|
||||
|
||||
@@ -11,8 +11,12 @@ import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.LocalTime;
|
||||
@@ -75,6 +79,14 @@ public class Util {
|
||||
return String.join("\r\n",lines);
|
||||
}
|
||||
|
||||
public static void dump(Tag tag){
|
||||
try {
|
||||
Files.writeString(Path.of("/tmp/dump.txt"),tag.toString(4));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static Result<Coords> extractCoords(String coords) {
|
||||
if (coords == null) return error("Argument is null");
|
||||
if (coords.isBlank()) return error("Argument is blank");
|
||||
@@ -197,6 +209,14 @@ public class Util {
|
||||
}
|
||||
}
|
||||
|
||||
public static Result<Integer> parseInt(String s){
|
||||
try {
|
||||
return Payload.of(Integer.parseInt(s));
|
||||
} catch (NumberFormatException e){
|
||||
return error(e,"Failed to parse %s as integer!",s);
|
||||
}
|
||||
}
|
||||
|
||||
public static Result<URL> url(Result<String> urlResult) {
|
||||
if (urlResult.optional().isEmpty()) return transform(urlResult);
|
||||
var url = urlResult.optional().get();
|
||||
|
||||
@@ -47,11 +47,6 @@ public class Psychochor extends SinglePageImporter {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
var list = eventTag.find(ofType("iframe"));
|
||||
@@ -74,6 +69,11 @@ public class Psychochor extends SinglePageImporter {
|
||||
return error("Failed to parse coords from %s",s);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
package de.srsoftware.cal.importer.leipzig;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.Payload;
|
||||
import de.srsoftware.tools.Result;
|
||||
import de.srsoftware.tools.Tag;
|
||||
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Optionals.nullable;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.*;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
|
||||
public class Bandhaus extends BaseImporter {
|
||||
private static final Coords COORDS_BANDHAUS = new Coords(51.32498, 12.31578);
|
||||
|
||||
public Bandhaus() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String baseUrl() {
|
||||
return "https://bandcommunity-leipzig.org";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Importer für Veranstaltungen des Bandhaus Leipzig";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return attributeHas(CLASS,"event_description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
var list = eventTag.find(attributeHas(CLASS,"evo_location_address"));
|
||||
if (list.isEmpty()) return error("Failed to locate address → failed to locate coords");
|
||||
var loc = list.getFirst().strip();
|
||||
var lower = loc.toLowerCase();
|
||||
if (lower.contains("saarländer")) return Payload.of(COORDS_BANDHAUS);
|
||||
return error("Unknown location: %s",loc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return attributeStartsWith(ID, "event_");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||
var opt = programPage.optional();
|
||||
if (opt.isEmpty())return transform(programPage);
|
||||
var list = opt.get().find(attributeEquals(ID, "evcal_list")).stream()
|
||||
.flatMap(tag -> tag.find(IS_ANCHOR).stream())
|
||||
.map(anchor -> anchor.get(HREF))
|
||||
.filter(Objects::nonNull).filter(link-> link.contains("/events/"))
|
||||
.map(link -> link.contains("://") ? link : baseUrl()+link)
|
||||
.distinct().toList();
|
||||
return Payload.of(list);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return attributeEquals("itemprop","description");
|
||||
}
|
||||
|
||||
protected Result<Tag> extractLocationTag(Tag eventTag){
|
||||
var list = eventTag.find(extractLocationFilter()).stream()
|
||||
.flatMap(tag -> tag.find(IS_PARAGRAPH).stream()).toList();
|
||||
if (list.isEmpty()) return error("Failed to find location tag");
|
||||
return Payload.of(Tag.of(DIV).addAll(list));
|
||||
}
|
||||
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
var location = eventTag.find(extractLocationFilter()).stream()
|
||||
.flatMap(tag -> tag.find(IS_PARAGRAPH).stream())
|
||||
.map(Tag::strip).collect(Collectors.joining(", "));
|
||||
|
||||
return location.isBlank() ? error("Failed to find location!") : Payload.of(location);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return attributeContains(CLASS,"data_cell").and(tag -> !tag.find(attributeHas(CLASS,"evo_location_address")).isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> extractStartDate(Tag eventTag) {
|
||||
Result<Tag> startDateTag = extractStartDateTag(eventTag);
|
||||
var opt = startDateTag.optional();
|
||||
if (opt.isEmpty()) return transform(startDateTag);
|
||||
var dateTag = opt.get();
|
||||
Result<Integer> year = dateTag.find(withAttribute("data-syr")).stream()
|
||||
.map(tag -> tag.get("data-syr"))
|
||||
.map(Util::parseInt).findAny().orElseGet(() -> error("Failed to find start date"));
|
||||
Result<Integer> day = dateTag.find(attributeEquals(CLASS, "date")).stream()
|
||||
.map(Tag::strip).map(Util::parseInt).findAny().orElseGet(() -> error("Failed to find start date"));
|
||||
Result<Integer> month = dateTag.find(attributeEquals(CLASS, "month")).stream()
|
||||
.map(Tag::strip).map(Util::toNumericMonth).findAny().orElseGet(() -> error("Failed to find start date"));
|
||||
if (year.optional().isEmpty()) return transform(year);
|
||||
if (month.optional().isEmpty()) return transform(month);
|
||||
if (day.optional().isEmpty()) return transform(day);
|
||||
return Payload.of(LocalDate.of(year.optional().get(),month.optional().get(),day.optional().get()));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return withAttribute("data-syr").and(tag -> !tag.find(attributeContains(CLASS,"evo_start")).isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return attributeContains(CLASS,"evo_start");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
return List.of("Bandhaus","Leipzig");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return attributeHas(CLASS,"evcal_event_title");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseEndDate(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return parseGermanTime(string+" ");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String programURL() {
|
||||
return baseUrl()+"/veranstaltungen";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user