Browse Source

implemented importer für Bandhaus Leipzig

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
6b0d3a6bc4
  1. 30
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java
  2. 20
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java
  3. 10
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Psychochor.java
  4. 180
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/leipzig/Bandhaus.java

30
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java

@ -1,9 +1,9 @@ @@ -1,9 +1,9 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal;
import static de.srsoftware.cal.Util.combine;
import static de.srsoftware.cal.Util.url;
import static de.srsoftware.cal.Util.*;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.nullIfEmpty;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.HREF;
import static de.srsoftware.tools.TagFilter.*;
@ -13,6 +13,8 @@ import de.srsoftware.tools.*; @@ -13,6 +13,8 @@ import de.srsoftware.tools.*;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
@ -40,18 +42,18 @@ public abstract class BaseImporter implements Importer { @@ -40,18 +42,18 @@ public abstract class BaseImporter implements Importer {
protected List<Attachment> extractAttachments(Tag eventTag) {
return extractAttachmentsTag(eventTag) //
.optional()
.stream()
.flatMap(tag -> tag.find(IS_IMAGE).stream())
.map(tag -> tag.get("src"))
.filter(Objects::nonNull)
.map(url -> url.contains("://") ? url : baseUrl()+url)
.map(Payload::of)
.map(Util::url)
.map(Util::toAttachment)
.map(Result::optional)
.flatMap(Optional::stream)
.toList();
.optional()
.stream()
.flatMap(tag -> tag.find(IS_IMAGE).stream())
.map(tag -> tag.get("src"))
.filter(Objects::nonNull)
.map(url -> url.contains("://") ? url : baseUrl()+url)
.map(Payload::of)
.map(Util::url)
.map(Util::toAttachment)
.map(Result::optional)
.flatMap(Optional::stream)
.toList();
}
/**
 * Supplies the predicate that identifies the tag holding an event's attachments.
 * NOTE(review): presumably consumed by {@code extractAttachmentsTag}, whose body is not
 * visible in this excerpt — confirm.
 *
 * @return the predicate chosen by the concrete importer
 */
protected abstract Predicate<Tag> extractAttachmentsFilter();

20
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java

@ -11,8 +11,12 @@ import de.srsoftware.cal.api.Coords; @@ -11,8 +11,12 @@ import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
@ -75,6 +79,14 @@ public class Util { @@ -75,6 +79,14 @@ public class Util {
return String.join("\r\n",lines);
}
/**
 * Debugging aid: writes the rendering of {@code tag} (as produced by {@code tag.toString(4)})
 * to the fixed file {@code /tmp/dump.txt}.
 *
 * @param tag the tag to dump
 * @throws java.io.UncheckedIOException if the file cannot be written; the original
 *         {@link IOException} is preserved as the cause
 */
public static void dump(Tag tag){
    try {
        Files.writeString(Path.of("/tmp/dump.txt"),tag.toString(4));
    } catch (IOException e) {
        // UncheckedIOException is still a RuntimeException, so existing callers are unaffected,
        // but the failure now carries its I/O nature and some context.
        throw new java.io.UncheckedIOException("Failed to write tag dump to /tmp/dump.txt", e);
    }
}
public static Result<Coords> extractCoords(String coords) {
if (coords == null) return error("Argument is null");
if (coords.isBlank()) return error("Argument is blank");
@ -197,6 +209,14 @@ public class Util { @@ -197,6 +209,14 @@ public class Util {
}
}
/**
 * Parses a decimal integer without throwing.
 *
 * @param s the text to parse
 * @return a payload holding the parsed value, or an error result wrapping the
 *         {@link NumberFormatException} if {@code s} is not a valid integer
 */
public static Result<Integer> parseInt(String s){
    final Integer parsed;
    try {
        parsed = Integer.valueOf(s);
    } catch (NumberFormatException e){
        return error(e,"Failed to parse %s as integer!",s);
    }
    return Payload.of(parsed);
}
public static Result<URL> url(Result<String> urlResult) {
if (urlResult.optional().isEmpty()) return transform(urlResult);
var url = urlResult.optional().get();

10
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Psychochor.java

@ -47,11 +47,6 @@ public class Psychochor extends SinglePageImporter { @@ -47,11 +47,6 @@ public class Psychochor extends SinglePageImporter {
return attributeEquals("itemprop","description");
}
/**
 * Selects the tag carrying the event description: the predicate is built with
 * {@code attributeEquals("itemprop","description")}.
 *
 * @return the description predicate
 */
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Result<Coords> extractCoords(Tag eventTag) {
var list = eventTag.find(ofType("iframe"));
@ -74,6 +69,11 @@ public class Psychochor extends SinglePageImporter { @@ -74,6 +69,11 @@ public class Psychochor extends SinglePageImporter {
return error("Failed to parse coords from %s",s);
}
/**
 * Selects the tag carrying the event description: the predicate is built with
 * {@code attributeEquals("itemprop","description")}.
 *
 * @return the description predicate
 */
@Override
protected Predicate<Tag> extractDescriptionFilter() {
return attributeEquals("itemprop","description");
}
@Override
protected Predicate<Tag> extractEndDateFilter() {
return null;

180
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/leipzig/Bandhaus.java

@ -0,0 +1,180 @@ @@ -0,0 +1,180 @@
package de.srsoftware.cal.importer.leipzig;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Error.error;
import static de.srsoftware.tools.Optionals.nullable;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
/**
 * Importer for events of the Bandhaus Leipzig, read from {@code https://bandcommunity-leipzig.org}.
 *
 * <p>Most hooks only supply predicates that the {@link BaseImporter} uses to locate parts of an
 * event page. Event URLs, the location and the start date are extracted by custom code here,
 * because the page splits the date over several elements and attributes.
 */
public class Bandhaus extends BaseImporter {
    /** Coordinates returned for events whose address mentions "saarländer". NOTE(review): presumably latitude, longitude — confirm against {@code Coords}. */
    private static final Coords COORDS_BANDHAUS = new Coords(51.32498, 12.31578);

    /**
     * Creates the importer.
     *
     * @throws NoSuchAlgorithmException propagated from the {@link BaseImporter} constructor
     */
    public Bandhaus() throws NoSuchAlgorithmException {
        super();
    }

    /** @return the site root; relative links found on the pages are prefixed with it */
    @Override
    protected String baseUrl() {
        return "https://bandcommunity-leipzig.org";
    }

    /** @return a human-readable (German) description of this importer */
    @Override
    public String description() {
        return "Importer für Veranstaltungen des Bandhaus Leipzig";
    }

    /** @return predicate built with {@code attributeHas(CLASS,"event_description")} */
    @Override
    protected Predicate<Tag> extractAttachmentsFilter() {
        return attributeHas(CLASS,"event_description");
    }

    /** @return predicate built with {@code attributeEquals("itemprop","description")} */
    @Override
    protected Predicate<Tag> extractDescriptionFilter() {
        return attributeEquals("itemprop","description");
    }

    /**
     * Maps the address shown on the event page to known coordinates.
     *
     * <p>Only one venue is known: if the stripped text of the first tag found with
     * {@code attributeHas(CLASS,"evo_location_address")} contains "saarländer"
     * (case-insensitively), {@link #COORDS_BANDHAUS} is returned.
     *
     * @param eventTag the tag describing a single event
     * @return the coordinates, or an error if no address tag exists or the address is unknown
     */
    @Override
    protected Result<Coords> extractCoords(Tag eventTag) {
        var list = eventTag.find(attributeHas(CLASS,"evo_location_address"));
        if (list.isEmpty()) return error("Failed to locate address → failed to locate coords");
        var loc = list.getFirst().strip();
        // Locale.ROOT keeps the case folding independent of the JVM's default locale.
        var lower = loc.toLowerCase(java.util.Locale.ROOT);
        if (lower.contains("saarländer")) return Payload.of(COORDS_BANDHAUS);
        return error("Unknown location: %s",loc);
    }

    /**
     * No end date is extracted for this source.
     * NOTE(review): returns {@code null}; presumably the base importer treats a null filter as
     * "not available" — confirm.
     */
    @Override
    protected Predicate<Tag> extractEndDateFilter() {
        return null;
    }

    /**
     * No end time is extracted for this source.
     * NOTE(review): returns {@code null}; presumably the base importer treats a null filter as
     * "not available" — confirm.
     */
    @Override
    protected Predicate<Tag> extractEndTimeFilter() {
        return null;
    }

    /** @return predicate built with {@code attributeStartsWith(ID, "event_")} */
    @Override
    protected Predicate<Tag> extractEventTagFilter() {
        return attributeStartsWith(ID, "event_");
    }

    /**
     * Collects the links to the individual event pages from the program page.
     *
     * <p>Anchors are searched inside the tags matching {@code attributeEquals(ID, "evcal_list")}.
     * Only links containing {@code "/events/"} are kept; links without {@code "://"} are prefixed
     * with {@link #baseUrl()}, and duplicates are removed.
     *
     * @param programPage the loaded program page, or an error
     * @return the distinct event URLs, or the (transformed) error of {@code programPage}
     */
    @Override
    protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
        var opt = programPage.optional();
        if (opt.isEmpty()) return transform(programPage);
        var list = opt.get().find(attributeEquals(ID, "evcal_list")).stream()
            .flatMap(tag -> tag.find(IS_ANCHOR).stream())
            .map(anchor -> anchor.get(HREF))
            .filter(Objects::nonNull)
            .filter(link -> link.contains("/events/"))
            .map(link -> link.contains("://") ? link : baseUrl()+link)
            .distinct()
            .toList();
        return Payload.of(list);
    }

    /** @return predicate built with {@code attributeEquals("itemprop","description")}, i.e. the same one as {@link #extractDescriptionFilter()} */
    @Override
    protected Predicate<Tag> extractLinksFilter() {
        return attributeEquals("itemprop","description");
    }

    /**
     * Gathers all paragraphs found inside the tags matching {@link #extractLocationFilter()}
     * and wraps them in a new {@code DIV} tag.
     *
     * @param eventTag the tag describing a single event
     * @return the wrapper tag, or an error if no paragraph was found
     */
    protected Result<Tag> extractLocationTag(Tag eventTag){
        var list = eventTag.find(extractLocationFilter()).stream()
            .flatMap(tag -> tag.find(IS_PARAGRAPH).stream())
            .toList();
        if (list.isEmpty()) return error("Failed to find location tag");
        return Payload.of(Tag.of(DIV).addAll(list));
    }

    /**
     * Builds the location text by joining the stripped paragraphs found inside the tags
     * matching {@link #extractLocationFilter()} with {@code ", "}.
     *
     * @param eventTag the tag describing a single event
     * @return the joined location text, or an error if it is blank
     */
    protected Result<String> extractLocation(Tag eventTag) {
        var location = eventTag.find(extractLocationFilter()).stream()
            .flatMap(tag -> tag.find(IS_PARAGRAPH).stream())
            .map(Tag::strip)
            .collect(Collectors.joining(", "));
        return location.isBlank() ? error("Failed to find location!") : Payload.of(location);
    }

    /**
     * @return predicate combining {@code attributeContains(CLASS,"data_cell")} with the
     *         requirement that searching the tag for
     *         {@code attributeHas(CLASS,"evo_location_address")} yields at least one match
     */
    @Override
    protected Predicate<Tag> extractLocationFilter() {
        return attributeContains(CLASS,"data_cell").and(tag -> !tag.find(attributeHas(CLASS,"evo_location_address")).isEmpty());
    }

    /**
     * Assembles the start date from three separate pieces of the start-date tag: the year from
     * the {@code data-syr} attribute, the day from the element with class {@code date} and the
     * month from the element with class {@code month} (converted by {@code Util::toNumericMonth}).
     *
     * @param eventTag the tag describing a single event
     * @return the start date, or an error if the tag or any component is missing or unparsable,
     *         or if the components do not form a valid calendar date
     */
    @Override
    protected Result<LocalDate> extractStartDate(Tag eventTag) {
        Result<Tag> startDateTag = extractStartDateTag(eventTag);
        var opt = startDateTag.optional();
        if (opt.isEmpty()) return transform(startDateTag);
        var dateTag = opt.get();

        // findFirst() makes the choice deterministic if several elements match.
        Result<Integer> year = dateTag.find(withAttribute("data-syr")).stream()
            .map(tag -> tag.get("data-syr"))
            .map(Util::parseInt)
            .findFirst()
            .orElseGet(() -> error("Failed to find start date"));
        Result<Integer> day = dateTag.find(attributeEquals(CLASS, "date")).stream()
            .map(Tag::strip)
            .map(Util::parseInt)
            .findFirst()
            .orElseGet(() -> error("Failed to find start date"));
        Result<Integer> month = dateTag.find(attributeEquals(CLASS, "month")).stream()
            .map(Tag::strip)
            .map(Util::toNumericMonth)
            .findFirst()
            .orElseGet(() -> error("Failed to find start date"));

        if (year.optional().isEmpty()) return transform(year);
        if (month.optional().isEmpty()) return transform(month);
        if (day.optional().isEmpty()) return transform(day);

        int y = year.optional().get();
        int m = month.optional().get();
        int d = day.optional().get();
        try {
            return Payload.of(LocalDate.of(y, m, d));
        } catch (java.time.DateTimeException e) {
            // Scraped values may parse as integers yet not form a real date (e.g. day 31 in a
            // 30-day month); report that through the Result instead of throwing.
            return error(e,"Invalid start date: %s",y+"-"+m+"-"+d);
        }
    }

    /**
     * @return predicate combining {@code withAttribute("data-syr")} with the requirement that
     *         searching the tag for {@code attributeContains(CLASS,"evo_start")} yields at least
     *         one match
     */
    @Override
    protected Predicate<Tag> extractStartDateFilter() {
        return withAttribute("data-syr").and(tag -> !tag.find(attributeContains(CLASS,"evo_start")).isEmpty());
    }

    /** @return predicate built with {@code attributeContains(CLASS,"evo_start")} */
    @Override
    protected Predicate<Tag> extractStartTimeFilter() {
        return attributeContains(CLASS,"evo_start");
    }

    /**
     * @param eventTag ignored; the tags are the same for every event
     * @return the fixed tags "Bandhaus" and "Leipzig"
     */
    @Override
    protected List<String> extractTags(Tag eventTag) {
        return List.of("Bandhaus","Leipzig");
    }

    /** @return predicate built with {@code attributeHas(CLASS,"evcal_event_title")} */
    @Override
    protected Predicate<Tag> extractTitleFilter() {
        return attributeHas(CLASS,"evcal_event_title");
    }

    /**
     * Not supported for this source; always returns {@code null}.
     * NOTE(review): a null {@code Result} is returned rather than an error — confirm the base
     * importer never dereferences it.
     */
    @Override
    protected Result<LocalDate> parseEndDate(String string) {
        return null;
    }

    /**
     * Not supported for this source; always returns {@code null}.
     * NOTE(review): a null {@code Result} is returned rather than an error — confirm the base
     * importer never dereferences it.
     */
    @Override
    protected Result<LocalTime> parseEndTime(String string) {
        return null;
    }

    /**
     * Always returns {@code null}; this class assembles the start date in
     * {@link #extractStartDate(Tag)} instead.
     * NOTE(review): presumably never called because {@code extractStartDate} is overridden — confirm.
     */
    @Override
    protected Result<LocalDate> parseStartDate(String string) {
        return null;
    }

    /**
     * Parses the start time by delegating to {@code parseGermanTime}.
     * NOTE(review): a single space is appended to the input first; this looks like a workaround
     * for the parser expecting a trailing delimiter — confirm.
     *
     * @param string the raw time text
     * @return the result of {@code parseGermanTime}
     */
    @Override
    protected Result<LocalTime> parseStartTime(String string) {
        return parseGermanTime(string+" ");
    }

    /** @return the address of the program page: {@link #baseUrl()} followed by {@code "/veranstaltungen"} */
    @Override
    protected String programURL() {
        return baseUrl()+"/veranstaltungen";
    }
}
Loading…
Cancel
Save