Browse Source

added importer for Gerberstraße Weimar

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 4 months ago
parent
commit
364146d36f
  1. 1
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java
  2. 4
      de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java
  3. 146
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/weimar/Gerberstrasse.java

1
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/BaseImporter.java

@@ -193,6 +193,7 @@ public abstract class BaseImporter implements Importer {
return url(Payload.of(href)).optional().map(url -> new Link(url,txt)).orElse(null);
})
.filter(Objects::nonNull)
.filter(link -> link.desciption() != null)
.toList();
}

4
de.srsoftware.cal.base/src/main/java/de/srsoftware/cal/Util.java

@@ -126,7 +126,7 @@ public class Util {
}
public static Result<LocalDate> parseGermanDate(String s){
var match = GERMAN_DATE_PATTERN.matcher(" "+s+" ");
var match = GERMAN_DATE_PATTERN.matcher(" "+s.trim()+" ");
if (match.find()){
var day = Integer.parseInt(match.group(1));
var month = Integer.parseInt(match.group(2));
@@ -150,7 +150,7 @@ public class Util {
}
public static Result<LocalTime> parseGermanTime(String s){
var match = GERMAN_TIME_PATTERN.matcher(s);
var match = GERMAN_TIME_PATTERN.matcher(" "+s.trim()+" ");
if (match.find()){
var hour = Integer.parseInt(match.group(1));
var minute = Integer.parseInt(match.group(2));

146
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/weimar/Gerberstrasse.java

@@ -0,0 +1,146 @@
package de.srsoftware.cal.importer.weimar;
import de.srsoftware.cal.BaseImporter;
import de.srsoftware.cal.Util;
import de.srsoftware.cal.api.Coords;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
import static de.srsoftware.cal.Util.parseGermanDate;
import static de.srsoftware.cal.Util.parseGermanTime;
import static de.srsoftware.tools.Result.transform;
import static de.srsoftware.tools.Tag.*;
import static de.srsoftware.tools.TagFilter.*;
/**
 * Importer for events of the Gerberstraße 3 in Weimar.
 *
 * <p>Location and coordinates are fixed constants. No end date or end time is
 * extracted: the corresponding filters and parsers all return {@code null}.
 */
public class Gerberstrasse extends BaseImporter {
    private static final String DEFAULT_LOCATION = "Gerberstraße 3, 99423 Weimar";
    private static final Coords DEFAULT_COORDS = new Coords(50.98276, 11.3311);

    /**
     * Creates the importer by delegating to the {@link BaseImporter} constructor.
     *
     * @throws NoSuchAlgorithmException declared because the super constructor may throw it
     */
    public Gerberstrasse() throws NoSuchAlgorithmException {
        super();
    }

    // ---------------------------------------------------------------- helpers

    /** Builds the filter on {@code class} = {@code "article"} shared by several extractors. */
    private static Predicate<Tag> articleContainerFilter() {
        return attributeEquals(CLASS, "article");
    }

    /** Builds the filter on {@code itemprop} = {@code "datePublished"} used for start date and time. */
    private static Predicate<Tag> datePublishedFilter() {
        return attributeEquals("itemprop", "datePublished");
    }

    // ------------------------------------------------------- site description

    /** @return the root address of the website */
    @Override
    protected String baseUrl() {
        return "https://www.gerberstrasse.org";
    }

    /** @return the address of the event overview page, i.e. {@link #baseUrl()} plus {@code /veranstaltungen} */
    @Override
    protected String programURL() {
        return baseUrl() + "/veranstaltungen";
    }

    /** @return a human-readable (German) description of this importer */
    @Override
    public String description() {
        return "Importer für Veranstaltungen der Gerberstraße 3 in Weimar";
    }

    // ------------------------------------------------------------ tag filters

    /** @return the filter on {@code class} = {@code "article"} */
    @Override
    protected Predicate<Tag> extractEventTagFilter() {
        return articleContainerFilter();
    }

    /** @return the filter on {@code class} = {@code "article"} */
    @Override
    protected Predicate<Tag> extractAttachmentsFilter() {
        return articleContainerFilter();
    }

    /** @return the filter on {@code class} = {@code "article"} */
    @Override
    protected Predicate<Tag> extractLinksFilter() {
        return articleContainerFilter();
    }

    /**
     * Combines two conditions: the {@code class} attribute filter for the prefix {@code "col-"},
     * and a check that {@code find} with the {@code "-related-"} class filter yields a non-empty
     * result on the tested tag.
     *
     * @return the combined filter
     */
    @Override
    protected Predicate<Tag> extractDescriptionFilter() {
        Predicate<Tag> hasRelatedMatch = candidate -> !candidate.find(attributeContains(CLASS, "-related-")).isEmpty();
        return attributeStartsWith(CLASS, "col-").and(hasRelatedMatch);
    }

    /** @return a filter matching {@code h1} tags */
    @Override
    protected Predicate<Tag> extractTitleFilter() {
        return ofType("h1");
    }

    /** @return the filter on {@code itemprop} = {@code "datePublished"} */
    @Override
    protected Predicate<Tag> extractStartDateFilter() {
        return datePublishedFilter();
    }

    /** @return the same filter as {@link #extractStartDateFilter()}; date and time share one tag filter */
    @Override
    protected Predicate<Tag> extractStartTimeFilter() {
        return datePublishedFilter();
    }

    /** @return {@code null}; no end date filter is provided */
    @Override
    protected Predicate<Tag> extractEndDateFilter() {
        return null;
    }

    /** @return {@code null}; no end time filter is provided */
    @Override
    protected Predicate<Tag> extractEndTimeFilter() {
        return null;
    }

    /** @return {@code null}; the location is supplied by {@link #extractLocation(Tag)} instead */
    @Override
    protected Predicate<Tag> extractLocationFilter() {
        return null;
    }

    // ------------------------------------------------------------- extractors

    /**
     * Collects the event page addresses from the program page.
     *
     * <p>Only the first tag matching the {@code "news-list-view"} class filter is considered.
     * From the anchors found in it, the non-null {@code href} values containing
     * {@code "/veranstaltung/"} are kept, de-duplicated, and prefixed with {@link #baseUrl()}
     * unless they already contain {@code "://"}.
     *
     * @param programPage result holding the loaded program page
     * @return the list of event addresses, or {@code transform(programPage)} when the page result is empty
     */
    @Override
    protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
        var page = programPage.optional();
        if (page.isEmpty()) {
            return transform(programPage);
        }

        var listViews = page.get().find(attributeEquals(CLASS, "news-list-view"));
        var eventUrls = listViews.stream()
                .findFirst().stream()
                .flatMap(listView -> listView.find(IS_ANCHOR).stream())
                .map(anchor -> anchor.get(HREF))
                .filter(href -> href != null && href.contains("/veranstaltung/"))
                .distinct()
                .map(href -> href.contains("://") ? href : baseUrl() + href)
                .toList();
        return Payload.of(eventUrls);
    }

    /**
     * @param eventTag ignored
     * @return the fixed street address wrapped in a payload
     */
    @Override
    protected Result<String> extractLocation(Tag eventTag) {
        return Payload.of(DEFAULT_LOCATION);
    }

    /**
     * @param eventTag ignored
     * @return the fixed coordinates wrapped in a payload
     */
    @Override
    protected Result<Coords> extractCoords(Tag eventTag) {
        return Payload.of(DEFAULT_COORDS);
    }

    /**
     * @param eventTag ignored
     * @return the constant list of tags attached to every imported event
     */
    @Override
    protected List<String> extractTags(Tag eventTag) {
        return List.of("Gerberstraße", "Weimar", "besetztesHaus");
    }

    // ---------------------------------------------------------------- parsers

    /**
     * @param string text to parse
     * @return the outcome of {@code Util.parseGermanDate} for the given text
     */
    @Override
    protected Result<LocalDate> parseStartDate(String string) {
        return parseGermanDate(string);
    }

    /**
     * @param string text to parse
     * @return the outcome of {@code Util.parseGermanTime} for the given text
     */
    @Override
    protected Result<LocalTime> parseStartTime(String string) {
        return parseGermanTime(string);
    }

    /**
     * @param string ignored
     * @return {@code null}; end dates are not parsed
     */
    @Override
    protected Result<LocalDate> parseEndDate(String string) {
        return null;
    }

    /**
     * @param string ignored
     * @return {@code null}; end times are not parsed
     */
    @Override
    protected Result<LocalTime> parseEndTime(String string) {
        return null;
    }
}
Loading…
Cancel
Save