4 changed files with 183 additions and 7 deletions
@ -0,0 +1,175 @@
@@ -0,0 +1,175 @@
|
||||
package de.srsoftware.cal.importer.erfurt; |
||||
|
||||
import de.srsoftware.cal.BaseImporter; |
||||
import de.srsoftware.cal.Util; |
||||
import de.srsoftware.cal.api.Coords; |
||||
import de.srsoftware.cal.api.Link; |
||||
import de.srsoftware.tools.Payload; |
||||
import de.srsoftware.tools.Result; |
||||
import de.srsoftware.tools.Tag; |
||||
import de.srsoftware.tools.TagFilter; |
||||
|
||||
import java.security.NoSuchAlgorithmException; |
||||
import java.time.LocalDate; |
||||
import java.time.LocalTime; |
||||
import java.util.List; |
||||
import java.util.Objects; |
||||
import java.util.function.Predicate; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import static de.srsoftware.cal.Util.dump; |
||||
import static de.srsoftware.cal.Util.url; |
||||
import static de.srsoftware.tools.Error.error; |
||||
import static de.srsoftware.tools.Result.transform; |
||||
import static de.srsoftware.tools.Tag.CLASS; |
||||
import static de.srsoftware.tools.Tag.HREF; |
||||
import static de.srsoftware.tools.TagFilter.*; |
||||
import static java.lang.System.Logger.Level.INFO; |
||||
|
||||
public class Eburg extends BaseImporter { |
||||
private static final Coords DEFAULT_COORDS = new Coords(50.97840, 11.027004); |
||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d\\d?)\\.\\s*(\\w+)\\W+(\\d\\d?)[.:](\\d\\d?)"); |
||||
public static final String DEFAULT_LOCATION = "Engelsburg, Allerheiligenstraße 20/21, 99084 Erfurt"; |
||||
public Eburg() throws NoSuchAlgorithmException { |
||||
super(); |
||||
} |
||||
|
||||
@Override |
||||
protected String baseUrl() { |
||||
return "https://engelsburg.club"; |
||||
} |
||||
|
||||
@Override |
||||
public String description() { |
||||
return "Importer für Events der Engelsburg in ERfurt"; |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractAttachmentsFilter() { |
||||
return attributeHas(CLASS,"post-content"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractDescriptionFilter() { |
||||
return attributeEquals(CLASS,"row"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Coords> extractCoords(Tag eventTag) { |
||||
return Payload.of(DEFAULT_COORDS); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEndDateFilter() { |
||||
return null; |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEndTimeFilter() { |
||||
return null; |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractEventTagFilter() { |
||||
return attributeHas(CLASS,"post-content"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) { |
||||
if (programPage.optional().isEmpty()) return transform(programPage); |
||||
var tag = programPage.optional().get(); |
||||
var list = tag.find(attributeHas(CLASS,"program-teaser")).stream() |
||||
.flatMap(teaser -> teaser.find(IS_ANCHOR).stream()) |
||||
.map(a -> a.get(HREF)) |
||||
.filter(Objects::nonNull) |
||||
.map(url -> url.contains("://") ? url : baseUrl()+url) |
||||
.filter(url -> url.startsWith(baseUrl())) |
||||
.distinct() |
||||
.toList(); |
||||
return Payload.of(list); |
||||
} |
||||
|
||||
protected List<Link> extractLinks(Tag appointmentTag) { |
||||
return super.extractLinks(appointmentTag).stream().filter(link -> !link.url().toString().contains("program-punkt")).toList(); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractLinksFilter() { |
||||
return attributeEquals(CLASS,"row"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<String> extractLocation(Tag eventTag) { |
||||
var res = super.extractLocation(eventTag); |
||||
var loc = DEFAULT_LOCATION + (res.optional().isPresent() ? ", "+res.optional().get() : ""); |
||||
return Payload.of(loc); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractLocationFilter() { |
||||
return attributeHas(CLASS,"location"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractStartDateFilter() { |
||||
return ofType("h4"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractStartTimeFilter() { |
||||
return ofType("h4"); |
||||
} |
||||
|
||||
@Override |
||||
protected List<String> extractTags(Tag eventTag) { |
||||
return List.of("Engelsburg","Erfurt"); |
||||
} |
||||
|
||||
@Override |
||||
protected Predicate<Tag> extractTitleFilter() { |
||||
return ofType("h1"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDate> parseEndDate(String string) { |
||||
return null; |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalTime> parseEndTime(String string) { |
||||
return null; |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDate> parseStartDate(String string) { |
||||
var matcher = DATE_PATTERN.matcher(string); |
||||
if (matcher.find()){ |
||||
int day = Integer.parseInt(matcher.group(1)); |
||||
var m = Util.toNumericMonth(matcher.group(2)); |
||||
if (m.optional().isEmpty()) return transform(m); |
||||
int month = m.optional().get(); |
||||
var now = LocalDate.now(); |
||||
int year = now.getYear(); |
||||
var start = LocalDate.of(year,month,day); |
||||
if (start.isBefore(now)) start = start.withYear(year+1); |
||||
return Payload.of(start); |
||||
} |
||||
return error("Failed to parse date from %s",string); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalTime> parseStartTime(String string) { |
||||
var matcher = DATE_PATTERN.matcher(string); |
||||
if (matcher.find()){ |
||||
int hour = Integer.parseInt(matcher.group(3)); |
||||
int min = Integer.parseInt(matcher.group(4)); |
||||
return Payload.of(LocalTime.of(hour,min)); |
||||
} |
||||
return error("Failed to parse time from %s",string); |
||||
} |
||||
|
||||
@Override |
||||
protected String programURL() { |
||||
return baseUrl()+"/programm"; |
||||
} |
||||
} |
Loading…
Reference in new issue