|
|
|
@ -1,12 +1,17 @@
@@ -1,12 +1,17 @@
|
|
|
|
|
/* © SRSoftware 2024 */ |
|
|
|
|
package de.srsoftware.cal.importer.jena; |
|
|
|
|
|
|
|
|
|
import static de.srsoftware.cal.Util.parseGermanDate; |
|
|
|
|
import static de.srsoftware.cal.Util.parseGermanTime; |
|
|
|
|
import static de.srsoftware.tools.Error.error; |
|
|
|
|
import static de.srsoftware.tools.Result.transform; |
|
|
|
|
import static de.srsoftware.tools.Tag.CLASS; |
|
|
|
|
import static de.srsoftware.tools.TagFilter.*; |
|
|
|
|
import static java.nio.charset.StandardCharsets.UTF_8; |
|
|
|
|
|
|
|
|
|
import de.srsoftware.cal.BaseImporter; |
|
|
|
|
import de.srsoftware.cal.Util; |
|
|
|
|
import de.srsoftware.cal.api.Coords; |
|
|
|
|
import de.srsoftware.tools.*; |
|
|
|
|
import java.io.ByteArrayInputStream; |
|
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
@ -16,11 +21,16 @@ import java.nio.file.Files;
@@ -16,11 +21,16 @@ import java.nio.file.Files;
|
|
|
|
|
import java.nio.file.Path; |
|
|
|
|
import java.security.NoSuchAlgorithmException; |
|
|
|
|
import java.time.LocalDate; |
|
|
|
|
import java.time.LocalTime; |
|
|
|
|
import java.util.DuplicateFormatFlagsException; |
|
|
|
|
import java.util.List; |
|
|
|
|
import java.util.function.Predicate; |
|
|
|
|
import java.util.regex.Pattern; |
|
|
|
|
|
|
|
|
|
public abstract class CosmicDawn extends BaseImporter { |
|
|
|
|
public class CosmicDawn extends BaseImporter { |
|
|
|
|
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d\\d?).(\\d\\d?).(\\d{4}).*(\\d\\d?):(\\d\\d?)"); |
|
|
|
|
private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg 28, 07743 Jena"; |
|
|
|
|
private static final Coords DEFAULT_COORDS = new Coords(50.93663, 11.59254); |
|
|
|
|
|
|
|
|
|
public CosmicDawn() throws NoSuchAlgorithmException { |
|
|
|
|
super(); |
|
|
|
@ -32,20 +42,45 @@ public abstract class CosmicDawn extends BaseImporter {
@@ -32,20 +42,45 @@ public abstract class CosmicDawn extends BaseImporter {
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<Tag> extractDescriptionTag(Tag eventTag) { |
|
|
|
|
var list = eventTag.find(attributeEndsWith("class","event-body-content")); |
|
|
|
|
return list.isEmpty() ? error("failed to find <div class=\"…event-body-content\">") : Payload.of(list.getFirst()); |
|
|
|
|
public String description() { |
|
|
|
|
return "Importiert Events des Kulturbahnhofs in Jena"; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected Result<Tag> extractEndTag(Tag eventTag) { |
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractAttachmentsFilter() { |
|
|
|
|
return attributeContains(CLASS,"single-event-page"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractDescriptionFilter() { |
|
|
|
|
return attributeContains(CLASS,"event-body-content"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<Coords> extractCoords(Tag eventTag) { |
|
|
|
|
return Payload.of(DEFAULT_COORDS); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractEndDateFilter() { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractEndTimeFilter() { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected Result<Tag> extractEventTag(Result<Tag> pageResult) { |
|
|
|
|
if (pageResult.optional().isEmpty()) return transform(pageResult); |
|
|
|
|
List<Tag> list = pageResult.optional().get().find(attributeEquals("class", "inside-article")); |
|
|
|
|
return (list.isEmpty()) ? error("Failed to find <div class=\"inside-article\">!") : Payload.of(list.getFirst()); |
|
|
|
|
var res = super.extractEventTag(pageResult); |
|
|
|
|
// remove youtube embeddings
|
|
|
|
|
if (res instanceof Payload<Tag> payload) payload.get().find(attributeContains(CLASS,"youtube")).forEach(Tag::remove); |
|
|
|
|
return res; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractEventTagFilter() { |
|
|
|
|
return attributeEquals(CLASS,"inside-article"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
@ -65,33 +100,39 @@ public abstract class CosmicDawn extends BaseImporter {
@@ -65,33 +100,39 @@ public abstract class CosmicDawn extends BaseImporter {
|
|
|
|
|
return Payload.of(urlList); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractLinksFilter() { |
|
|
|
|
return attributeContains(CLASS,"single-event-wrapper"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<String> extractLocation(Tag eventTag) { |
|
|
|
|
return Payload.of(DEFAULT_LOCATION); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<Tag> extractLocationTag(Tag eventTag) { |
|
|
|
|
protected Predicate<Tag> extractLocationFilter() { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected Result<Tag> extractStartTag(Tag eventTag) { |
|
|
|
|
var dateTags = eventTag.find(attributeContains("class","event-date-time")); |
|
|
|
|
if (dateTags.isEmpty()) return error("Start date not found!"); |
|
|
|
|
var times = eventTag.find(attributeEquals("class","event_time")).stream() |
|
|
|
|
.flatMap(tag -> tag.find(IS_SPAN).stream()) |
|
|
|
|
.filter(tag -> tag.toString().contains("Begin")) |
|
|
|
|
.toList(); |
|
|
|
|
if (times.isEmpty()) return error("Start time not found!"); |
|
|
|
|
var div = Tag.of("div").add(dateTags.getFirst()).add(times.getFirst()); |
|
|
|
|
return Payload.of(div); |
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractStartDateFilter() { |
|
|
|
|
return attributeEquals("itemprop","startDate"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Predicate<Tag> extractStartTimeFilter() { |
|
|
|
|
return attributeEquals(CLASS,"event_time"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected List<String> extractTags(Tag eventTag) { |
|
|
|
|
return List.of(); |
|
|
|
|
return List.of("Kulturbahnhof","Jena","CosmicDawn"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<Tag> extractTitleTag(Tag eventTag) { |
|
|
|
|
var list = eventTag.find(ofType("h1")); |
|
|
|
|
return list.isEmpty() ? error("failed to find <h1>") : Payload.of(list.getFirst()); |
|
|
|
|
protected Predicate<Tag> extractTitleFilter() { |
|
|
|
|
return ofType("h1"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
@ -100,19 +141,21 @@ public abstract class CosmicDawn extends BaseImporter {
@@ -100,19 +141,21 @@ public abstract class CosmicDawn extends BaseImporter {
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<LocalDate> parseStartDate(String date) { |
|
|
|
|
var matcher = START_DATE_PATTERN.matcher(date); |
|
|
|
|
if (matcher.find()){ |
|
|
|
|
int day = Integer.parseInt(matcher.group(1)); |
|
|
|
|
int mon = Integer.parseInt(matcher.group(2)); |
|
|
|
|
int year= Integer.parseInt(matcher.group(3)); |
|
|
|
|
int hour = Integer.parseInt(matcher.group(4)); |
|
|
|
|
int min = Integer.parseInt(matcher.group(5)); |
|
|
|
|
} |
|
|
|
|
protected Result<LocalTime> parseEndTime(String string) { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
@Override |
|
|
|
|
protected Result<LocalDate> parseStartDate(String string) { |
|
|
|
|
return parseGermanDate(string); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
|
protected Result<LocalTime> parseStartTime(String string) { |
|
|
|
|
return parseGermanTime(string); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Die Kuba-Seite haut einen haufen Script mit raus, der dazu führt, dass die Tags nicht richtig geparsed werden. |
|
|
|
|
* Also schneiden wir den kompletten header ab... |
|
|
|
|
* @param inputStream eingehender InputStream, verpackt in Result |
|
|
|
|