refactored CosmicDawn importer - now working!
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -92,7 +92,7 @@ public class Util {
|
||||
}
|
||||
|
||||
public static Result<LocalDate> parseGermanDate(String s){
|
||||
var match = GERMAN_DATE_PATTERN.matcher(s);
|
||||
var match = GERMAN_DATE_PATTERN.matcher(" "+s+" ");
|
||||
if (match.find()){
|
||||
var day = Integer.parseInt(match.group(1));
|
||||
var month = Integer.parseInt(match.group(2));
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
/* © SRSoftware 2024 */
|
||||
package de.srsoftware.cal.importer.jena;
|
||||
|
||||
import static de.srsoftware.cal.Util.parseGermanDate;
|
||||
import static de.srsoftware.cal.Util.parseGermanTime;
|
||||
import static de.srsoftware.tools.Error.error;
|
||||
import static de.srsoftware.tools.Result.transform;
|
||||
import static de.srsoftware.tools.Tag.CLASS;
|
||||
import static de.srsoftware.tools.TagFilter.*;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import de.srsoftware.cal.BaseImporter;
|
||||
import de.srsoftware.cal.Util;
|
||||
import de.srsoftware.cal.api.Coords;
|
||||
import de.srsoftware.tools.*;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
@@ -16,11 +21,16 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.util.DuplicateFormatFlagsException;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public abstract class CosmicDawn extends BaseImporter {
|
||||
public class CosmicDawn extends BaseImporter {
|
||||
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d\\d?).(\\d\\d?).(\\d{4}).*(\\d\\d?):(\\d\\d?)");
|
||||
private static final String DEFAULT_LOCATION = "Cosmic Dawn e.V., Spitzweidenweg 28, 07743 Jena";
|
||||
private static final Coords DEFAULT_COORDS = new Coords(50.93663, 11.59254);
|
||||
|
||||
public CosmicDawn() throws NoSuchAlgorithmException {
|
||||
super();
|
||||
@@ -32,20 +42,45 @@ public abstract class CosmicDawn extends BaseImporter {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
||||
var list = eventTag.find(attributeEndsWith("class","event-body-content"));
|
||||
return list.isEmpty() ? error("failed to find <div class=\"…event-body-content\">") : Payload.of(list.getFirst());
|
||||
public String description() {
|
||||
return "Importiert Events des Kulturbahnhofs in Jena";
|
||||
}
|
||||
|
||||
protected Result<Tag> extractEndTag(Tag eventTag) {
|
||||
@Override
|
||||
protected Predicate<Tag> extractAttachmentsFilter() {
|
||||
return attributeContains(CLASS,"single-event-page");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractDescriptionFilter() {
|
||||
return attributeContains(CLASS,"event-body-content");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Coords> extractCoords(Tag eventTag) {
|
||||
return Payload.of(DEFAULT_COORDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndDateFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEndTimeFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
||||
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
||||
List<Tag> list = pageResult.optional().get().find(attributeEquals("class", "inside-article"));
|
||||
return (list.isEmpty()) ? error("Failed to find <div class=\"inside-article\">!") : Payload.of(list.getFirst());
|
||||
var res = super.extractEventTag(pageResult);
|
||||
// remove youtube embeddings
|
||||
if (res instanceof Payload<Tag> payload) payload.get().find(attributeContains(CLASS,"youtube")).forEach(Tag::remove);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractEventTagFilter() {
|
||||
return attributeEquals(CLASS,"inside-article");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -65,33 +100,39 @@ public abstract class CosmicDawn extends BaseImporter {
|
||||
return Payload.of(urlList);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLinksFilter() {
|
||||
return attributeContains(CLASS,"single-event-wrapper");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
||||
protected Result<String> extractLocation(Tag eventTag) {
|
||||
return Payload.of(DEFAULT_LOCATION);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractLocationFilter() {
|
||||
return null;
|
||||
}
|
||||
|
||||
protected Result<Tag> extractStartTag(Tag eventTag) {
|
||||
var dateTags = eventTag.find(attributeContains("class","event-date-time"));
|
||||
if (dateTags.isEmpty()) return error("Start date not found!");
|
||||
var times = eventTag.find(attributeEquals("class","event_time")).stream()
|
||||
.flatMap(tag -> tag.find(IS_SPAN).stream())
|
||||
.filter(tag -> tag.toString().contains("Begin"))
|
||||
.toList();
|
||||
if (times.isEmpty()) return error("Start time not found!");
|
||||
var div = Tag.of("div").add(dateTags.getFirst()).add(times.getFirst());
|
||||
return Payload.of(div);
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartDateFilter() {
|
||||
return attributeEquals("itemprop","startDate");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Predicate<Tag> extractStartTimeFilter() {
|
||||
return attributeEquals(CLASS,"event_time");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> extractTags(Tag eventTag) {
|
||||
return List.of();
|
||||
return List.of("Kulturbahnhof","Jena","CosmicDawn");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
||||
var list = eventTag.find(ofType("h1"));
|
||||
return list.isEmpty() ? error("failed to find <h1>") : Payload.of(list.getFirst());
|
||||
protected Predicate<Tag> extractTitleFilter() {
|
||||
return ofType("h1");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -100,19 +141,21 @@ public abstract class CosmicDawn extends BaseImporter {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String date) {
|
||||
var matcher = START_DATE_PATTERN.matcher(date);
|
||||
if (matcher.find()){
|
||||
int day = Integer.parseInt(matcher.group(1));
|
||||
int mon = Integer.parseInt(matcher.group(2));
|
||||
int year= Integer.parseInt(matcher.group(3));
|
||||
int hour = Integer.parseInt(matcher.group(4));
|
||||
int min = Integer.parseInt(matcher.group(5));
|
||||
}
|
||||
protected Result<LocalTime> parseEndTime(String string) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
@Override
|
||||
protected Result<LocalDate> parseStartDate(String string) {
|
||||
return parseGermanDate(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Result<LocalTime> parseStartTime(String string) {
|
||||
return parseGermanTime(string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Die Kuba-Seite haut einen Haufen Script mit raus, der dazu führt, dass die Tags nicht richtig geparst werden.
|
||||
* Also schneiden wir den kompletten header ab...
|
||||
* @param inputStream eingehender InputStream, verpackt in Result
|
||||
|
||||
@@ -8,7 +8,7 @@ function attachmentList(json){
|
||||
var attachments = document.getElementById('attachments');
|
||||
attachments.innerHTML = '';
|
||||
for (var attachment of json){
|
||||
if (attachment.mime.startsWith('image')){
|
||||
if (attachment.mime.startsWith('image')||attachment.url.includes('.jpg')||attachment.url.includes('.png')||attachment.url.includes('.gif')){
|
||||
var img = document.createElement('img');
|
||||
img.src = attachment.url;
|
||||
attachments.appendChild(img);
|
||||
|
||||
Reference in New Issue
Block a user