added hash to appointment, implemented KassaBlanca parser
Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
This commit is contained in:
@@ -34,6 +34,12 @@ public interface Appointment {
|
|||||||
*/
|
*/
|
||||||
Optional<LocalDateTime> end();
|
Optional<LocalDateTime> end();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a unique identifier based on the event content
|
||||||
|
* @return a unique identifier derived from the content of the event
|
||||||
|
*/
|
||||||
|
String hash();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ID of the appointment – unique within this system
|
* ID of the appointment – unique within this system
|
||||||
* @return the appointment`s id
|
* @return the appointment`s id
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
/* © SRSoftware 2024 */
|
/* © SRSoftware 2024 */
|
||||||
package de.srsoftware.cal.app;
|
package de.srsoftware.cal.app;
|
||||||
|
|
||||||
import de.srsoftware.cal.importer.jena.Rosenkeller;
|
import de.srsoftware.cal.importer.jena.Kassablanca;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test application
|
* Test application
|
||||||
@@ -14,9 +15,9 @@ public class Application {
|
|||||||
* sandbox
|
* sandbox
|
||||||
* @param args default
|
* @param args default
|
||||||
*/
|
*/
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) throws NoSuchAlgorithmException {
|
||||||
var rosenkeller = new Rosenkeller();
|
var importer = new Kassablanca();
|
||||||
var appointments = rosenkeller.fetch();
|
var appointments = importer.fetch();
|
||||||
appointments.forEach(System.err::println);
|
appointments.forEach(System.out::println);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,6 @@ description = "OpenCloudCal : Importers"
|
|||||||
dependencies {
|
dependencies {
|
||||||
implementation(project(":de.srsoftware.cal.api"))
|
implementation(project(":de.srsoftware.cal.api"))
|
||||||
implementation("de.srsoftware:tools.optionals:1.0.0")
|
implementation("de.srsoftware:tools.optionals:1.0.0")
|
||||||
implementation("de.srsoftware:tools.util:1.1.2")
|
implementation("de.srsoftware:tools.util:1.1.3")
|
||||||
implementation("de.srsoftware:tools.web:1.3.2")
|
implementation("de.srsoftware:tools.web:1.3.3")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ public class BaseAppointment implements Appointment {
|
|||||||
private final long id;
|
private final long id;
|
||||||
private final String title, description;
|
private final String title, description;
|
||||||
private final LocalDateTime end, start;
|
private final LocalDateTime end, start;
|
||||||
|
private final String hash;
|
||||||
private Coords coords = null;
|
private Coords coords = null;
|
||||||
private final Set<Attachment> attachments = new HashSet<>();
|
private final Set<Attachment> attachments = new HashSet<>();
|
||||||
private final Set<String> tags = new HashSet<>();
|
private final Set<String> tags = new HashSet<>();
|
||||||
@@ -32,9 +33,10 @@ public class BaseAppointment implements Appointment {
|
|||||||
* @param end set the end date
|
* @param end set the end date
|
||||||
* @param location set the location
|
* @param location set the location
|
||||||
*/
|
*/
|
||||||
public BaseAppointment(long id, String title, String description, LocalDateTime start, LocalDateTime end, String location) {
|
public BaseAppointment(long id, String title, String description, LocalDateTime start, LocalDateTime end, String location, String hash) {
|
||||||
this.description = description;
|
this.description = description;
|
||||||
this.end = end;
|
this.end = end;
|
||||||
|
this.hash = hash;
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.location = location;
|
this.location = location;
|
||||||
this.start = start;
|
this.start = start;
|
||||||
@@ -131,6 +133,11 @@ public class BaseAppointment implements Appointment {
|
|||||||
return nullable(end);
|
return nullable(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String hash() {
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long id() {
|
public long id() {
|
||||||
return id;
|
return id;
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
/* © SRSoftware 2024 */
|
/* © SRSoftware 2024 */
|
||||||
package de.srsoftware.cal.importer;
|
package de.srsoftware.cal.importer;
|
||||||
|
|
||||||
|
import static de.srsoftware.tools.Strings.hex;
|
||||||
import static de.srsoftware.tools.TagFilter.ofType;
|
import static de.srsoftware.tools.TagFilter.ofType;
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
import de.srsoftware.cal.api.*;
|
import de.srsoftware.cal.api.*;
|
||||||
import de.srsoftware.tools.*;
|
import de.srsoftware.tools.*;
|
||||||
@@ -12,6 +14,8 @@ import java.net.MalformedURLException;
|
|||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -20,6 +24,13 @@ import java.util.Optional;
|
|||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public abstract class BaseImporter implements Importer {
|
public abstract class BaseImporter implements Importer {
|
||||||
|
private static final String SHA256 = "SHA-256";
|
||||||
|
private final MessageDigest digest;
|
||||||
|
|
||||||
|
/**
 * Set up the importer by obtaining the SHA-256 message digest that {@link #hash(String)} relies on.
 * @throws NoSuchAlgorithmException if no SHA-256 implementation is available
 */
protected BaseImporter() throws NoSuchAlgorithmException {
	this.digest = MessageDigest.getInstance(SHA256);
}
|
||||||
|
|
||||||
protected abstract String baseUrl();
|
protected abstract String baseUrl();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -47,7 +58,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected Result<String> extractDescription(Tag eventTag){
|
protected Result<String> extractDescription(Tag eventTag) {
|
||||||
Result<Tag> titleTag = extractDescriptionTag(eventTag);
|
Result<Tag> titleTag = extractDescriptionTag(eventTag);
|
||||||
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
||||||
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
||||||
@@ -91,7 +102,9 @@ public abstract class BaseImporter implements Importer {
|
|||||||
if (locationResult.optional().isEmpty()) return transform(locationResult);
|
if (locationResult.optional().isEmpty()) return transform(locationResult);
|
||||||
var location = locationResult.optional().get();
|
var location = locationResult.optional().get();
|
||||||
|
|
||||||
var event = new BaseAppointment(id, title, description, start, end, location) //
|
var hash = hash("%s@%s".formatted(start, location));
|
||||||
|
|
||||||
|
var event = new BaseAppointment(id, title, description, start, end, location, hash) //
|
||||||
.add(extractAttachments(eventTag))
|
.add(extractAttachments(eventTag))
|
||||||
.addLinks(extractLinks(eventTag))
|
.addLinks(extractLinks(eventTag))
|
||||||
.tags(extractTags(eventTag));
|
.tags(extractTags(eventTag));
|
||||||
@@ -139,7 +152,7 @@ public abstract class BaseImporter implements Importer {
|
|||||||
|
|
||||||
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
|
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
|
||||||
|
|
||||||
private Result<Tag> extractLinksTag(Tag eventTag) {
|
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
||||||
return extractDescriptionTag(eventTag);
|
return extractDescriptionTag(eventTag);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,9 +166,9 @@ public abstract class BaseImporter implements Importer {
|
|||||||
|
|
||||||
|
|
||||||
protected Result<LocalDateTime> extractStart(Tag eventTag) {
|
protected Result<LocalDateTime> extractStart(Tag eventTag) {
|
||||||
Result<Tag> endTag = extractStartTag(eventTag);
|
Result<Tag> startTag = extractStartTag(eventTag);
|
||||||
if (endTag.optional().isEmpty()) return transform(endTag);
|
if (startTag.optional().isEmpty()) return transform(startTag);
|
||||||
return parseStartDate(endTag.optional().get().toString(0));
|
return parseStartDate(startTag.optional().get().strip());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Result<Tag> extractStartTag(Tag eventTag);
|
protected abstract Result<Tag> extractStartTag(Tag eventTag);
|
||||||
@@ -164,9 +177,9 @@ public abstract class BaseImporter implements Importer {
|
|||||||
protected abstract List<String> extractTags(Tag eventTag);
|
protected abstract List<String> extractTags(Tag eventTag);
|
||||||
|
|
||||||
protected Result<String> extractTitle(Tag eventTag) {
|
protected Result<String> extractTitle(Tag eventTag) {
|
||||||
Result<Tag> locationTag = extractTitleTag(eventTag);
|
Result<Tag> titleTag = extractTitleTag(eventTag);
|
||||||
if (locationTag.optional().isEmpty()) return transform(locationTag);
|
if (titleTag.optional().isEmpty()) return transform(titleTag);
|
||||||
var inner = locationTag.optional().flatMap(tag -> tag.inner(2));
|
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
|
||||||
if (inner.isPresent()) return Payload.of(inner.get());
|
if (inner.isPresent()) return Payload.of(inner.get());
|
||||||
return Error.of("No title found");
|
return Error.of("No title found");
|
||||||
}
|
}
|
||||||
@@ -186,9 +199,21 @@ public abstract class BaseImporter implements Importer {
|
|||||||
return stream //
|
return stream //
|
||||||
.map(this::url)
|
.map(this::url)
|
||||||
.map(this::loadEvent)
|
.map(this::loadEvent)
|
||||||
|
.peek(e -> {
|
||||||
|
if (e instanceof Error<Appointment> err) System.err.println(err);
|
||||||
|
})
|
||||||
.flatMap(result -> result.optional().stream());
|
.flatMap(result -> result.optional().stream());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a hash from a text
|
||||||
|
* @param plain the plain text
|
||||||
|
* @return the hash of the plain text
|
||||||
|
*/
|
||||||
|
protected String hash(String plain){
|
||||||
|
return hex(digest.digest(plain.getBytes(UTF_8)));
|
||||||
|
}
|
||||||
|
|
||||||
protected static <T> Result<T> invalidParameter(Result<?> result) {
|
protected static <T> Result<T> invalidParameter(Result<?> result) {
|
||||||
return Error.format("Invalid parameter: %s", result.getClass().getSimpleName());
|
return Error.format("Invalid parameter: %s", result.getClass().getSimpleName());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
/* © SRSoftware 2024 */
|
||||||
|
package de.srsoftware.cal.importer.jena;
|
||||||
|
|
||||||
|
import static de.srsoftware.tools.TagFilter.*;
|
||||||
|
|
||||||
|
import de.srsoftware.cal.importer.BaseImporter;
|
||||||
|
import de.srsoftware.tools.*;
|
||||||
|
import de.srsoftware.tools.Error;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class Kassablanca extends BaseImporter {
|
||||||
|
public static final String BASE_URL = "https://www.kassablanca.de";
|
||||||
|
private static final String APPOINTMENT_TAG_ID = "entry-content";
|
||||||
|
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d+).(\\d+).(\\d+).*Beginn\\s*(\\d+):(\\d+)\\s*Uhr");
|
||||||
|
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
|
||||||
|
|
||||||
|
public Kassablanca() throws NoSuchAlgorithmException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String baseUrl() {
|
||||||
|
return BASE_URL;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(attributeHas("class", "se-content"));
|
||||||
|
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||||
|
return Error.of("Failed to find description tag");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractEndTag(Tag eventTag) {
|
||||||
|
return Error.format("end date not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
|
||||||
|
if (pageResult.optional().isEmpty()) return transform(pageResult);
|
||||||
|
var list = pageResult.optional().get().find(attributeEquals("class", APPOINTMENT_TAG_ID));
|
||||||
|
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||||
|
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
|
||||||
|
if (programPage.optional().isEmpty()) return transform(programPage);
|
||||||
|
List<String> list = programPage.optional()
|
||||||
|
.get() //
|
||||||
|
.find(attributeHas("class", "eventrow"))
|
||||||
|
.stream()
|
||||||
|
.flatMap(t -> t.find(ofType("h3")).stream())
|
||||||
|
.map(t -> t.find(ofType("a")))
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.map(t -> t.get("href"))
|
||||||
|
.toList();
|
||||||
|
return Payload.of(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
|
||||||
|
if (tagResult.optional().isEmpty()) return transform(tagResult);
|
||||||
|
var tag = tagResult.optional().get();
|
||||||
|
tag.find(attributeEquals("id", "filterbar")).stream().findAny().ifPresent(Tag::remove); // remove div with unrelated links
|
||||||
|
var anchors = tag.find(withAttribute("href"));
|
||||||
|
return Payload.of(anchors);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractLinksTag(Tag eventTag) {
|
||||||
|
return Payload.of(eventTag);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractLocationTag(Tag eventTag) {
|
||||||
|
return Payload.of(new Text(LOCATION));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractStartTag(Tag eventTag) {
|
||||||
|
List<Tag> tags = eventTag.find(attributeEquals("class", "se-header"));
|
||||||
|
if (tags.size() == 1) return Payload.of(tags.getFirst());
|
||||||
|
return Error.of("Failed to find event time information");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> extractTags(Tag eventTag) {
|
||||||
|
return List.of("Kassablanca");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<Tag> extractTitleTag(Tag eventTag) {
|
||||||
|
var list = eventTag.find(ofType("h1"));
|
||||||
|
if (list.size() == 1) return Payload.of(list.getFirst());
|
||||||
|
return Error.of("Failed to find title tag");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDateTime> parseEndDate(String string) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Result<LocalDateTime> parseStartDate(String string) {
|
||||||
|
var matcher = START_DATE_PATTERN.matcher(string);
|
||||||
|
if (matcher.find()) {
|
||||||
|
var day = Integer.parseInt(matcher.group(1));
|
||||||
|
var month = Integer.parseInt(matcher.group(2));
|
||||||
|
var year = Integer.parseInt(matcher.group(3));
|
||||||
|
var hour = Integer.parseInt(matcher.group(4));
|
||||||
|
var minute = Integer.parseInt(matcher.group(5));
|
||||||
|
var date = LocalDateTime.of(year, month, day, hour, minute);
|
||||||
|
return Payload.of(date);
|
||||||
|
}
|
||||||
|
return Error.of("Could not recognize start date/time");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String programURL() {
|
||||||
|
return BASE_URL + "/programm";
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -9,6 +9,7 @@ import de.srsoftware.tools.Error;
|
|||||||
import de.srsoftware.tools.Payload;
|
import de.srsoftware.tools.Payload;
|
||||||
import de.srsoftware.tools.Result;
|
import de.srsoftware.tools.Result;
|
||||||
import de.srsoftware.tools.Tag;
|
import de.srsoftware.tools.Tag;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
@@ -19,6 +20,10 @@ public class Rosenkeller extends BaseImporter {
|
|||||||
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
|
||||||
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
|
||||||
|
|
||||||
|
public Rosenkeller() throws NoSuchAlgorithmException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String baseUrl() {
|
protected String baseUrl() {
|
||||||
return BASE_URL;
|
return BASE_URL;
|
||||||
|
|||||||
Reference in New Issue
Block a user