Browse Source

added hash to appointment, implemented KassaBlanca parser

Signed-off-by: Stephan Richter <s.richter@srsoftware.de>
main
Stephan Richter 7 months ago
parent
commit
3158db3bbd
  1. 6
      de.srsoftware.cal.api/src/main/java/de/srsoftware/cal/api/Appointment.java
  2. 11
      de.srsoftware.cal.app/src/main/java/de/srsoftware/cal/app/Application.java
  3. 4
      de.srsoftware.cal.importer/build.gradle.kts
  4. 9
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseAppointment.java
  5. 43
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseImporter.java
  6. 126
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Kassablanca.java
  7. 5
      de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

6
de.srsoftware.cal.api/src/main/java/de/srsoftware/cal/api/Appointment.java

@ -34,6 +34,12 @@ public interface Appointment { @@ -34,6 +34,12 @@ public interface Appointment {
*/
Optional<LocalDateTime> end();
/**
 * Create a unique identifier based on the event content.
 * @return the identifier derived from the event content, as a string
 */
String hash();
/**
* ID of the appointment unique within this system
* @return the appointment's id

11
de.srsoftware.cal.app/src/main/java/de/srsoftware/cal/app/Application.java

@ -1,7 +1,8 @@ @@ -1,7 +1,8 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.app;
import de.srsoftware.cal.importer.jena.Rosenkeller;
import de.srsoftware.cal.importer.jena.Kassablanca;
import java.security.NoSuchAlgorithmException;
/**
* Test application
@ -14,9 +15,9 @@ public class Application { @@ -14,9 +15,9 @@ public class Application {
* sandbox
* @param args default
*/
public static void main(String[] args) {
var rosenkeller = new Rosenkeller();
var appointments = rosenkeller.fetch();
appointments.forEach(System.err::println);
public static void main(String[] args) throws NoSuchAlgorithmException {
	// Sandbox run: fetch everything the Kassablanca importer delivers
	// and print each appointment to standard output.
	new Kassablanca().fetch().forEach(System.out::println);
}
}

4
de.srsoftware.cal.importer/build.gradle.kts

@ -3,6 +3,6 @@ description = "OpenCloudCal : Importers" @@ -3,6 +3,6 @@ description = "OpenCloudCal : Importers"
dependencies {
implementation(project(":de.srsoftware.cal.api"))
implementation("de.srsoftware:tools.optionals:1.0.0")
implementation("de.srsoftware:tools.util:1.1.2")
implementation("de.srsoftware:tools.web:1.3.2")
implementation("de.srsoftware:tools.util:1.1.3")
implementation("de.srsoftware:tools.web:1.3.3")
}

9
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseAppointment.java

@ -17,6 +17,7 @@ public class BaseAppointment implements Appointment { @@ -17,6 +17,7 @@ public class BaseAppointment implements Appointment {
private final long id;
private final String title, description;
private final LocalDateTime end, start;
private final String hash;
private Coords coords = null;
private final Set<Attachment> attachments = new HashSet<>();
private final Set<String> tags = new HashSet<>();
@ -32,9 +33,10 @@ public class BaseAppointment implements Appointment { @@ -32,9 +33,10 @@ public class BaseAppointment implements Appointment {
* @param end set the end date
* @param location set the location
*/
public BaseAppointment(long id, String title, String description, LocalDateTime start, LocalDateTime end, String location) {
public BaseAppointment(long id, String title, String description, LocalDateTime start, LocalDateTime end, String location, String hash) {
this.description = description;
this.end = end;
this.hash = hash;
this.id = id;
this.location = location;
this.start = start;
@ -131,6 +133,11 @@ public class BaseAppointment implements Appointment { @@ -131,6 +133,11 @@ public class BaseAppointment implements Appointment {
return nullable(end);
}
/**
 * Return the hash that was handed to the constructor of this appointment.
 * @return the stored hash value
 */
@Override
public String hash() {
	return this.hash;
}
@Override
public long id() {
return id;

43
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/BaseImporter.java

@ -1,7 +1,9 @@ @@ -1,7 +1,9 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer;
import static de.srsoftware.tools.Strings.hex;
import static de.srsoftware.tools.TagFilter.ofType;
import static java.nio.charset.StandardCharsets.UTF_8;
import de.srsoftware.cal.api.*;
import de.srsoftware.tools.*;
@ -12,6 +14,8 @@ import java.net.MalformedURLException; @@ -12,6 +14,8 @@ import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
@ -20,6 +24,13 @@ import java.util.Optional; @@ -20,6 +24,13 @@ import java.util.Optional;
import java.util.stream.Stream;
public abstract class BaseImporter implements Importer {
/** Name of the digest algorithm requested from {@link MessageDigest}. */
private static final String SHA256 = "SHA-256";

// NOTE(review): a single stateful digest instance is shared by all hash() calls of this
// importer – confirm that importers are never used from several threads at once.
private final MessageDigest digest;

/**
 * Set up the message digest used for hashing.
 * @throws NoSuchAlgorithmException if no provider offers the {@code SHA-256} algorithm
 */
protected BaseImporter() throws NoSuchAlgorithmException {
	this.digest = MessageDigest.getInstance(SHA256);
}
protected abstract String baseUrl();
@Override
@ -47,7 +58,7 @@ public abstract class BaseImporter implements Importer { @@ -47,7 +58,7 @@ public abstract class BaseImporter implements Importer {
}
protected Result<String> extractDescription(Tag eventTag){
protected Result<String> extractDescription(Tag eventTag) {
Result<Tag> titleTag = extractDescriptionTag(eventTag);
if (titleTag.optional().isEmpty()) return transform(titleTag);
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
@ -91,7 +102,9 @@ public abstract class BaseImporter implements Importer { @@ -91,7 +102,9 @@ public abstract class BaseImporter implements Importer {
if (locationResult.optional().isEmpty()) return transform(locationResult);
var location = locationResult.optional().get();
var event = new BaseAppointment(id, title, description, start, end, location) //
var hash = hash("%s@%s".formatted(start, location));
var event = new BaseAppointment(id, title, description, start, end, location, hash) //
.add(extractAttachments(eventTag))
.addLinks(extractLinks(eventTag))
.tags(extractTags(eventTag));
@ -139,7 +152,7 @@ public abstract class BaseImporter implements Importer { @@ -139,7 +152,7 @@ public abstract class BaseImporter implements Importer {
public abstract Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult);
private Result<Tag> extractLinksTag(Tag eventTag) {
protected Result<Tag> extractLinksTag(Tag eventTag) {
return extractDescriptionTag(eventTag);
}
@ -153,9 +166,9 @@ public abstract class BaseImporter implements Importer { @@ -153,9 +166,9 @@ public abstract class BaseImporter implements Importer {
protected Result<LocalDateTime> extractStart(Tag eventTag) {
Result<Tag> endTag = extractStartTag(eventTag);
if (endTag.optional().isEmpty()) return transform(endTag);
return parseStartDate(endTag.optional().get().toString(0));
Result<Tag> startTag = extractStartTag(eventTag);
if (startTag.optional().isEmpty()) return transform(startTag);
return parseStartDate(startTag.optional().get().strip());
}
protected abstract Result<Tag> extractStartTag(Tag eventTag);
@ -164,9 +177,9 @@ public abstract class BaseImporter implements Importer { @@ -164,9 +177,9 @@ public abstract class BaseImporter implements Importer {
protected abstract List<String> extractTags(Tag eventTag);
protected Result<String> extractTitle(Tag eventTag) {
Result<Tag> locationTag = extractTitleTag(eventTag);
if (locationTag.optional().isEmpty()) return transform(locationTag);
var inner = locationTag.optional().flatMap(tag -> tag.inner(2));
Result<Tag> titleTag = extractTitleTag(eventTag);
if (titleTag.optional().isEmpty()) return transform(titleTag);
var inner = titleTag.optional().flatMap(tag -> tag.inner(2));
if (inner.isPresent()) return Payload.of(inner.get());
return Error.of("No title found");
}
@ -186,9 +199,21 @@ public abstract class BaseImporter implements Importer { @@ -186,9 +199,21 @@ public abstract class BaseImporter implements Importer {
return stream //
.map(this::url)
.map(this::loadEvent)
.peek(e -> {
if (e instanceof Error<Appointment> err) System.err.println(err);
})
.flatMap(result -> result.optional().stream());
}
/**
 * Digest a text and render the resulting bytes with {@code hex}.
 * @param plain the plain text; it is encoded as UTF-8 before being digested
 * @return the hex representation of the digest of {@code plain}
 */
protected String hash(String plain) {
	byte[] encoded = plain.getBytes(UTF_8);
	byte[] checksum = digest.digest(encoded);
	return hex(checksum);
}
/**
 * Build an error result that names the runtime type of an unexpected result.
 * @param result the result that was not acceptable as parameter
 * @param <T> payload type of the returned result
 * @return an error whose message contains the simple class name of {@code result}
 */
protected static <T> Result<T> invalidParameter(Result<?> result) {
	String typeName = result.getClass().getSimpleName();
	return Error.format("Invalid parameter: %s", typeName);
}

126
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Kassablanca.java

@ -0,0 +1,126 @@ @@ -0,0 +1,126 @@
/* © SRSoftware 2024 */
package de.srsoftware.cal.importer.jena;
import static de.srsoftware.tools.TagFilter.*;
import de.srsoftware.cal.importer.BaseImporter;
import de.srsoftware.tools.*;
import de.srsoftware.tools.Error;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import java.util.List;
import java.util.regex.Pattern;
public class Kassablanca extends BaseImporter {
public static final String BASE_URL = "https://www.kassablanca.de";
private static final String APPOINTMENT_TAG_ID = "entry-content";
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d+).(\\d+).(\\d+).*Beginn\\s*(\\d+):(\\d+)\\s*Uhr");
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena";
public Kassablanca() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return BASE_URL;
}
@Override
protected Result<Tag> extractDescriptionTag(Tag eventTag) {
var list = eventTag.find(attributeHas("class", "se-content"));
if (list.size() == 1) return Payload.of(list.getFirst());
return Error.of("Failed to find description tag");
}
@Override
protected Result<Tag> extractEndTag(Tag eventTag) {
return Error.format("end date not supported");
}
@Override
protected Result<Tag> extractEventTag(Result<Tag> pageResult) {
if (pageResult.optional().isEmpty()) return transform(pageResult);
var list = pageResult.optional().get().find(attributeEquals("class", APPOINTMENT_TAG_ID));
if (list.size() == 1) return Payload.of(list.getFirst());
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID);
}
@Override
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) {
if (programPage.optional().isEmpty()) return transform(programPage);
List<String> list = programPage.optional()
.get() //
.find(attributeHas("class", "eventrow"))
.stream()
.flatMap(t -> t.find(ofType("h3")).stream())
.map(t -> t.find(ofType("a")))
.flatMap(List::stream)
.map(t -> t.get("href"))
.toList();
return Payload.of(list);
}
@Override
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) {
if (tagResult.optional().isEmpty()) return transform(tagResult);
var tag = tagResult.optional().get();
tag.find(attributeEquals("id", "filterbar")).stream().findAny().ifPresent(Tag::remove); // remove div with unrelated links
var anchors = tag.find(withAttribute("href"));
return Payload.of(anchors);
}
@Override
protected Result<Tag> extractLinksTag(Tag eventTag) {
return Payload.of(eventTag);
}
@Override
protected Result<Tag> extractLocationTag(Tag eventTag) {
return Payload.of(new Text(LOCATION));
}
@Override
protected Result<Tag> extractStartTag(Tag eventTag) {
List<Tag> tags = eventTag.find(attributeEquals("class", "se-header"));
if (tags.size() == 1) return Payload.of(tags.getFirst());
return Error.of("Failed to find event time information");
}
@Override
protected List<String> extractTags(Tag eventTag) {
return List.of("Kassablanca");
}
@Override
protected Result<Tag> extractTitleTag(Tag eventTag) {
var list = eventTag.find(ofType("h1"));
if (list.size() == 1) return Payload.of(list.getFirst());
return Error.of("Failed to find title tag");
}
@Override
protected Result<LocalDateTime> parseEndDate(String string) {
return null;
}
@Override
protected Result<LocalDateTime> parseStartDate(String string) {
var matcher = START_DATE_PATTERN.matcher(string);
if (matcher.find()) {
var day = Integer.parseInt(matcher.group(1));
var month = Integer.parseInt(matcher.group(2));
var year = Integer.parseInt(matcher.group(3));
var hour = Integer.parseInt(matcher.group(4));
var minute = Integer.parseInt(matcher.group(5));
var date = LocalDateTime.of(year, month, day, hour, minute);
return Payload.of(date);
}
return Error.of("Could not recognize start date/time");
}
@Override
protected String programURL() {
return BASE_URL + "/programm";
}
}

5
de.srsoftware.cal.importer/src/main/java/de/srsoftware/cal/importer/jena/Rosenkeller.java

@ -9,6 +9,7 @@ import de.srsoftware.tools.Error; @@ -9,6 +9,7 @@ import de.srsoftware.tools.Error;
import de.srsoftware.tools.Payload;
import de.srsoftware.tools.Result;
import de.srsoftware.tools.Tag;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import java.util.List;
import java.util.regex.Pattern;
@ -19,6 +20,10 @@ public class Rosenkeller extends BaseImporter { @@ -19,6 +20,10 @@ public class Rosenkeller extends BaseImporter {
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d+) (\\w+)(\\W+(\\d+):(\\d+))?");
private static final String DEFAULT_LOCATION = "Rosenkeller, Johannisstr. 13, 07743 Jena";
public Rosenkeller() throws NoSuchAlgorithmException {
super();
}
@Override
protected String baseUrl() {
return BASE_URL;

Loading…
Cancel
Save