7 changed files with 187 additions and 17 deletions
@ -0,0 +1,126 @@
@@ -0,0 +1,126 @@
|
||||
/* © SRSoftware 2024 */ |
||||
package de.srsoftware.cal.importer.jena; |
||||
|
||||
import static de.srsoftware.tools.TagFilter.*; |
||||
|
||||
import de.srsoftware.cal.importer.BaseImporter; |
||||
import de.srsoftware.tools.*; |
||||
import de.srsoftware.tools.Error; |
||||
import java.security.NoSuchAlgorithmException; |
||||
import java.time.LocalDateTime; |
||||
import java.util.List; |
||||
import java.util.regex.Pattern; |
||||
|
||||
public class Kassablanca extends BaseImporter { |
||||
public static final String BASE_URL = "https://www.kassablanca.de"; |
||||
private static final String APPOINTMENT_TAG_ID = "entry-content"; |
||||
private static final Pattern START_DATE_PATTERN = Pattern.compile("(\\d+).(\\d+).(\\d+).*Beginn\\s*(\\d+):(\\d+)\\s*Uhr"); |
||||
private static final String LOCATION = "Kassablanca e.V., Felsenkellerstr. 13a, 07745 Jena"; |
||||
|
||||
public Kassablanca() throws NoSuchAlgorithmException { |
||||
super(); |
||||
} |
||||
|
||||
@Override |
||||
protected String baseUrl() { |
||||
return BASE_URL; |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractDescriptionTag(Tag eventTag) { |
||||
var list = eventTag.find(attributeHas("class", "se-content")); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.of("Failed to find description tag"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractEndTag(Tag eventTag) { |
||||
return Error.format("end date not supported"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractEventTag(Result<Tag> pageResult) { |
||||
if (pageResult.optional().isEmpty()) return transform(pageResult); |
||||
var list = pageResult.optional().get().find(attributeEquals("class", APPOINTMENT_TAG_ID)); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.format("Could not find tag with id \"%s\"", APPOINTMENT_TAG_ID); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<List<String>> extractEventUrls(Result<Tag> programPage) { |
||||
if (programPage.optional().isEmpty()) return transform(programPage); |
||||
List<String> list = programPage.optional() |
||||
.get() //
|
||||
.find(attributeHas("class", "eventrow")) |
||||
.stream() |
||||
.flatMap(t -> t.find(ofType("h3")).stream()) |
||||
.map(t -> t.find(ofType("a"))) |
||||
.flatMap(List::stream) |
||||
.map(t -> t.get("href")) |
||||
.toList(); |
||||
return Payload.of(list); |
||||
} |
||||
|
||||
@Override |
||||
public Result<List<Tag>> extractLinkAnchors(Result<Tag> tagResult) { |
||||
if (tagResult.optional().isEmpty()) return transform(tagResult); |
||||
var tag = tagResult.optional().get(); |
||||
tag.find(attributeEquals("id", "filterbar")).stream().findAny().ifPresent(Tag::remove); // remove div with unrelated links
|
||||
var anchors = tag.find(withAttribute("href")); |
||||
return Payload.of(anchors); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractLinksTag(Tag eventTag) { |
||||
return Payload.of(eventTag); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractLocationTag(Tag eventTag) { |
||||
return Payload.of(new Text(LOCATION)); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractStartTag(Tag eventTag) { |
||||
List<Tag> tags = eventTag.find(attributeEquals("class", "se-header")); |
||||
if (tags.size() == 1) return Payload.of(tags.getFirst()); |
||||
return Error.of("Failed to find event time information"); |
||||
} |
||||
|
||||
@Override |
||||
protected List<String> extractTags(Tag eventTag) { |
||||
return List.of("Kassablanca"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<Tag> extractTitleTag(Tag eventTag) { |
||||
var list = eventTag.find(ofType("h1")); |
||||
if (list.size() == 1) return Payload.of(list.getFirst()); |
||||
return Error.of("Failed to find title tag"); |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDateTime> parseEndDate(String string) { |
||||
return null; |
||||
} |
||||
|
||||
@Override |
||||
protected Result<LocalDateTime> parseStartDate(String string) { |
||||
var matcher = START_DATE_PATTERN.matcher(string); |
||||
if (matcher.find()) { |
||||
var day = Integer.parseInt(matcher.group(1)); |
||||
var month = Integer.parseInt(matcher.group(2)); |
||||
var year = Integer.parseInt(matcher.group(3)); |
||||
var hour = Integer.parseInt(matcher.group(4)); |
||||
var minute = Integer.parseInt(matcher.group(5)); |
||||
var date = LocalDateTime.of(year, month, day, hour, minute); |
||||
return Payload.of(date); |
||||
} |
||||
return Error.of("Could not recognize start date/time"); |
||||
} |
||||
|
||||
@Override |
||||
protected String programURL() { |
||||
return BASE_URL + "/programm"; |
||||
} |
||||
} |
Loading…
Reference in new issue