Commit 094de6b9 authored by Gerrit Hübbers's avatar Gerrit Hübbers 🃏
Browse files

Finalize BibtexBundlesStreamSource

parent 0561d6bb
......@@ -34,14 +34,39 @@ import org.slf4j.LoggerFactory;
public class BibtexBundlesStreamSource implements BundlesStreamSource {
// BibTeX field keys, declared in pairs: the entry's own field and the "book"-prefixed
// variant of the same field. getBundlesStream() looks these keys up in an entry's field map
// and, when the own field is absent but the book-prefixed one is present, derives the
// metadata from the book-prefixed value instead.
private final static Key INSTITUTION_KEY = new Key("institution");
private final static Key BOOK_INSTITUTION_KEY = new Key("bookinstitution");
private final static Key REVIEWSTATUS_KEY = new Key("reviewstatus");
private final static Key BOOK_REVIEWSTATUS_KEY = new Key("bookreviewstatus");
private final static Key LICENCE_KEY = new Key("licence");
private final static Key BOOK_LICENCE_KEY = new Key("booklicence");
private final static Key PUBLICATIONSTATUS_KEY = new Key("publicationstatus");
private final static Key BOOK_PUBLICATIONSTATUS_KEY = new Key("bookpublicationstatus");
private final static Key LANGUAGE_KEY = new Key("language");
private final static Key BOOK_LANGUAGE_KEY = new Key("booklanguage");
private final static Key EMBARGODATE_KEY = new Key("embargodate");
private final static Key BOOK_EMBARGODATE_KEY = new Key("bookembargodate");
private final static Key KEYWORDS_KEY = new Key("keywords");
private final static Key BOOK_KEYWORDS_KEY = new Key("bookkeywords");
// Class-wide SLF4J logger.
private final static Logger LOG = LoggerFactory.getLogger(BibtexBundlesStreamSource.class);
// Parser used by convertLatexValueToNormalString(); assigned in the
// BibtexBundlesStreamSource(String) constructor.
private LaTeXParser latexParser;
// Renders the parsed LaTeX objects back into a String.
private LaTeXPrinter latexPrinter = new LaTeXPrinter();
// Default reference built from a random UUID; the constructor taking a bundlesSourceId
// replaces it with "bibtex-bundlessource-" + id.
// NOTE(review): unlike that prefix, "bibtex-uuid" has no trailing '-' before the UUID -
// confirm this is intended.
private String reference = "bibtex-uuid" + UUID.randomUUID().toString();
// The BibTeX text handed to the constructor; getBundlesStream() reads it via a StringReader.
private String bibtexPayload;
/**
 * Creates a stream source over the given BibTeX text.
 *
 * <p>Besides storing the payload, this sets up the {@code LaTeXParser} that is later used
 * to convert field values to plain strings.
 *
 * @param bibtexPayload the BibTeX text to read bundles from
 * @throws RuntimeException wrapping the {@code ParseException} if the
 *         {@code LaTeXParser} cannot be created; the failure is logged first
 */
public BibtexBundlesStreamSource(String bibtexPayload) {
    try {
        this.latexParser = new LaTeXParser();
    }
    catch (ParseException parserSetupFailure) {
        LOG.error("Problem initializing LaTeXParser", parserSetupFailure);
        throw new RuntimeException(parserSetupFailure);
    }
    this.bibtexPayload = bibtexPayload;
}
public BibtexBundlesStreamSource(String bibtexPayload, long bundlesSourceId) {
......@@ -49,14 +74,33 @@ public class BibtexBundlesStreamSource implements BundlesStreamSource {
this.reference = "bibtex-bundlessource-" + bundlesSourceId;
}
/**
 * Converts a BibTeX field value that may contain LaTeX markup into a string as rendered
 * by the {@code LaTeXPrinter}.
 *
 * <p>The value's user string is first flattened to a single line: every line break,
 * whether CRLF, a lone LF or a lone CR, is replaced by exactly one space. The flattened
 * text is then parsed with this instance's {@code LaTeXParser} and printed with its
 * {@code LaTeXPrinter}.
 *
 * @param input the BibTeX field value to convert
 * @return the printer's rendering of the parsed value
 * @throws RuntimeException wrapping the {@code TokenMgrException} or
 *         {@code ParseException} if the flattened text cannot be parsed; the failure
 *         is logged first
 */
private String convertLatexValueToNormalString(Value input) {
    String rawFieldValue = input.toUserString();
    // Line breaks make problems with LaTeX parsing, so turn each one into a single space.
    // CRLF is collapsed to LF first so it yields one space, and a lone CR is treated as a
    // line break too; simply deleting it would glue the neighbouring words together.
    String cleanRawFieldValue = rawFieldValue
            .replace("\r\n", "\n")
            .replace('\r', '\n')
            .replace('\n', ' ');
    List<LaTeXObject> latexObjects;
    try {
        latexObjects = latexParser.parse(cleanRawFieldValue);
    }
    catch (TokenMgrException | ParseException e1) {
        LOG.error("Problem parsing userString=" + cleanRawFieldValue, e1);
        throw new RuntimeException(e1);
    }
    return latexPrinter.print(latexObjects);
}
@Override
public Stream<Bundle> getBundlesStream() {
Stream<Bundle> result;
try {
BibTeXParser bibtexParser = new BibTeXParser();
LaTeXParser latexParser = new LaTeXParser();
LaTeXPrinter latexPrinter = new LaTeXPrinter();
BibTeXDatabase database;
try ( Reader reader = new StringReader(bibtexPayload) ) {
......@@ -87,20 +131,12 @@ public class BibtexBundlesStreamSource implements BundlesStreamSource {
Map<Key, Value> entryFields = e.getFields();
entryFields.forEach( (fieldType, fieldValue) -> {
String fieldTypeString = fieldType.getValue();
String rawFieldValue = fieldValue.toUserString();
// remove newlines, as they make problems with LaTeX parsing
String cleanRawFieldValue = rawFieldValue.replace("\r", "").replace("\n", " ");
List<LaTeXObject> latexObjects;
try {
latexObjects = latexParser.parse(cleanRawFieldValue);
}
catch (TokenMgrException | ParseException e1) {
LOG.error("Problem parsing userString=" + cleanRawFieldValue, e1);
throw new RuntimeException(e1);
}
String cleanFieldValue = latexPrinter.print(latexObjects);
String cleanFieldValue = convertLatexValueToNormalString(fieldValue);
List<Metadatum> fieldTypeMetadata;
String fieldTypeString = fieldType.getValue();
switch (fieldTypeString) {
case "abstract" : fieldTypeMetadata = getSplittedMetadata("dc.description.abstract", "@@", cleanFieldValue); break;
case "author" : fieldTypeMetadata = getSplittedMetadata("dc.contributor.author", "; ", cleanFieldValue); break;
......@@ -127,16 +163,60 @@ public class BibtexBundlesStreamSource implements BundlesStreamSource {
case "institution" : fieldTypeMetadata = getMetadatum("dc.contributor.corporateeditor", cleanFieldValue); break;
case "series" : fieldTypeMetadata = getMetadatum("dc.source.series", cleanFieldValue); break;
case "isbn" : fieldTypeMetadata = getMetadatum("dc.identifier.isbn", cleanFieldValue); break;
case "publisher" : fieldTypeMetadata = getMetadatum("dc.publisher", cleanFieldValue); break;
default : fieldTypeMetadata = null;
}
if (fieldTypeMetadata != null && 0 != fieldTypeMetadata.size() ) {
metadata.addAll(fieldTypeMetadata);
}
});
if (!entryFields.containsKey( new Key("reviewstatus")) && entryFields.containsKey( new Key("bookreviewstatus")) ) {
// if the following keys don't exist for an incollection, but are available for the containing collection, use that collection's key entries
// also, don't use bookurn, bookurl, bookdoi, as these identifiers are used for publication identification
if (!entryFields.containsKey(REVIEWSTATUS_KEY) && entryFields.containsKey(BOOK_REVIEWSTATUS_KEY) ) {
Value value = entryFields.get(BOOK_REVIEWSTATUS_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getReviewStatusMetadatum(cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(INSTITUTION_KEY) && entryFields.containsKey(BOOK_INSTITUTION_KEY) ) {
Value value = entryFields.get(BOOK_INSTITUTION_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getMetadatum("dc.contributor.corporateeditor", cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(LICENCE_KEY) && entryFields.containsKey(BOOK_LICENCE_KEY) ) {
Value value = entryFields.get(BOOK_LICENCE_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getLicenceMetadatum(cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(PUBLICATIONSTATUS_KEY) && entryFields.containsKey(BOOK_PUBLICATIONSTATUS_KEY) ) {
Value value = entryFields.get(BOOK_PUBLICATIONSTATUS_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getPublicationStatusMetadatum(cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(LANGUAGE_KEY) && entryFields.containsKey(BOOK_LANGUAGE_KEY) ) {
Value value = entryFields.get(BOOK_LANGUAGE_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getMetadatum("dc.language", cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(EMBARGODATE_KEY) && entryFields.containsKey(BOOK_EMBARGODATE_KEY) ) {
Value value = entryFields.get(BOOK_EMBARGODATE_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getMetadatum("internal.embargo.liftdate", cleaned);
metadata.addAll(bookMetadata);
}
if (!entryFields.containsKey(KEYWORDS_KEY) && entryFields.containsKey(BOOK_KEYWORDS_KEY) ) {
Value value = entryFields.get(BOOK_KEYWORDS_KEY);
String cleaned =convertLatexValueToNormalString(value);
List<Metadatum> bookMetadata = getSplittedMetadata("dc.subject.other", "; ", cleaned);
metadata.addAll(bookMetadata);
}
bundles.add(new AutonomouslyContentResolvingBundle(metadata) );
});
......
......@@ -139,6 +139,76 @@ public class BibtexBundlesStreamSourceTest {
// include encompassing collection's corporate editor
assertTrue(metadata.contains( new SimpleMetadatum("dc.contributor.corporateeditor", "Fraunhofer-Institut für Offene Kommunikationssysteme FOKUS, Kompetenzzentrum Öffentliche IT") ) );
// don't include encompassing collection's URL/URI
assertFalse(metadata.contains( new SimpleMetadatum("dc.identifier.uri", "http://oeffentliche-it.de/unberechenbar") ) );
}
{
Set<Metadatum> metadata = bss.getAllMetadata("Wolf.2010");
metadata.stream().map(Object::toString).forEach(LOG::info);
assertTrue(metadata.contains( new SimpleMetadatum("internal.dda.reference", "bibtex-bundlessource-4711@@Wolf.2010") ) );
assertTrue(metadata.contains( Stock.INCOLLECTION.getMetadatum() ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.contributor.author", "Wolf, Michael") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.title", "Regionalisierung und Raumbeobachtung") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.identifier.uri", "https://www.ssoar.info/ssoar/handle/document/35620") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Bundesrepublik Deutschland") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Indikatorensystem") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Controlling") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Monitoring") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Planungsprozess") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Raumplanung") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Raumplanung und Regionalforschung") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Regionalisierung") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.subject.other", "Regionalplanung") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "203-217") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.source.volume", "352") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.publisher", "Verl. d. ARL") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.identifier.isbn", "978-3-88838-352-6") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.source.series", "ARL Arbeitsmaterial") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.contributor.editor", "Mielke, Bernd") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.contributor.editor", "Münter, Angelika") ) );
// don't include encompassing collection's title
assertFalse(metadata.contains( new SimpleMetadatum("dc.title", "Neue Regionalisierungsansätze in Nordrhein-Westfalen") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.date.issued", "2010") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.publisher.city", "Hannover") ) );
assertTrue(metadata.contains( ReviewStatus.PEER_REVIEWED.getInternalIdentifierReviewMetadatum() ) );
assertTrue(metadata.contains( new SimpleMetadatum("internal.embargo.liftdate", "2016-01-01") ) );
assertTrue(metadata.contains( PublicationStatus.PUBLISHED_VERSION.getInternalIdentifierPubstatusMetadatum() ) );
assertTrue(metadata.contains( Licence.DEPOSIT.getInternalIdentifierLicenceMetadatum() ) );
// don't include encompassing collection's mutually exclusive keywords
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Flächennutzungsplan") ) );
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Kooperation") ) );
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Nordrhein-Westfalen") ) );
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Planungsinstrument") ) );
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Raumordnung") ) );
assertFalse(metadata.contains( new SimpleMetadatum("dc.subject.other", "Strukturpolitik") ) );
// don't include encompassing collection's URN
assertFalse(metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:0168-ssoar-284478") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.language", "de") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.title.alternative", "Regionalisation and spatial observation") ) );
assertTrue(metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:0168-ssoar-356203") ) );
// don't include encompassing collection's review status
assertFalse(metadata.contains( ReviewStatus.REVIEWED.getInternalIdentifierReviewMetadatum() ) );
// don't include encompassing collection's embargo date
assertFalse(metadata.contains( new SimpleMetadatum("internal.embargo.liftdate", "2005-05-01") ) );
// as this incollection publication does not have an institution set, use encompassing collection's institution
assertTrue(metadata.contains( new SimpleMetadatum("dc.contributor.corporateeditor", "Akademie für Raumforschung und Landesplanung - Leibniz-Forum für Raumwissenschaften") ) );
// don't include encompassing collection's URL/URI
assertFalse(metadata.contains( new SimpleMetadatum("dc.identifier.uri", "https://shop.arl-net.de/neue-regionalisierungsansatze-in-nordrhein-westfalen.html") ) );
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment