Commit 029a9fef, authored by Gerrit Hübbers 🃏
Browse files

Fix legacy metadatum bugs

parent f1673f15
......@@ -2,8 +2,11 @@ package org.gesis.dda.feeder.ssoar;
import org.gesis.dda.publishing.domain.Metadatum;
import org.gesis.dda.publishing.domain.impl.SimpleMetadatum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public enum Licence {
CC_BY("1", "Creative Commons - Namensnennung", "Creative Commons - Attribution"),
CC_BY_NC_ND("2", "Creative Commons - Namensnennung, Nicht kommerz., Keine Bearbeitung", "Creative Commons - Attribution, Noncommercial, No Derivate Works"),
DEPOSIT("3", "Deposit Licence - Keine Weiterverbreitung, keine Bearbeitung", "Deposit Licence - No Redistribution, No Modifications"),
......@@ -41,6 +44,10 @@ public enum Licence {
CC_BY_NC_SA_3_0("35", "Creative Commons - Namensnennung, Nicht-kommerz., Weitergabe unter gleichen Bedingungen 3.0", "Creative Commons - Attribution-NonCommercial-ShareAlike 3.0"),
CC_BY_NC_SA_4_0("36", "Creative Commons - Namensnennung, Nicht-kommerz., Weitergabe unter gleichen Bedingungen 4.0", "Creative Commons - Attribution-NonCommercial-ShareAlike 4.0");
// Logger obtained from the SLF4J LoggerFactory for the Licence enum.
private static final Logger LOG = LoggerFactory.getLogger(Licence.class);
// NOTE(review): the three fields below are assigned outside this excerpt; judging by the
// constructor arguments of the constants they presumably hold the German label, the English
// label and the internal id — confirm against the constructor.
private final Metadatum deMetadatum;
private final Metadatum enMetadatum;
private final Metadatum internalMetadatum;
......@@ -68,4 +75,182 @@ public enum Licence {
/**
 * Returns the legacy metadatum stored on this licence constant.
 *
 * @return the value of the {@code legacyInternalMetadatum} field
 */
public Metadatum getLegacyMetadatum() {
    return this.legacyInternalMetadatum;
}
/**
 * Maps a free-text licence description to the legacy metadatum of the matching
 * {@link Licence} constant.
 *
 * <p>The input is lower-cased and then searched for plain substrings: {@code "creativecommons"}
 * or {@code "cc"} marks a Creative Commons licence, {@code "by"}, {@code "sa"}, {@code "nc"}
 * and {@code "nd"} mark the licence elements, {@code "zero"} or {@code "cc0"} marks public
 * domain, and {@code "4.0"}, {@code "3.0"}, {@code "2.0"}, {@code "1.0"} (checked in that
 * order) select the version. Because these are substring tests, the tokens match anywhere in
 * the input, including inside unrelated words. If no Creative Commons combination matches, an
 * input containing {@code "deposit"} maps to {@link Licence#DEPOSIT}.
 *
 * @param input licence text or URL; may be {@code null}
 * @return the legacy metadatum of the recognised licence, or {@code null} if {@code input} is
 *         {@code null} or no licence could be identified (the latter is logged at WARN level)
 */
public static Metadatum getLegacyLicenceMetadatum(String input) {
    if (null == input) {
        // Nothing to classify; answer "unknown" instead of failing with a NullPointerException.
        return null;
    }

    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. a Turkish locale would turn "DEPOSIT" into "deposıt" and miss the match).
    String inputLowerCase = input.toLowerCase(java.util.Locale.ROOT);

    boolean isCreativeCommons = inputLowerCase.contains("creativecommons") || inputLowerCase.contains("cc");
    boolean isBy = inputLowerCase.contains("by");
    boolean isSa = inputLowerCase.contains("sa");
    boolean isNc = inputLowerCase.contains("nc");
    boolean isNd = inputLowerCase.contains("nd");
    // The needle must be lower case as well: an upper-case "CC0" can never occur in a
    // lower-cased string, so CC0 inputs were previously never recognised.
    boolean isPublicDomain = inputLowerCase.contains("zero") || inputLowerCase.contains("cc0");

    // Highest version wins when several version numbers appear in the input.
    String version = null;
    if (inputLowerCase.contains("4.0")) {
        version = "4.0";
    } else if (inputLowerCase.contains("3.0")) {
        version = "3.0";
    } else if (inputLowerCase.contains("2.0")) {
        version = "2.0";
    } else if (inputLowerCase.contains("1.0")) {
        version = "1.0";
    }

    // Every versioned family below requires a non-public-domain CC licence with attribution.
    boolean isAttributionLicence = isCreativeCommons && !isPublicDomain && isBy;

    Licence licenceResult = null;
    if (isAttributionLicence && !isSa && !isNc && !isNd) {
        licenceResult = selectByVersion(version,
                Licence.CC_BY, Licence.CC_BY_1_0, Licence.CC_BY_2_0, Licence.CC_BY_3_0, Licence.CC_BY_4_0);
    } else if (isAttributionLicence && !isSa && isNc && isNd) {
        licenceResult = selectByVersion(version,
                Licence.CC_BY_NC_ND, Licence.CC_BY_NC_ND_1_0, Licence.CC_BY_NC_ND_2_0,
                Licence.CC_BY_NC_ND_3_0, Licence.CC_BY_NC_ND_4_0);
    } else if (isAttributionLicence && isSa && !isNc && !isNd) {
        licenceResult = selectByVersion(version,
                Licence.CC_BY_SA, Licence.CC_BY_SA_1_0, Licence.CC_BY_SA_2_0,
                Licence.CC_BY_SA_3_0, Licence.CC_BY_SA_4_0);
    } else if (isAttributionLicence && !isSa && !isNc && isNd) {
        licenceResult = selectByVersion(version,
                Licence.CC_BY_ND, Licence.CC_BY_ND_1_0, Licence.CC_BY_ND_2_0,
                Licence.CC_BY_ND_3_0, Licence.CC_BY_ND_4_0);
    } else if (isAttributionLicence && !isSa && isNc && !isNd) {
        // NOTE(review): version 2.0 resolves to CC_BY_NC_3_0 here, unlike every other family,
        // which maps 2.0 to its own _2_0 constant. This looks like a copy-paste slip, but a
        // CC_BY_NC_2_0 constant is not visible in this excerpt, so the mapping is left
        // unchanged — confirm and correct.
        licenceResult = selectByVersion(version,
                Licence.CC_BY_NC, Licence.CC_BY_NC_1_0, Licence.CC_BY_NC_3_0,
                Licence.CC_BY_NC_3_0, Licence.CC_BY_NC_4_0);
    } else if (isAttributionLicence && isSa && isNc && !isNd) {
        licenceResult = selectByVersion(version,
                Licence.CC_BY_NC_SA, Licence.CC_BY_NC_SA_1_0, Licence.CC_BY_NC_SA_2_0,
                Licence.CC_BY_NC_SA_3_0, Licence.CC_BY_NC_SA_4_0);
    } else if (isCreativeCommons && isPublicDomain) {
        licenceResult = Licence.CC_0;
    } else if (inputLowerCase.contains("deposit")) {
        licenceResult = Licence.DEPOSIT;
    }

    if (null == licenceResult) {
        LOG.warn("Could not identify licence for input={}", input);
        return null;
    }
    return licenceResult.getLegacyMetadatum();
}

/** Picks the constant of one licence family that belongs to the detected version string. */
private static Licence selectByVersion(String version, Licence unversioned,
        Licence v1, Licence v2, Licence v3, Licence v4) {
    if (null == version) {
        return unversioned;
    }
    switch (version) {
        case "1.0":
            return v1;
        case "2.0":
            return v2;
        case "3.0":
            return v3;
        case "4.0":
            return v4;
        default:
            // Not reachable with the version strings produced by the caller.
            return null;
    }
}
}
......@@ -37,4 +37,34 @@ public enum PublicationStatus {
/**
 * Returns the legacy metadatum stored on this publication-status constant.
 *
 * @return the value of the {@code legacyInternalMetadatum} field
 */
public Metadatum getLegacyMetadatum() {
    return this.legacyInternalMetadatum;
}
/**
 * Resolves a publication-status label or numeric code to the legacy metadatum of the
 * corresponding {@link PublicationStatus} constant.
 *
 * <p>Labels are matched case-sensitively as substrings; the codes {@code "1"} to {@code "5"}
 * must match the whole input.
 *
 * @param input German or English status label, or a numeric code; may be {@code null}
 * @return the legacy metadatum of the matching status, or {@code null} if {@code input} is
 *         {@code null} or not recognised
 */
public static Metadatum getLegacyPublicationStatusMetadatum(String input) {
    if (null == input) {
        return null;
    }

    // First determine which constant the input denotes, then hand out its metadatum.
    PublicationStatus status = null;
    if (input.contains("Veröffentlichungsversion") || input.contains("Published Version") || input.equals("1")) {
        status = PublicationStatus.PUBLISHED_VERSION;
    } else if (input.contains("Postprint") || input.equals("2")) {
        status = PublicationStatus.POSTPRINT;
    } else if (input.contains("Preprint") || input.equals("3")) {
        status = PublicationStatus.PREPRINT;
    } else if (input.contains("unbekannt") || input.contains("unknown") || input.equals("4")) {
        status = PublicationStatus.UNKNOWN;
    } else if (input.contains("Erstveröffentlichung") || input.contains("Primary Publication") || input.equals("5")) {
        status = PublicationStatus.PRIMARY_PUBLICATION;
    }

    return (null == status) ? null : status.getLegacyMetadatum();
}
}
......@@ -36,4 +36,31 @@ public enum ReviewStatus {
/**
 * Returns the legacy metadatum stored on this review-status constant.
 *
 * @return the value of the {@code legacyInternalMetadatum} field
 */
public Metadatum getLegacyMetadatum() {
    return this.legacyInternalMetadatum;
}
/**
 * Resolves a review-status label or numeric code to the legacy metadatum of the
 * corresponding {@link ReviewStatus} constant.
 *
 * <p>Labels are matched case-sensitively as substrings; the codes {@code "1"} to {@code "4"}
 * must match the whole input. The checks run in this order: peer reviewed, not reviewed,
 * reviewed, unknown.
 *
 * @param input German or English status label, or a numeric code; may be {@code null}
 * @return the legacy metadatum of the matching status, or {@code null} if {@code input} is
 *         {@code null} or not recognised
 */
public static Metadatum getLegacyReviewStatusMetadatum(String input) {
    if (null == input) {
        return null;
    }
    if (input.contains("peer") || input.equals("1")) {
        return ReviewStatus.PEER_REVIEWED.getLegacyMetadatum();
    }
    // The negated labels have to be tested before the plain ones: "nicht begutachtet" and
    // "not reviewed" also contain "begutachtet" / "reviewed" and would otherwise be
    // classified as REVIEWED, leaving NOT_REVIEWED reachable only through the code "3".
    if (input.contains("nicht") || input.contains("not") || input.equals("3")) {
        return ReviewStatus.NOT_REVIEWED.getLegacyMetadatum();
    }
    if (input.contains("begutachtet") || input.contains("reviewed") || input.equals("2")) {
        return ReviewStatus.REVIEWED.getLegacyMetadatum();
    }
    if (input.contains("unbekannt") || input.contains("unknown") || input.equals("4")) {
        return ReviewStatus.UNKNOWN.getLegacyMetadatum();
    }
    return null;
}
}
......@@ -13,12 +13,13 @@ import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.gesis.dda.feeder.ssoar.Licence;
import org.gesis.dda.feeder.ssoar.PublicationStatus;
import org.gesis.dda.feeder.ssoar.ReviewStatus;
import org.gesis.dda.feeder.ssoar.Stock;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.BundlesStreamSource;
import org.gesis.dda.publishing.domain.Metadatum;
import org.gesis.dda.xslt.TransformUtil;
import org.gesis.dda.xslt.XsltUtil;
import org.jbibtex.BibTeXDatabase;
import org.jbibtex.BibTeXEntry;
import org.jbibtex.BibTeXParser;
......@@ -236,19 +237,22 @@ public class BibtexBundlesStreamSource implements BundlesStreamSource {
/**
 * Collects the publication-status metadata derived from the given field value.
 *
 * Visible steps: an id is looked up via TransformUtil.getSsoarPublicationStatusId and, when
 * non-null, added under the key "internal.identifier.pubstatus"; the legacy metadatum from
 * PublicationStatus.getLegacyPublicationStatusMetadatum is added when non-null.
 *
 * NOTE(review): as shown here the braces do not balance — the "if (null != internalValue)"
 * block is never closed before the legacy lookup. This looks like removed and added diff
 * lines shown together; confirm against the checked-in file which lines are current.
 */
private List<Metadatum> getPublicationStatusMetadatum(String cleanFieldValue) {
List<Metadatum> result = new ArrayList<>();
String internalValue = TransformUtil.getSsoarPublicationStatusId(cleanFieldValue);
if (null != internalValue) {
result.add(new SimpleMetadatum("internal.identifier.pubstatus", internalValue) );
// ISSUE-85
Metadatum legacyValue = PublicationStatus.getLegacyPublicationStatusMetadatum(cleanFieldValue);
// The lookup returns null for unrecognised input, hence the guard.
if (null != legacyValue) {
result.add(legacyValue);
}
return result;
}
/**
 * Collects the licence metadata derived from the given field value.
 *
 * Visible steps: an id is looked up via XsltUtil.getAppropriateSsoarLicenceInternalId and,
 * when non-null, added under the key "internal.identifier.licence"; the legacy metadatum
 * from Licence.getLegacyLicenceMetadatum is added when non-null.
 *
 * NOTE(review): as shown here the braces do not balance — the "if (null != internalValue)"
 * block is never closed before the legacy lookup. This looks like removed and added diff
 * lines shown together; confirm against the checked-in file which lines are current.
 */
private List<Metadatum> getLicenceMetadatum(String cleanFieldValue) {
List<Metadatum> result = new ArrayList<>();
String internalValue = XsltUtil.getAppropriateSsoarLicenceInternalId(cleanFieldValue);
if (null != internalValue) {
result.add(new SimpleMetadatum("internal.identifier.licence", internalValue) );
// ISSUE-85
Metadatum legacyValue = Licence.getLegacyLicenceMetadatum(cleanFieldValue);
// The lookup returns null when no licence could be identified, hence the guard.
if (null != legacyValue) {
result.add(legacyValue);
}
return result;
}
......@@ -256,9 +260,10 @@ public class BibtexBundlesStreamSource implements BundlesStreamSource {
/**
 * Collects the review-status metadata derived from the given field value.
 *
 * Visible steps: an id is looked up via TransformUtil.getSsoarReviewStatusId and, when
 * non-null, added under the key "internal.identifier.review"; the legacy metadatum from
 * ReviewStatus.getLegacyReviewStatusMetadatum is added when non-null.
 *
 * NOTE(review): as shown here the braces do not balance — the "if (null != internalValue)"
 * block is never closed before the legacy lookup. This looks like removed and added diff
 * lines shown together; confirm against the checked-in file which lines are current.
 */
private List<Metadatum> getReviewStatusMetadatum(String cleanFieldValue) {
List<Metadatum> result = new ArrayList<>();
String internalValue = TransformUtil.getSsoarReviewStatusId(cleanFieldValue);
if (null != internalValue) {
result.add(new SimpleMetadatum("internal.identifier.review", internalValue) );
// ISSUE-85
Metadatum legacyValue = ReviewStatus.getLegacyReviewStatusMetadatum(cleanFieldValue);
// The lookup returns null for unrecognised input, hence the guard.
if (null != legacyValue) {
result.add(legacyValue);
}
return result;
}
......
......@@ -2,32 +2,15 @@ package org.gesis.dda.xslt;
import org.gesis.dda.feeder.ssoar.PublicationStatus;
import org.gesis.dda.feeder.ssoar.ReviewStatus;
import org.gesis.dda.publishing.domain.Metadatum;
public class TransformUtil {
/**
 * Returns the value of the legacy review-status metadatum that
 * {@link ReviewStatus#getLegacyReviewStatusMetadatum(String)} resolves for the input.
 *
 * <p>The label/code matching lives in {@code ReviewStatus}; the former local copy of that
 * if-chain computed a result that was always overwritten and has been removed.
 *
 * @param input review-status label or numeric code; may be {@code null}
 * @return the metadatum value, or {@code null} if the input is {@code null} or not recognised
 */
public static String getSsoarReviewStatusId(String input) {
    Metadatum interim = ReviewStatus.getLegacyReviewStatusMetadatum(input);
    // The lookup answers null for null/unrecognised input; pass that on instead of
    // dereferencing it and failing with a NullPointerException.
    return (null != interim) ? interim.getValue() : null;
}
......@@ -35,29 +18,8 @@ public class TransformUtil {
/**
 * Returns the value of the legacy publication-status metadatum that
 * {@link PublicationStatus#getLegacyPublicationStatusMetadatum(String)} resolves for the input.
 *
 * <p>The label/code matching lives in {@code PublicationStatus}; the former local copy of
 * that if-chain computed a result that was always overwritten and has been removed.
 *
 * @param input publication-status label or numeric code; may be {@code null}
 * @return the metadatum value, or {@code null} if the input is {@code null} or not recognised
 */
public static String getSsoarPublicationStatusId(String input) {
    Metadatum interim = PublicationStatus.getLegacyPublicationStatusMetadatum(input);
    // The lookup answers null for null/unrecognised input; pass that on instead of
    // dereferencing it and failing with a NullPointerException.
    return (null != interim) ? interim.getValue() : null;
}
......
package org.gesis.dda.xslt;
import org.gesis.dda.feeder.ssoar.Licence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.gesis.dda.publishing.domain.Metadatum;
public class XsltUtil {
// Logger obtained from the SLF4J LoggerFactory for XsltUtil.
private static final Logger LOG = LoggerFactory.getLogger(XsltUtil.class);
public static String hyphenizeIsbn(String input) {
String withoutMinusInput = input.replace("-", "");
String result;
// 978-3-8474-0969-4
result =
input.substring(0, 3) + "-" +
input.substring(3, 4) + "-" +
input.substring(4, 8) + "-" +
input.substring(8, 12) + "-" +
input.substring(12);
withoutMinusInput.substring(0, 3) + "-" +
withoutMinusInput.substring(3, 4) + "-" +
withoutMinusInput.substring(4, 8) + "-" +
withoutMinusInput.substring(8, 12) + "-" +
withoutMinusInput.substring(12);
return result;
}
......@@ -24,177 +22,8 @@ public class XsltUtil {
public static String getAppropriateSsoarLicenceInternalId(String input) {
String result;
Licence licenceResult = null;
boolean isCreativeCommons = false;
boolean isBy = false;
boolean isSa = false;
boolean isNc = false;
boolean isNd = false;
boolean isPublicDomain = false;
String version = null;
String inputLowerCase = input.toLowerCase();
if ( inputLowerCase.contains("creativecommons") || inputLowerCase.contains("cc") ) {
isCreativeCommons = true;
}
if ( inputLowerCase.contains("by") ) {
isBy = true;
}
if ( inputLowerCase.contains("sa") ) {
isSa = true;
}
if ( inputLowerCase.contains("nc") ) {
isNc = true;
}
if ( inputLowerCase.contains("nd") ) {
isNd = true;
}
if ( inputLowerCase.contains("nd") ) {
isNd = true;
}
if ( inputLowerCase.contains("zero") || inputLowerCase.contains("CC0") ) {
isPublicDomain = true;
}
if ( inputLowerCase.contains("4.0") ) {
version = "4.0";
}
else if ( inputLowerCase.contains("3.0") ) {
version = "3.0";
}
else if ( inputLowerCase.contains("2.0") ) {
version = "2.0";
}
else if ( inputLowerCase.contains("1.0") ) {
version = "1.0";
}
if ( isCreativeCommons && !isPublicDomain && isBy && !isSa && !isNc && !isNd) {
if (null == version) {
licenceResult = Licence.CC_BY;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_2_0;
}
else if ( "3.0".equals(version) ) {
licenceResult = Licence.CC_BY_3_0;
}
else if ( "4.0".equals(version) ) {
licenceResult = Licence.CC_BY_4_0;
}
}
else if ( isCreativeCommons && !isPublicDomain && isBy && !isSa && isNc && isNd) {
if (null == version) {
licenceResult = Licence.CC_BY_NC_ND;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_ND_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_ND_2_0;
}
else if ( "3.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_ND_3_0;
}
else if ( "4.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_ND_4_0;
}
}
else if ( isCreativeCommons && !isPublicDomain && isBy && isSa && !isNc && !isNd) {
if (null == version) {
licenceResult = Licence.CC_BY_SA;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_SA_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_SA_2_0;
}
else if ( "3.0".equals(version) ) {
licenceResult = Licence.CC_BY_SA_3_0;
}
else if ( "4.0".equals(version) ) {
licenceResult = Licence.CC_BY_SA_4_0;
}
}
else if ( isCreativeCommons && !isPublicDomain && isBy && !isSa && !isNc && isNd) {
if (null == version) {
licenceResult = Licence.CC_BY_ND;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_ND_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_ND_2_0;
}
else if ( "3.0".equals(version) ) {
licenceResult = Licence.CC_BY_ND_3_0;
}
else if ( "4.0".equals(version) ) {
licenceResult = Licence.CC_BY_ND_4_0;
}
}
else if ( isCreativeCommons && !isPublicDomain && isBy && !isSa && isNc && !isNd) {
if (null == version) {
licenceResult = Licence.CC_BY_NC;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_3_0;
}
else if ( "3.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_3_0;
}
else if ( "4.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_4_0;
}
}
else if ( isCreativeCommons && !isPublicDomain && isBy && isSa && isNc && !isNd) {
licenceResult = Licence.CC_BY_NC_SA;
if (null == version) {
licenceResult = Licence.CC_BY_NC_SA;
}
else if ( "1.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_SA_1_0;
}
else if ( "2.0".equals(version) ) {
licenceResult = Licence.CC_BY_NC_SA_2_0;
}
else if ( "3.0".equals(version) ) {