Commit f48a2537 authored by Steinberg, Jan
Browse files

DOAbooks - added licence filter

parent 581542c4
...@@ -15,7 +15,8 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter { ...@@ -15,7 +15,8 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class); private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class);
/** /**
* SSOAR only gets the given class notation's publications from this source * SSOAR only gets the given class notation's publications from this source.
* In addition only known licenced publications are accepted.
*/ */
@Override @Override
public boolean test(Bundle bundle) { public boolean test(Bundle bundle) {
...@@ -25,13 +26,30 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter { ...@@ -25,13 +26,30 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
"Political science", "Political science",
"Political science (General)", "Political science (General)",
"Political theory"}; "Political theory"};
String[] licenceStringsToBeFilteredTo = {
"Attribution (CC by)",
"Attribution No Derivatives (CC by-nd)",
"Attribution Non-commercial (CC by-nc)",
"Attribution Non-commercial No Derivatives (CC by-nc-nd)",
"Attribution Non-commercial Share Alike (CC by-nc-sa)",
"Attribution Share Alike (CC by-sa)",
"CC BY 3.0",
"CC BY IGO 3.0"};
Set<Metadatum> metadata = bundle.getMetadata(); Set<Metadatum> metadata = bundle.getMetadata();
Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet()); Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet());
if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))) { Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null);
LOG.debug("clasShort-Set not null."); if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))
&& null != licence) {
LOG.debug("clasShort-Set not null and a licence entry is found");
if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) { if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) {
result = true; if (Arrays.asList(licenceStringsToBeFilteredTo).stream().anyMatch( str -> licence.getValue().indexOf(str) != -1)) {
LOG.debug("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; "))); result = true;
LOG.debug("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
} else {
result = false;
LOG.debug("Filtered away because of licence: {}", licence.getValue());
}
} }
else { else {
result = false; result = false;
......
...@@ -813,7 +813,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { ...@@ -813,7 +813,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12555") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12555") ) );
assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) );
assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "16") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "2") ) );
// assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "3") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "3") ) );
// assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") ) );
assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "192") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "192") ) );
...@@ -853,7 +853,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { ...@@ -853,7 +853,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
assertTrue( metadata2.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12703") ) ); assertTrue( metadata2.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12703") ) );
assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() ); assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
// assertTrue( metadata2.contains( new SimpleMetadatum("dc.language", "en") ) ); // assertTrue( metadata2.contains( new SimpleMetadatum("dc.language", "en") ) );
assertTrue( metadata2.contains( new SimpleMetadatum("dc.rights.licence", "16") ) ); assertTrue( metadata2.contains( new SimpleMetadatum("dc.rights.licence", "10") ) );
// assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "3") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "3") ) );
// assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") ) );
assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.pageinfo", "368") ) ); assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.pageinfo", "368") ) );
......
...@@ -63,11 +63,11 @@ ...@@ -63,11 +63,11 @@
<xsl:with-param name="value" select="'20'"/> <xsl:with-param name="value" select="'20'"/>
</xsl:call-template> </xsl:call-template>
<xsl:call-template name="new-metadatum"> <!--xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="key" select="'dc.rights.licence'"/>
<xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'16'"/> <xsl:with-param name="value" select="'16'"/>
</xsl:call-template> </xsl:call-template-->
<xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.pubstatus'"/> <xsl:with-param name="key" select="'dc.description.pubstatus'"/>
<xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/>
...@@ -115,6 +115,7 @@ ...@@ -115,6 +115,7 @@
<xsl:apply-templates select="marcxml:datafield[@tag='720']/child::node()[@code='a']"/> <xsl:apply-templates select="marcxml:datafield[@tag='720']/child::node()[@code='a']"/>
<!--xsl:apply-templates select="marcxml:datafield[@tag='786']/child::node()[@code='n']"/--> <!--xsl:apply-templates select="marcxml:datafield[@tag='786']/child::node()[@code='n']"/-->
<xsl:apply-templates select="marcxml:datafield[@tag='856']/child::node()[@code='u']"/> <xsl:apply-templates select="marcxml:datafield[@tag='856']/child::node()[@code='u']"/>
<xsl:apply-templates select="marcxml:datafield[@tag='856']/child::node()[@code='z']"/>
</xsl:template> </xsl:template>
<xsl:template match="marcxml:datafield[@tag='020']/child::node()[@code='a']"> <xsl:template match="marcxml:datafield[@tag='020']/child::node()[@code='a']">
...@@ -302,7 +303,66 @@ ...@@ -302,7 +303,66 @@
</xsl:call-template> </xsl:call-template>
</xsl:if> </xsl:if>
</xsl:template> </xsl:template>
<!--
  Handles MARC datafield 856, subfield z: looks for a known licence wording
  in the subfield text and, when one is found, emits a 'dc.rights.licence'
  metadatum with the value assigned to that wording. The wordings are tried
  in the order listed and the first hit wins; unrecognised text emits nothing.
  NOTE(review): the numeric values are presumably target-side licence ids;
  confirm against the consuming system.
-->
<xsl:template match="marcxml:datafield[@tag='856']/child::node()[@code='z']">
  <!-- Resolve the licence value first; it stays empty when no wording matches. -->
  <xsl:variable name="licence-value">
    <xsl:choose>
      <xsl:when test="contains(text(), 'Attribution (CC by)')">1</xsl:when>
      <xsl:when test="contains(text(), 'Attribution No Derivatives (CC by-nd)')">9</xsl:when>
      <xsl:when test="contains(text(), 'Attribution Non-commercial (CC by-nc)')">10</xsl:when>
      <xsl:when test="contains(text(), 'Attribution Non-commercial No Derivatives (CC by-nc-nd)')">2</xsl:when>
      <xsl:when test="contains(text(), 'Attribution Non-commercial Share Alike (CC by-nc-sa)')">11</xsl:when>
      <xsl:when test="contains(text(), 'Attribution Share Alike (CC by-sa)')">8</xsl:when>
      <!-- Both CC BY 3.0 variants share the same value. -->
      <xsl:when test="contains(text(), 'CC BY 3.0') or contains(text(), 'CC BY IGO 3.0')">15</xsl:when>
    </xsl:choose>
  </xsl:variable>
  <!-- Emit exactly one metadatum, and only for a recognised licence. -->
  <xsl:if test="string($licence-value) != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.rights.licence'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="string($licence-value)"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<xsl:template name="new-metadatum"> <xsl:template name="new-metadatum">
<xsl:param name="key" /> <xsl:param name="key" />
<xsl:param name="language" /> <xsl:param name="language" />
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment