1 # OpenILS::WWW::SuperCat::OAI manages OAI2 requests and responses.
3 # Copyright (c) 2014-2017 International Institute of Social History
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Author: Lucien van Wouw <lwo@iisg.nl>
22 package OpenILS::WWW::SuperCat::OAI;
23 use strict; use warnings;
24 use Apache2::Const -compile => qw(OK REDIRECT DECLINED NOT_FOUND :log);
26 use DateTime::Format::ISO8601;
28 use HTTP::OAI::Metadata::OAI_Identifier;
29 use HTTP::OAI::Repository qw/:validate/;
30 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
33 use OpenSRF::EX qw(:try);
34 use OpenSRF::Utils::Logger qw/$logger/;
35 use OpenILS::Utils::Fieldmapper;
36 use OpenILS::Utils::CStoreEditor;
37 use OpenILS::Application::AppUtils;
41 my $U = 'OpenILS::Application::AppUtils';
46 $repository_identifier,
72 OpenSRF::System->bootstrap_client( config_file => $bootstrap );
74 my $idl = OpenSRF::Utils::SettingsClient->new->config_value('IDL');
75 Fieldmapper->import(IDL => $idl);
76 OpenILS::Utils::CStoreEditor->init; # just in case
78 $parser = new XML::LibXML;
79 $xslt = new XML::LibXSLT;
81 my $app_settings = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.supercat')->{'app_settings'}->{'oai'};
82 $base_url = $app_settings->{'base_url'} || 'localhost';
83 $base_url =~/(.*)\/$/ ; # Keep all minus the trailing forward slash.
84 $repository_identifier = $app_settings->{'repository_identifier'} || 'localhost';
85 $repository_name = $app_settings->{'repository_name '} || 'A name';
86 $admin_email = $app_settings->{'admin_email'} || 'adminEmail@' . $repository_identifier ;
87 $earliest_datestamp = $app_settings->{'earliest_datestamp'} || '0001-01-01' ;
88 $deleted_record = $app_settings->{'deleted_record'} || 'yes' ;
89 $max_count = $app_settings->{'max_count'} || 50;
90 $granularity = $app_settings->{'granularity' } || 'YYYY-MM-DDThh:mm:ss';
91 $scheme = $app_settings->{'scheme'} || 'oai';
92 $delimiter = $app_settings->{'delimiter'} || ':';
93 $sample_identifier = $app_settings->{'sample_identifier'} || $scheme . $delimiter . $repository_identifier . $delimiter . '12345' ;
95 $logger->info('Default OAI repo settings in place, loading sets...');
97 _load_oaisets_biblio();
98 _load_oaisets_authority();
99 _load_oai_metadataformats();
101 $logger->info('... sets loaded.');
103 return Apache2::Const::OK;
110 return Apache2::Const::DECLINED if (-e $apache->filename);
114 if ( $cgi->path_info =~ /\/(authority|biblio)/ ) {
117 return Apache2::Const::NOT_FOUND ;
120 my %attr = $cgi->Vars();
121 my $requestURL = $base_url
122 . '/' . $record_class
124 . join('&', map { "$_=$attr{$_}" } keys %attr);
125 $logger->info('Request url=' . $requestURL ) ;
128 my @errors = validate_request( %attr );
131 # Retrieve our parameters
132 my $verb = delete( $attr{verb} );
133 my $identifier = $attr{identifier};
134 my $metadataPrefix = $attr{metadataPrefix} ;
135 my $from = $attr{from};
136 my $until = $attr{'until'};
137 my $set = $attr{set};
138 my $resumptionToken = decode_base64($attr{resumptionToken} ) if $attr{resumptionToken};
140 if ( $resumptionToken ) {
141 ($metadataPrefix, $from, $until, $set, $offset) = split( '\$', $resumptionToken );
146 my $_set = $oai_sets->{$set};
147 if ( $_set && $_set->{id} && $_set->{record_class} eq $record_class) {
150 push @errors, new HTTP::OAI::Error(code=>'noRecordsMatch', message=>"Set argument doesn't match any sets. The setSpec was '$set'") ;
154 # Are the from and until ranges aligned ?
155 if ( $from && $until ) {
157 my $_until = $until ;
158 $_from =~ s/[-T:\.\+Z]//g ; # '2001-02-03T04:05:06Z' becomes '20010203040506'
159 $_until =~ s/[-T:\.\+Z]//g ;
160 push @errors, new HTTP::OAI::Error(code=>'badArgument', message=>'Bad date values, must have from<=until') unless ($_from <= $_until);
163 # Is this metadataformat available ?
164 push @errors, new HTTP::OAI::Error(code=>'cannotDisseminateFormat', message=>'The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository') unless ( ($verb eq 'ListMetadataFormats' || $verb eq 'ListSets' || $verb eq 'Identify') || $oai_metadataformats->{$metadataPrefix} );
168 # Now prepare the response
169 if ( $verb eq 'ListRecords' ) {
170 $response = listRecords( $record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset);
172 elsif ( $verb eq 'ListMetadataFormats' ) {
173 $response = listMetadataFormats();
175 elsif ( $verb eq 'ListSets' ) {
176 $response = listSets( $record_class, $requestURL );
178 elsif ( $verb eq 'GetRecord' ) {
179 $response = getRecord( $record_class, $requestURL, $identifier, $metadataPrefix, $set);
181 elsif ( $verb eq 'ListIdentifiers' ) {
182 $response = listIdentifiers( $record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset);
185 $response = identify($record_class);
191 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
192 $response->errors(@errors);
195 $cgi->header(-type=>'text/xml', -charset=>'utf-8');
196 $cgi->print($response->toDOM->toString());
198 return Apache2::Const::OK;
204 my $record_class = shift;
206 my $response = HTTP::OAI::Identify->new(
207 protocolVersion => '2.0',
208 baseURL => $base_url . '/' . $record_class,
209 repositoryName => $repository_name,
210 adminEmail => $admin_email,
211 MaxCount => $max_count,
212 granularity => $granularity,
213 earliestDatestamp => $earliest_datestamp,
214 deletedRecord => $deleted_record
217 $response->description(
218 HTTP::OAI::Metadata::OAI_Identifier->new(
220 'repositoryIdentifier' , $repository_identifier,
221 'delimiter', $delimiter,
222 'sampleIdentifier', $sample_identifier
230 sub listMetadataFormats {
232 my $response = HTTP::OAI::ListMetadataFormats->new();
233 foreach my $metadataPrefix (keys %$oai_metadataformats) {
234 my $metadata_format = $oai_metadataformats->{$metadataPrefix} ;
235 $response->metadataFormat( HTTP::OAI::MetadataFormat->new(
236 metadataPrefix => $metadataPrefix,
237 schema => $metadata_format->{schema},
238 metadataNamespace => $metadata_format->{metadataNamespace}
248 my ($record_class, $requestURL ) = @_;
251 my $response = HTTP::OAI::ListSets->new( );
252 foreach my $key (keys %$oai_sets) {
253 my $set = $oai_sets->{$key} ;
254 if ( $set && $set->{setSpec} && $set->{record_class} eq $record_class ) {
257 setSpec => $set->{setSpec},
258 setName => $set->{setName}
265 my @errors = (new HTTP::OAI::Error(code=>'noSetHierarchy', message=>'The repository does not support sets.') ) ;
266 my $response = HTTP::OAI::Response->new( requestURL => $requestURL );
267 $response->errors(@errors);
275 my ($record_class, $requestURL, $identifier, $metadataPrefix, $set ) = @_;
280 # Do we have a valid identifier ?
281 my $regex_identifier = "^${scheme}${delimiter}${repository_identifier}${delimiter}([0-9]+)\$";
282 if ( $identifier =~ /$regex_identifier/i ) {
285 # Do we have a record ?
286 my $record = $U->simplereq('open-ils.supercat','open-ils.supercat.oai.list.retrieve', $record_class, $rec_id, undef, undef, undef, 1, $deleted_record);
288 $response = HTTP::OAI::GetRecord->new();
289 $response->record(_record($record_class, $$record[0], $metadataPrefix, $set));
291 push @errors, new HTTP::OAI::Error(code=>'idDoesNotExist', message=>'The value of the identifier argument is unknown or illegal in this repository.') ;
295 push @errors, new HTTP::OAI::Error(code=>'idDoesNotExist', message=>'The value of the identifier argument is unknown or illegal in this repository.') ;
299 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
300 $response->errors(@errors);
307 sub listIdentifiers {
309 my ($record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset ) = @_;
312 my $r = $U->simplereq('open-ils.supercat','open-ils.supercat.oai.list.retrieve', $record_class, $offset, $from, $until, $oai_sets->{$set}->{setSpec}, $max_count, $deleted_record);
315 $response = HTTP::OAI::ListIdentifiers->new();
316 for my $record (@$r) {
317 if ( $cursor++ == $max_count ) {
318 my $token = new HTTP::OAI::ResumptionToken( resumptionToken => encode_base64(join( '$', $metadataPrefix, $from, $until, $oai_sets->{$set}->{setSpec}, $record->rec_id ), '' ) ) ;
319 $token->cursor($offset);
320 $response->resumptionToken($token) ;
322 $response->identifier( _header($record_class, $record)) ;
326 my @errors = (new HTTP::OAI::Error(code=>'noRecordsMatch', message=>'The combination of the values of the from, until, set, and metadataPrefix arguments results in an empty list.') ) ;
327 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
328 $response->errors(@errors);
337 my ($record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset ) = @_;
340 my $r = $U->simplereq('open-ils.supercat','open-ils.supercat.oai.list.retrieve', $record_class, $offset, $from, $until, $oai_sets->{$set}->{setSpec}, $max_count, $deleted_record);
343 $response = HTTP::OAI::ListRecords->new();
344 for my $record (@$r) {
345 if ( $cursor++ == $max_count ) {
346 my $token = new HTTP::OAI::ResumptionToken( resumptionToken => encode_base64(join( '$', $metadataPrefix, $from, $until, $oai_sets->{$set}->{setSpec}, $record->rec_id ), '' ) ) ;
347 $token->cursor($offset);
348 $response->resumptionToken($token) ;
350 $response->record(_record($record_class, $record, $metadataPrefix, $set));
354 my @errors = (new HTTP::OAI::Error(code=>'noRecordsMatch', message=>'The combination of the values of the from, until, set, and metadataPrefix arguments results in an empty list.') ) ;
355 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
356 $response->errors(@errors);
365 my ($record_class, $o) = @_;
368 my $status = 'deleted' if ($o->deleted eq 't');
369 my $s = $o->set_spec; # Here we get an array that was parsed as a string like "{1,2,3,4}"
370 $s =~ s/[{}]//g ; # We remove the {}
371 foreach (split(',', $s)) { # and turn this into an array.
372 my $_set = $oai_sets->{$_};
373 push @set_spec, $_set->{setSpec} if ( $_set && $_set->{record_class} eq $record_class) ;
376 return new HTTP::OAI::Header(
377 identifier => $scheme . $delimiter . $repository_identifier . $delimiter . $o->rec_id,
378 datestamp => substr($o->datestamp, 0, 19) . 'Z',
380 setSpec => \@set_spec
387 my ($record_class, $o, $metadataPrefix, $set ) = @_;
389 my $record = HTTP::OAI::Record->new();
390 $record->header( _header($record_class, $o) );
392 if ( $o->deleted eq 'f' ) {
393 my $md = new HTTP::OAI::Metadata() ;
394 my $xml = $U->simplereq('open-ils.supercat','open-ils.supercat.oai.' . $record_class . '.retrieve', $o->rec_id, $metadataPrefix, $oai_sets->{$set}->{setSpec});
395 $xml =~ s/^<\?xml[^?]+?\?>//;
396 $md->dom( $parser->parse_string('<metadata>' . $xml . '</metadata>') ); # Not sure why I need to add the metadata element,
397 $record->metadata( $md ); # because I expect ->metadata() would provide the wrapper for it.
404 # _load_oaisets_authority
405 # Populate the $oai_sets hash with the sets for authority records.
406 # oai_sets = {id\setSpec => {id, setSpec, setName, record_class = 'authority' }}
407 sub _load_oaisets_authority {
409 my $axes = $U->simplereq('open-ils.cstore','open-ils.cstore.direct.authority.browse_axis.search.atomic', {code => {'!=' => undef } } );
412 $oai_sets->{$o->code} = {
415 setName => $o->description, # description is more verbose than $o->name
416 record_class => 'authority'
422 # _load_oaisets_biblio
423 # Populate the $oai_sets hash with the sets for bibliographic records. Those are org_type records
424 # oai_sets = {id\setSpec => {id, setSpec, setName, record_class = 'biblio' }}
427 sub _load_oaisets_biblio {
433 $org_tree ||= $U->get_org_tree;
434 $bib_sources ||= $U->simplereq('open-ils.cat','open-ils.cat.bib_sources.retrieve.all');
439 return unless ($node->opac_visible =~ /^[y1t]+/i);
442 my $ou_hierarchy_string = ($parent) ? $parent . ':' . $node->shortname : $node->shortname ;
443 $logger->info('Registering setSpec list for ' . $ou_hierarchy_string);
445 my $cspec = 'COPIES:'.$ou_hierarchy_string;
446 $oai_sets->{$cspec} = {id => 'C'.$node->id, record_class => 'biblio' };
447 $oai_sets->{'C'.$node->id} = {setSpec => $cspec, setName => $node->name . ' / by copies', record_class => 'biblio' };
449 my $lspec = 'LURIS:'.$ou_hierarchy_string;
450 $oai_sets->{$lspec} = {id => 'L'.$node->id, record_class => 'biblio' };
451 $oai_sets->{'L'.$node->id} = {setSpec => $lspec, setName => $node->name . ' / by LURIs', record_class => 'biblio' };
453 my $clspec = $cspec . '!' . $lspec;
454 $oai_sets->{$clspec} = {id => 'CL'.$node->id, record_class => 'biblio' };
455 $oai_sets->{'CL'.$node->id} = {setSpec => $clspec, setName => $node->name . ' / by copies and LURIs', record_class => 'biblio' };
459 for my $s (@$bib_sources) {
461 my $sspec = 'SOURCES:'.$s->source;
462 $oai_sets->{$sspec} = {id => 'S'.$s->id, record_class => 'biblio' };
463 $oai_sets->{'S'.$s->id} = {setSpec => $sspec, setName => $s->source . ' / by source', record_class => 'biblio' };
465 my $csspec = $cspec . '!' . $sspec;
466 $oai_sets->{$csspec} = {id => $s->id.'CS'.$node->id, record_class => 'biblio' };
467 $oai_sets->{$s->id.'CS'.$node->id} = {setSpec => $csspec, setName => $node->name . ' / by copies and source', record_class => 'biblio' };
469 my $lsspec = $lspec . '!' . $sspec;
470 $oai_sets->{$lsspec} = {id => $s->id.'LS'.$node->id, record_class => 'biblio' };
471 $oai_sets->{$s->id.'LS'.$node->id} = {setSpec => $lsspec, setName => $node->name . ' / by LURIs and source', record_class => 'biblio' };
473 my $clsspec = $clspec . '!' . $sspec;
474 $oai_sets->{$clsspec} = {id => $s->id.'CLS'.$node->id, record_class => 'biblio' };
475 $oai_sets->{$s->id.'CLS'.$node->id} = {setSpec => $clsspec, setName => $node->name . ' / by copies, LURIs, and source', record_class => 'biblio' };
479 my $kids = $node->children;
480 _load_oaisets_biblio($_, $ou_hierarchy_string) for (@$kids);
484 # _load_oai_metadataformats
485 # Populate the $oai_metadataformats hash with the supported metadata formats:
486 # oai_metadataformats = { metadataPrefix => { schema, metadataNamespace } }
487 sub _load_oai_metadataformats {
489 my $list = $U->simplereq('open-ils.supercat','open-ils.supercat.oai.record.formats');
490 for my $record_browse_format ( @$list ) {
491 my %h = %$record_browse_format ;
492 my $metadataPrefix = (keys %h)[0] ;
493 $oai_metadataformats->{$metadataPrefix} = {
494 schema => $h{$metadataPrefix}->{'namespace_uri'},
495 metadataNamespace => $h{$metadataPrefix}->{'schema_location'}