1 # OpenILS::WWW::OAI manages OAI2 requests and responses.
3 # Copyright (c) 2014-2017 International Institute of Social History
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Author: Lucien van Wouw <lwo@iisg.nl>
22 package OpenILS::WWW::OAI;
23 use strict; use warnings;
24 use Apache2::Const -compile => qw(OK REDIRECT DECLINED NOT_FOUND :log);
26 use DateTime::Format::ISO8601;
28 use HTTP::OAI::Metadata::OAI_Identifier;
29 use HTTP::OAI::Repository qw/:validate/;
30 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
33 use OpenSRF::EX qw(:try);
34 use OpenSRF::Utils::Logger qw/$logger/;
41 $repository_identifier,
69 OpenSRF::System->bootstrap_client( config_file => $bootstrap );
71 my $idl = OpenSRF::Utils::SettingsClient->new->config_value('IDL');
72 Fieldmapper->import(IDL => $idl);
74 $oai = OpenSRF::AppSession->create('open-ils.oai');
75 $parser = new XML::LibXML;
76 $xslt = new XML::LibXSLT;
78 my $app_settings = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.oai')->{'app_settings'};
79 $base_url = $app_settings->{'base_url'} || 'localhost';
80 $base_url =~/(.*)\/$/ ; # Keep all minus the trailing forward slash.
81 $repository_identifier = $app_settings->{'repository_identifier'} || 'localhost';
82 $repository_name = $app_settings->{'repository_name '} || 'A name';
83 $admin_email = $app_settings->{'admin_email'} || 'adminEmail@' . $repository_identifier ;
84 $earliest_datestamp = $app_settings->{'earliest_datestamp'} || '0001-01-01' ;
85 $deleted_record = $app_settings->{'deleted_record'} || 'yes' ;
86 $max_count = $app_settings->{'max_count'} || 50;
87 $granularity = $app_settings->{'granularity' } || 'YYYY-MM-DDThh:mm:ss';
88 $scheme = $app_settings->{'scheme'} || 'oai';
89 $delimiter = $app_settings->{'delimiter'} || ':';
90 $sample_identifier = $app_settings->{'sample_identifier'} || $scheme . $delimiter . $repository_identifier . $delimiter . '12345' ;
91 $list_sets = $app_settings->{'list_sets'} || 0;
94 _load_oaisets_authority();
95 _load_oaisets_biblio();
97 _load_oai_metadataformats();
99 return Apache2::Const::OK;
106 return Apache2::Const::DECLINED if (-e $apache->filename);
108 unless (defined $oai) {
109 $logger->error('Application session variables not defined. Add \'PerlChildInitHandler OpenILS::WWW::OAI::child_init\' to the Apache virtual host configuration file.');
115 if ( $cgi->path_info =~ /\/(authority|biblio)/ ) {
118 return Apache2::Const::NOT_FOUND ;
121 my %attr = $cgi->Vars();
122 my $requestURL = $base_url
123 . '/' . $record_class
125 . join('&', map { "$_=$attr{$_}" } keys %attr);
126 $logger->info('Request url=' . $requestURL ) ;
129 my @errors = validate_request( %attr );
132 # Retrieve our parameters
133 my $verb = delete( $attr{verb} );
134 my $identifier = $attr{identifier};
135 my $metadataPrefix = $attr{metadataPrefix} ;
136 my $from = $attr{from};
137 my $until = $attr{'until'};
138 my $set = $attr{set};
139 my $resumptionToken = decode_base64($attr{resumptionToken} ) if $attr{resumptionToken};
141 if ( $resumptionToken ) {
142 ($metadataPrefix, $from, $until, $set, $offset) = split( '\$', $resumptionToken );
147 my $_set = $oai_sets->{$set};
148 if ( $_set && $_set->{id} && $_set->{record_class} eq $record_class) {
151 push @errors, new HTTP::OAI::Error(code=>'noRecordsMatch', message=>"Set argument doesn't match any sets. The setSpec was '$set'") ;
155 # Are the from and until ranges aligned ?
156 if ( $from && $until ) {
158 my $_until = $until ;
159 $_from =~ s/[-T:\.\+Z]//g ; # '2001-02-03T04:05:06Z' becomes '20010203040506'
160 $_until =~ s/[-T:\.\+Z]//g ;
161 push @errors, new HTTP::OAI::Error(code=>'badArgument', message=>'Bad date values, must have from<=until') unless ($_from <= $_until);
164 # Is this metadataformat available ?
165 push @errors, new HTTP::OAI::Error(code=>'cannotDisseminateFormat', message=>'The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository') unless ( ($verb eq 'ListMetadataFormats' || $verb eq 'ListSets' || $verb eq 'Identify') || $oai_metadataformats->{$metadataPrefix} );
169 # Now prepare the response
170 if ( $verb eq 'ListRecords' ) {
171 $response = listRecords( $record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset);
173 elsif ( $verb eq 'ListMetadataFormats' ) {
174 $response = listMetadataFormats();
176 elsif ( $verb eq 'ListSets' ) {
177 $response = listSets( $record_class, $requestURL );
179 elsif ( $verb eq 'GetRecord' ) {
180 $response = getRecord( $record_class, $requestURL, $identifier, $metadataPrefix);
182 elsif ( $verb eq 'ListIdentifiers' ) {
183 $response = listIdentifiers( $record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset);
186 $response = identify($record_class);
192 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
193 $response->errors(@errors);
196 $cgi->header(-type=>'text/xml', -charset=>'utf-8');
197 $cgi->print($response->toDOM->toString());
199 return Apache2::Const::OK;
205 my $record_class = shift;
207 my $response = HTTP::OAI::Identify->new(
208 protocolVersion => '2.0',
209 baseURL => $base_url . '/' . $record_class,
210 repositoryName => $repository_name,
211 adminEmail => $admin_email,
212 MaxCount => $max_count,
213 granularity => $granularity,
214 earliestDatestamp => $earliest_datestamp,
215 deletedRecord => $deleted_record
218 $response->description(
219 HTTP::OAI::Metadata::OAI_Identifier->new(
221 'repositoryIdentifier' , $repository_identifier,
222 'delimiter', $delimiter,
223 'sampleIdentifier', $sample_identifier
231 sub listMetadataFormats {
233 my $response = HTTP::OAI::ListMetadataFormats->new();
234 foreach my $metadataPrefix (keys %$oai_metadataformats) {
235 my $metadata_format = $oai_metadataformats->{$metadataPrefix} ;
236 $response->metadataFormat( HTTP::OAI::MetadataFormat->new(
237 metadataPrefix => $metadataPrefix,
238 schema => $metadata_format->{schema},
239 metadataNamespace => $metadata_format->{metadataNamespace}
249 my ($record_class, $requestURL ) = @_;
252 my $response = HTTP::OAI::ListSets->new( );
253 foreach my $key (keys %$oai_sets) {
254 my $set = $oai_sets->{$key} ;
255 if ( $set && $set->{setSpec} && $set->{record_class} eq $record_class ) {
258 setSpec => $set->{setSpec},
259 setName => $set->{setName}
266 my @errors = (new HTTP::OAI::Error(code=>'noSetHierarchy', message=>'The repository does not support sets.') ) ;
267 my $response = HTTP::OAI::Response->new( requestURL => $requestURL );
268 $response->errors(@errors);
276 my ($record_class, $requestURL, $identifier, $metadataPrefix ) = @_;
281 # Do we have a valid identifier ?
282 my $regex_identifier = "^${scheme}${delimiter}${repository_identifier}${delimiter}([0-9]+)\$";
283 if ( $identifier =~ /$regex_identifier/i ) {
286 # Do we have a record ?
287 my $record = $oai->request('open-ils.oai.list.retrieve', $record_class, $rec_id, undef, undef, undef, 1, $deleted_record)->gather(1) ;
289 $response = HTTP::OAI::GetRecord->new();
290 my $o = "Fieldmapper::oai::$record_class"->new(@$record[0]);
291 $response->record(_record($record_class, $o, $metadataPrefix));
293 push @errors, new HTTP::OAI::Error(code=>'idDoesNotExist', message=>'The value of the identifier argument is unknown or illegal in this repository.') ;
297 push @errors, new HTTP::OAI::Error(code=>'idDoesNotExist', message=>'The value of the identifier argument is unknown or illegal in this repository.') ;
301 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
302 $response->errors(@errors);
309 sub listIdentifiers {
311 my ($record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset ) = @_;
314 my $r = $oai->request('open-ils.oai.list.retrieve', $record_class, $offset, $from, $until, $set, $max_count, $deleted_record)->gather(1) ;
317 $response = HTTP::OAI::ListIdentifiers->new();
318 for my $record (@$r) {
319 my $o = "Fieldmapper::oai::$record_class"->new($record) ;
320 if ( $cursor++ == $max_count ) {
321 my $token = new HTTP::OAI::ResumptionToken( resumptionToken => encode_base64(join( '$', $metadataPrefix, $from, $until, $oai_sets->{$set}->{setSpec}, $o->rec_id ), '' ) ) ;
322 $token->cursor($offset);
323 $response->resumptionToken($token) ;
325 $response->identifier( _header($record_class, $o)) ;
329 my @errors = (new HTTP::OAI::Error(code=>'noRecordsMatch', message=>'The combination of the values of the from, until, set, and metadataPrefix arguments results in an empty list.') ) ;
330 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
331 $response->errors(@errors);
340 my ($record_class, $requestURL, $from, $until, $set, $metadataPrefix, $offset ) = @_;
343 my $r = $oai->request('open-ils.oai.list.retrieve', $record_class, $offset, $from, $until, $set, $max_count, $deleted_record)->gather(1) ;
346 $response = HTTP::OAI::ListRecords->new();
347 for my $record (@$r) {
348 my $o = "Fieldmapper::oai::$record_class"->new($record) ;
349 if ( $cursor++ == $max_count ) {
350 my $token = new HTTP::OAI::ResumptionToken( resumptionToken => encode_base64(join( '$', $metadataPrefix, $from, $until, $oai_sets->{$set}->{setSpec}, $o->rec_id ), '' ) ) ;
351 $token->cursor($offset);
352 $response->resumptionToken($token) ;
354 $response->record(_record($record_class, $o, $metadataPrefix));
358 my @errors = (new HTTP::OAI::Error(code=>'noRecordsMatch', message=>'The combination of the values of the from, until, set, and metadataPrefix arguments results in an empty list.') ) ;
359 $response = HTTP::OAI::Response->new( requestURL => $requestURL );
360 $response->errors(@errors);
369 my ($record_class, $o) = @_;
372 my $status = 'deleted' if ($o->deleted eq 't');
373 my $s = $o->set_spec; # Here we get an array that was parsed as a string like "{1,2,3,4}"
374 $s =~ s/[{}]//g ; # We remove the {}
375 foreach (split(',', $s)) { # and turn this into an array.
376 my $_set = $oai_sets->{$_};
377 push @set_spec, $_set->{setSpec} if ( $_set && $_set->{record_class} eq $record_class) ;
380 return new HTTP::OAI::Header(
381 identifier => $scheme . $delimiter . $repository_identifier . $delimiter . $o->rec_id,
382 datestamp => substr($o->datestamp, 0, 19) . 'Z',
384 setSpec => \@set_spec
391 my ($record_class, $o, $metadataPrefix ) = @_;
393 my $record = HTTP::OAI::Record->new();
394 $record->header( _header($record_class, $o) );
396 if ( $o->deleted eq 'f' ) {
397 my $md = new HTTP::OAI::Metadata() ;
398 my $xml = $oai->request('open-ils.oai.' . $record_class . '.retrieve', $o->rec_id, $metadataPrefix)->gather(1) ;
399 $md->dom( $parser->parse_string('<metadata>' . $xml . '</metadata>') ); # Not sure why I need to add the metadata element,
400 $record->metadata( $md ); # because I expect ->metadata() would provide the wrapper for it.
407 # _load_oaisets_authority
408 # Populate the $oai_sets hash with the sets for authority records.
409 # oai_sets = {id\setSpec => {id, setSpec, setName, record_class = 'authority' }}
410 sub _load_oaisets_authority {
412 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
413 my $r = $ses->request('open-ils.cstore.direct.authority.browse_axis.search.atomic',
414 {code => {'!=' => undef } } )->gather(1);
416 for my $record (@$r) {
417 my $o = Fieldmapper::authority::browse_axis->new($record) ;
418 $oai_sets->{$o->code} = {
421 setName => $o->description, # description is more verbose than $o->name
422 record_class => 'authority'
428 # _load_oaisets_biblio
429 # Populate the $oai_sets hash with the sets for bibliographic records. Those are org_type records
430 # oai_sets = {id\setSpec => {id, setSpec, setName, record_class = 'biblio' }}
431 sub _load_oaisets_biblio {
438 my $ses = OpenSRF::AppSession->create('open-ils.actor');
439 $node = $ses->request('open-ils.actor.org_tree.retrieve')->gather(1);
440 my $aout = $ses->request('open-ils.actor.org_types.retrieve')->gather(1);
442 return unless ($node) ;
445 foreach my $type (@$aout) {
446 $_types[int($type->id)] = $type;
451 return unless ($node->opac_visible =~ /^[y1t]+/i);
453 my $spec = ($parent) ? $parent . ':' . $node->shortname : $node->shortname ;
454 $oai_sets->{$spec} = {id => $node->id, record_class => 'biblio' };
455 $oai_sets->{$node->id} = {setSpec => $spec, setName => $node->name, record_class => 'biblio' };
457 my $kids = $node->children;
458 _load_oaisets_biblio($_, $types, $spec) for (@$kids);
462 # _load_oai_metadataformats
463 # Populate the $oai_metadataformats hash with the supported metadata formats:
464 # oai_metadataformats = { metadataPrefix => { schema, metadataNamespace } }
465 sub _load_oai_metadataformats {
467 my $list = $oai->request('open-ils.oai.record.formats')->gather(1);
468 for my $record_browse_format ( @$list ) {
469 my %h = %$record_browse_format ;
470 my $metadataPrefix = (keys %h)[0] ;
471 $oai_metadataformats->{$metadataPrefix} = {
472 schema => $h{$metadataPrefix}->{'namespace_uri'},
473 metadataNamespace => $h{$metadataPrefix}->{'schema_location'}