1 # OpenILS::Application::SuperCat::OAI manages OAI2 requests and responses.
3 # Copyright (c) 2014-2017 International Institute of Social History
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Author: Lucien van Wouw <lwo@iisg.nl>
22 package OpenILS::Application::SuperCat::OAI;
23 use strict; use warnings;
25 use base qw/OpenILS::Application/;
26 use OpenSRF::AppSession;
27 use OpenSRF::EX qw(:try);
29 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
30 use OpenSRF::Utils::SettingsClient;
31 use OpenSRF::Utils::Logger qw($logger);
34 my $U = 'OpenILS::Application::AppUtils';
42 %authority_browse_axis_cache,
# Start-up configuration. Creates the shared XML parser and XSLT processor,
# fills %record_xslt with the supported metadata formats, loads the 852
# subfield map into %copies and reads the barcode and status filters.
# NOTE(review): "new XML::LibXML" is indirect object syntax;
# XML::LibXML->new is the unambiguous spelling.
52 $_parser = new XML::LibXML;
55 $_xslt = new XML::LibXSLT;
57 # Load the metadata formats that are configured.
# Each configured schema supplies a namespace_uri, a schema_location and the
# file name of a stylesheet, which is looked up in the configured 'xsl' dir.
# NOTE(review): the open-ils.supercat settings are fetched again further down
# for 'copies', 'barcode_filter' and 'status_filter'; a single lookup could
# be reused. Also confirm config_value() cannot return undef here, since the
# result is dereferenced immediately.
58 my $metadata_format = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.supercat')->{'app_settings'}->{'oai'}->{'metadataformat'};
59 if ( $metadata_format ) {
60 for my $schema ( keys %$metadata_format ) {
61 $logger->info('Loading schema ' . $schema) ;
62 $record_xslt{$schema}{namespace_uri} = $metadata_format->{$schema}->{namespace_uri};
63 $record_xslt{$schema}{schema_location} = $metadata_format->{$schema}->{schema_location};
64 $record_xslt{$schema}{xslt} = $_xslt->parse_stylesheet( $_parser->parse_file(
65 OpenSRF::Utils::SettingsClient->new->config_value( dirs => 'xsl' ) . '/' . $metadata_format->{$schema}->{xslt}
70 # Fall back on system defaults if oai_dc is not set in the configuration.
71 unless ( exists $record_xslt{oai_dc} ) {
72 $logger->info('Loading default oai_dc schema') ;
73 my $xslt = $_parser->parse_file(
74 OpenSRF::Utils::SettingsClient
76 ->config_value( dirs => 'xsl' ).
77 "/MARC21slim2OAIDC.xsl"
79 # and stash a transformer
80 $record_xslt{oai_dc}{xslt} = $_xslt->parse_stylesheet( $xslt );
81 $record_xslt{oai_dc}{namespace_uri} = 'http://www.openarchives.org/OAI/2.0/oai_dc/';
82 $record_xslt{oai_dc}{schema_location} = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd';
85 # Store info about the natural marcxml metadata setting. We don't actually use this to drive XSLT, but we can report support.
# marcxml is the only format that is given a 'docs' entry here.
86 $logger->info('Loading default marcxml schema') ;
87 $record_xslt{marcxml}{namespace_uri} = 'http://www.loc.gov/MARC21/slim';
88 $record_xslt{marcxml}{docs} = 'http://www.loc.gov/MARC21/slim';
89 $record_xslt{marcxml}{schema_location} = 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd';
91 # Load the mapping of 852 holdings.
# Every configured subfield code is copied into %copies together with its value.
92 my $copies = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.supercat')->{'app_settings'}->{'oai'}->{'copies'} ;
94 foreach my $subfield_code (keys %$copies) {
95 my $value = $copies->{$subfield_code};
96 $logger->debug('Set 852 map ' . $subfield_code . '=' . $value );
97 $copies{$subfield_code} = $value;
99 } else { # if not defined, fall back on these defaults.
110 # Set the barcode filter and status filter
# Both values are later interpolated into regular expressions when copies
# are tested for visibility.
111 $barcode_filter = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.supercat')->{'app_settings'}->{'oai'}->{'barcode_filter'};
112 $status_filter = OpenSRF::Utils::SettingsClient->new->config_value(apps => 'open-ils.supercat')->{'app_settings'}->{'oai'}->{'status_filter'};
# Describes every metadata format known to this service. For each key of
# %record_xslt a hash is built holding that format's namespace_uri, docs and
# schema_location.
# NOTE(review): only the marcxml entry appears to receive a 'docs' value at
# start-up, so 'docs' may be undef for the other formats - confirm callers
# cope with that.
118 sub list_record_formats {
121 for my $type ( keys %record_xslt ) {
124 { namespace_uri => $record_xslt{$type}{namespace_uri},
125 docs => $record_xslt{$type}{docs},
126 schema_location => $record_xslt{$type}{schema_location},
# Publishes list_record_formats under the API name
# open-ils.supercat.oai.record.formats.
134 __PACKAGE__->register_method(
135 method => 'list_record_formats',
136 api_name => 'open-ils.supercat.oai.record.formats',
141 desc => 'Returns the list of valid record formats that oai understands.',
144 desc => 'The format list.',
# Builds the OAI payload for one bibliographic record.
#
# The record is fetched from open-ils.cstore together with its call numbers
# and copies. Copies are filtered by org unit and by _cp_is_visible(), the
# 001, 852 and 866 fields of the MARC record are rewritten from what remains,
# and the result is returned either as MARCXML (metadataPrefix 'marcxml') or
# as the output of the stylesheet stored in %record_xslt for the requested
# metadataPrefix.
151 sub oai_biblio_retrieve {
156 my $metadataPrefix = shift;
# When a set was requested, take the org unit (second return value) from
# _set_spec_to_query and flag the id of every org unit visited by
# walk_org_tree in %copy_org_filter.
161 (undef, $copy_org) = _set_spec_to_query('biblio',$set) if ($set);
165 $U->walk_org_tree($copy_org, sub {
167 $copy_org_filter{$c->id} = 1;
171 # holdings hold an array of call numbers, which hold an array of copies
172 # holdings => [ label: { library, [ copies: { barcode, location, status, circ_lib } ] } ]
175 my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' );
177 # Retrieve the bibliographic record and its copies
178 my $tree = $_storage->request(
179 "open-ils.cstore.direct.biblio.record_entry.retrieve",
183 bre => [qw/call_numbers/],
184 acn => [qw/copies owning_lib prefix suffix uris/],
185 acp => [qw/location status circ_lib parts/],
# Normalise a missing call number list to an empty array so it can be iterated.
190 $tree->call_numbers([]) if (!$tree->call_numbers);
192 # Create a MARC::Record object with the marc.
193 my $marc = MARC::Record->new_from_xml( $tree->marc, 'UTF8', 'XML');
195 # Retrieve the MFHD where we can find them.
# Only done when leader position 7 marks the record as a serial ('s').
197 if ( substr($marc->leader, 7, 1) eq 's' ) { # serial
198 my $_search = OpenSRF::AppSession->create( 'open-ils.search' );
199 my $_serials = $_search->request('open-ils.search.serial.record.bib.retrieve', $rec_id, 1, 0)->gather(1);
201 for my $sre (@$_serials) {
202 if ( $sre->location ) {
# The call number label is the last ' -- ' separated part of the location.
204 my @svr = split( ' -- ', $sre->location );
205 my $cn_label = $svr[-1];
206 $serials{$order}{'label'} = $cn_label ;
# Use basic_holdings_add when it is non-empty, otherwise basic_holdings.
207 my $display = @{$sre->basic_holdings_add} ? $sre->basic_holdings_add : $sre->basic_holdings;
208 $serials{$order}{'ser'} = join(', ', @{$display});
# $edit_date starts as the record's own edit date and is advanced below with
# the edit dates of the call numbers and copies that are kept.
213 my $edit_date = $tree->edit_date ;
215 # Prepare a hash of all holdings and serials
216 for my $cn (@{$tree->call_numbers}) {
# Skip deleted call numbers.
218 next unless ( $cn->deleted eq 'f' || !$cn->deleted );
220 my @visible_copies = @{$cn->copies};
# Keep copies whose circulating library is flagged in %copy_org_filter ...
222 @visible_copies = grep { $copy_org_filter{$_->circ_lib->id} } @visible_copies;
# ... and that pass the visibility rules; drop call numbers left without copies.
224 @visible_copies = grep { _cp_is_visible($cn, $_) } @visible_copies;
225 next unless @visible_copies;
227 my $cn_label = $cn->label;
228 $holdings{$cn_label}{'owning_lib'} = $cn->owning_lib->shortname;
230 $edit_date = most_recent_date( $cn->edit_date, $edit_date );
232 for my $cp (@visible_copies) {
234 $edit_date = most_recent_date( $cp->edit_date, $edit_date );
236 # find the corresponding serial.
237 # There is no way of knowing here if the barcode 852$p is a correct match.
# NOTE(review): the keys of %serials look like $order counters; "sort keys"
# compares them as strings, so 10 would sort before 2 - confirm whether a
# numeric sort is intended.
240 foreach my $key (sort keys %serials) {
241 my $serial = $serials{$key};
242 if ( $serial->{'label'} eq $cn_label ) {
243 $ser = $serial->{'ser'};
245 delete $serials{$key}; # in case we have several serial holdings with the same call number
249 $holdings{$cn_label}{'order'} = $order ;
# NOTE(review): $circlib looks unused - the copy hash below reads
# $cp->circ_lib->shortname directly. Confirm and remove if so.
251 my $circlib = $cp->circ_lib->shortname ;
252 push @{$holdings{$cn->label}{'copies'}}, {
253 owning_lib => $cn->owning_lib->shortname,
254 callnumber => $cn->label,
255 barcode => $cp->barcode,
256 status => $cp->status->name,
257 location => $cp->location->name,
258 circlib => $cp->circ_lib->shortname,
264 ## Append the holdings and MFHD data to the marc record and apply the stylesheet.
267 # Force leader position 9 to 'a' as our data is always UTF8
268 # Avoids marc8_to_utf8 from being invoked with horrible results
269 # on the off-chance the record leader isn't correct
270 my $ldr = $marc->leader;
271 substr($ldr, 9, 1, 'a');
274 # Expects the record ID in the 001
# NOTE(review): every 001 is deleted on the first line, so the following
# "if (!$marc->field('001'))" test looks always true - confirm and simplify.
275 $marc->delete_field($_) for ($marc->field('001'));
276 if (!$marc->field('001')) {
277 $marc->insert_fields_ordered(
278 MARC::Field->new( '001', $rec_id )
282 # Our reference node to prepend nodes to.
# NOTE(review): $reference is undef when the record has no 901 field; confirm
# every record carries one before it is passed to insert_fields_before.
283 my $reference = $marc->field('901');
285 $marc->delete_field($_) for ($marc->field('852')); # remove any legacy 852s
# Emit one 852 per copy, call numbers ordered by their numeric 'order' value.
286 foreach my $cn (sort { $holdings{$a}->{'order'} <=> $holdings{$b}->{'order'}} keys %holdings) {
287 foreach my $cp (@{$holdings{$cn}->{'copies'}}) {
288 my $marc_852 = MARC::Field->new(
289 '852', '4', ' ', 0 => 'dummy'); # The dummy is necessary to prevent a validation error.
# For each mapped subfield code: use the copy attribute named by the map
# value when it is set, otherwise the map value itself as a literal.
290 foreach my $subfield_code (sort keys %copies) {
291 my $_cp = $copies{$subfield_code} ;
292 $marc_852->add_subfields($subfield_code, $cp->{$_cp} || $_cp) if ($_cp);
# Drop the placeholder subfield 0 again now that real subfields were added.
294 $marc_852->delete_subfield(code => '0');
295 $marc->insert_fields_before($reference, $marc_852);
# Serial holdings text, when present, follows its 852 as an 866 $a.
296 if ( $cp->{'ser'} ) {
297 my $marc_866_a = MARC::Field->new( '866', '4', ' ', 'a' => $cp->{'ser'});
298 $marc->insert_fields_after( $marc_852, $marc_866_a ) ;
# NOTE(review): this changes the package global permanently;
# oai_authority_retrieve sets the same variable with "local". Consider
# "local" here as well so the setting does not leak to other code.
305 $XML::LibXML::skipXMLDeclaration = 1;
306 return $marc->as_xml_record() if ($metadataPrefix eq 'marcxml');
# NOTE(review): an unknown metadataPrefix leaves $xslt undef and the
# transform call below would die - confirm the prefix is validated upstream.
308 my $xslt = $record_xslt{$metadataPrefix}{xslt} ;
309 my $xml = $xslt->transform( $_parser->parse_string( $marc->as_xml_record()) );
310 return $xslt->output_as_chars( $xml ) ;
# Publishes oai_biblio_retrieve under the API name
# open-ils.supercat.oai.biblio.retrieve.
314 __PACKAGE__->register_method(
315 method => 'oai_biblio_retrieve',
316 api_name => 'open-ils.supercat.oai.biblio.retrieve',
321 desc => 'Returns the MARCXML representation of the requested bibliographic record.',
326 desc => 'An OpenILS biblio::record_entry id.',
330 name => 'metadataPrefix',
331 desc => 'The metadataPrefix of the schema.',
337 desc => 'An string of the XML in the desired schema.',
# Compares two timestamps and yields the later one.
#
# Both arguments are cut to their first 19 characters, the separator
# characters are stripped, and the remaining digit strings are compared
# numerically. The first date (in its 19 character form) is returned when it
# is strictly later than the second.
# NOTE(review): the time-zone offset is cut off before the comparison, so
# timestamps carrying different offsets are compared by their local clock
# value - confirm all inputs share one offset.
# NOTE(review): an undef argument would raise warnings in substr - confirm
# callers always pass two defined dates.
344 sub most_recent_date {
346 my $date1 = substr(shift, 0, 19) ; # e.g. '2001-02-03T04:05:06+0000' becomes '2001-02-03T04:05:06'
347 my $date2 = substr(shift, 0, 19) ;
# Keep the readable forms; the copies below are reduced to digits only.
348 my $_date1 = $date1 ;
349 my $_date2 = $date2 ;
351 $date1 =~ s/[-T:\.\+]//g ; # '2001-02-03T04:05:06' becomes '20010203040506'
352 $date2 =~ s/[-T:\.\+]//g ;
354 return $_date1 if ( $date1 > $date2) ;
# Visibility test for a copy ($cp) on a call number ($cn). All of the
# following must hold:
#   - the copy is not deleted;
#   - its barcode matches $barcode_filter, when that filter is set;
#   - opac_visible is 't' on the copy, its location, its status, its
#     circulating library and the call number's owning library;
#   - its status name matches $status_filter, when that filter is set.
# Both filters are interpolated as regular expressions.
365 if ( ($cp->deleted eq 'f' || !$cp->deleted) &&
366 ( ! $barcode_filter || $cp->barcode =~ /$barcode_filter/ ) &&
367 $cp->location->opac_visible eq 't' &&
368 $cp->status->opac_visible eq 't' &&
369 $cp->opac_visible eq 't' &&
370 $cp->circ_lib->opac_visible eq 't' &&
371 $cn->owning_lib->opac_visible eq 't' &&
372 (! $status_filter || $cp->status->name =~ /$status_filter/ )
# Builds the OAI payload for one authority record.
#
# The record is fetched from open-ils.cstore, its 001 field is replaced by
# the record id, and the result is returned either as MARCXML
# (metadataPrefix 'marcxml') or as the output of the stylesheet stored in
# %record_xslt for the requested metadataPrefix.
381 sub oai_authority_retrieve {
386 my $metadataPrefix = shift;
388 my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' );
390 # Retrieve the authority record
391 my $record = $_storage->request('open-ils.cstore.direct.authority.record_entry.retrieve', $rec_id)->gather(1);
392 my $o = Fieldmapper::authority::record_entry->new($record) ;
393 my $marc = MARC::Record->new_from_xml( $o->marc, 'UTF8', 'XML');
395 # Expects the record ID in the 001
# NOTE(review): every 001 is deleted on the first line, so the following
# "if (!$marc->field('001'))" test looks always true - confirm and simplify.
396 $marc->delete_field($_) for ($marc->field('001'));
397 if (!$marc->field('001')) {
398 $marc->insert_fields_ordered(
399 MARC::Field->new( '001', $rec_id )
# "local" restores the previous value of the global when this sub returns.
403 local $XML::LibXML::skipXMLDeclaration = 1;
404 return $marc->as_xml_record() if ($metadataPrefix eq 'marcxml');
# NOTE(review): $xslt looks unused - the calls below read
# $record_xslt{$metadataPrefix}{xslt} directly. Also confirm the prefix is
# validated upstream; an unknown prefix would make the transform call die.
406 my $xslt = $record_xslt{$metadataPrefix}{xslt} ;
407 my $xml = $record_xslt{$metadataPrefix}{xslt}->transform(
408 $_parser->parse_string( $marc->as_xml_record())
410 return $record_xslt{$metadataPrefix}{xslt}->output_as_chars( $xml ) ;
# Publishes oai_authority_retrieve under the API name
# open-ils.supercat.oai.authority.retrieve.
414 __PACKAGE__->register_method(
415 method => 'oai_authority_retrieve',
416 api_name => 'open-ils.supercat.oai.authority.retrieve',
421 desc => 'Returns the MARCXML representation of the requested authority record.',
426 desc => 'An OpenILS authority::record_entry id.',
430 name => 'metadataPrefix',
431 desc => 'The metadataPrefix of the schema.',
437 desc => 'An string of the XML in the desired schema.',
# Searches the OAI view of a record class and returns the matching rows.
#
# record_class defaults to 'biblio', rec_id to 0 and deleted_record to 'yes'.
# The search is sent to open-ils.cstore as
# open-ils.cstore.direct.oai.<record_class>.search.atomic, with a limit of
# max_count + 1 rows.
444 sub oai_list_retrieve {
448 my $record_class = shift || 'biblio';
449 my $rec_id = shift || 0;
453 my $max_count = shift;
454 my $deleted_record = shift || 'yes';
# NOTE(review): "my ... if COND" is documented in perlsyn as having undefined
# behaviour; when $set is false $query may not be a fresh variable. Consider
# declaring $query first and assigning conditionally.
456 my ($query) = _set_spec_to_query($record_class,$set) if ($set);
# With max_count 1 the exact rec_id is requested; otherwise rec_id acts as a
# lower bound.
# NOTE(review): "eq" compares $max_count as a string; "==" would state the
# numeric intent.
458 $query->{'rec_id'} = ($max_count eq 1) ? $rec_id : {'>=' => $rec_id} ;
# Deleted records are excluded unless deleted_record is 'yes'.
459 $query->{'deleted'} = 'f' unless ( $deleted_record eq 'yes' );
# Date window: lower bound only, upper bound only, or both combined via -and.
460 $query->{'datestamp'} = {'>=', $from} if ( $from && !$until ) ;
461 $query->{'datestamp'} = {'<=', $until} if ( !$from && $until ) ;
462 $query->{'-and'} = [{'datestamp'=>{'>=' => $from}}, {'datestamp'=>{'<=' => $until}}] if ( $from && $until ) ;
464 my $_storage = OpenSRF::AppSession->create( 'open-ils.cstore' );
465 return $_storage->request('open-ils.cstore.direct.oai.' . $record_class . '.search.atomic',
468 limit => $max_count + 1
# Publishes oai_list_retrieve under the API name
# open-ils.supercat.oai.list.retrieve.
473 __PACKAGE__->register_method(
474 method => 'oai_list_retrieve',
475 api_name => 'open-ils.supercat.oai.list.retrieve',
480 desc => 'Returns a list of record identifiers.',
484 name => 'record_class',
485 desc => '\'biblio\' for bibliographic records or \'authority\' for authority records',
489 desc => 'An optional rec_id number used as a cursor.',
494 desc => 'The datestamp the resultset range should begin with.',
499 desc => 'The datestamp the resultset range should end with.',
504 desc => 'A setspec.',
509 desc => 'The number of identifiers to return.',
513 name => 'deleted_record',
514 desc => 'If set to \'no\' the response will only include active records.',
520 desc => 'An OAI type record.',
# Translates an OAI set spec into a query fragment.
#
# For type 'biblio' the spec is scanned for COPIES:, LURIS: and SOURCES:
# sections; for type 'authority' the whole spec is treated as a ':'
# separated list of axes. Every match appends a rec_id condition, expressed
# through a database transform function, to the '-or' list of the query part.
# Returns the list ($query_part, $copy_org).
526 sub _set_spec_to_query {
528 my $set_spec = shift;
532 if ($type eq 'biblio') {
# COPIES: the last ':' separated shortname selects the org unit.
533 if ($set_spec =~ /COPIES:([^!]+)/) {
535 my $shortname = (split ':', $org_list)[-1];
536 my $org_unit = $U->find_org_by_shortname($U->get_org_tree, $shortname);
# NOTE(review): confirm $org_unit is defined for unknown shortnames before
# ->id is called on it below.
538 $copy_org = $org_unit;
539 $$query_part{'-or'} //= [];
540 push @{$$query_part{'-or'}}, {rec_id => {'=' => {
541 transform => 'oai.bib_is_visible_at_org_by_copy',
542 params => [$org_unit->id],
543 value => ['bool','1']
# LURIS: same org unit lookup as for COPIES, with a different transform.
548 if ($set_spec =~ /LURIS:([^!]+)/) {
550 my $shortname = (split ':', $org_list)[-1];
551 my $org_unit = $U->find_org_by_shortname($U->get_org_tree, $shortname);
553 $copy_org = $org_unit;
554 $$query_part{'-or'} //= [];
555 push @{$$query_part{'-or'}}, {rec_id => {'=' => {
556 transform => 'oai.bib_is_visible_at_org_by_luri',
557 params => [$org_unit->id],
558 value => ['bool','1']
# SOURCES: one condition per ':' separated source.
563 if ($set_spec =~ /SOURCES:([^!]+)/) {
565 my @sources = split ':', $list;
566 for my $source (@sources) {
567 $$query_part{'-or'} //= [];
568 push @{$$query_part{'-or'}}, {rec_id => {'=' => {
569 transform => 'oai.bib_is_visible_by_source',
571 value => ['bool','1']
# Authority sets: one condition per ':' separated axis.
575 } elsif ($type eq 'authority') {
576 my @axes = split ':', $set_spec;
577 for my $axis (@axes) {
578 $$query_part{'-or'} //= [];
579 push @{$$query_part{'-or'}}, {rec_id => {'=' => {
580 transform => 'oai.auth_is_visible_by_axis',
582 value => ['bool','1']
587 return ($query_part, $copy_org);