]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
adding subject searches to feed output
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / SuperCat.pm
1 package OpenILS::Application::SuperCat;
2
3 use strict;
4 use warnings;
5
6 # All OpenSRF applications must be based on OpenSRF::Application or
7 # a subclass thereof.  Makes sense, eh?
8 use OpenSRF::Application;
9 use base qw/OpenSRF::Application/;
10
11 # This is the client class, used for connecting to open-ils.storage
12 use OpenSRF::AppSession;
13
14 # This is an extension of Error.pm that supplies some error types to throw
15 use OpenSRF::EX qw(:try);
16
17 # This is a helper class for querying the OpenSRF Settings application ...
18 use OpenSRF::Utils::SettingsClient;
19
20 # ... and here we have the built in logging helper ...
21 use OpenSRF::Utils::Logger qw($logger);
22
23 # ... and this is our OpenILS object (en|de)coder and pseudo-ORM package.
24 use OpenILS::Utils::Fieldmapper;
25
26
27 # We'll be working with XML, so...
28 use XML::LibXML;
29 use XML::LibXSLT;
30 use Unicode::Normalize;
31
32 use JSON;
33
# Package-wide state shared by the methods below.  child_init() fills in
# the parser, the XSLT engine, the storage session and %record_xslt;
# nothing in this file populates %metarecord_xslt.
our ( $_parser, $_xslt, $_storage, %record_xslt, %metarecord_xslt );
41
# Parse one stylesheet found in $xsl_dir and stash the compiled transformer
# in %record_xslt under $type, together with whatever descriptive metadata
# (namespace_uri, docs, schema_location) the caller supplies.
#
# Only the metadata keys actually passed in are stored, so a format without
# a schema location simply has no such key.
sub _load_record_xslt {
    my ( $type, $xsl_dir, $file, %meta ) = @_;

    my $stylesheet_doc = $_parser->parse_file( $xsl_dir . "/$file" );

    # and stash a transformer
    $record_xslt{$type}{xslt} = $_xslt->parse_stylesheet($stylesheet_doc);

    for my $key ( keys %meta ) {
        $record_xslt{$type}{$key} = $meta{$key};
    }

    return;
}

# Initialise the package-wide state: the XML parser, the XSLT engine, one
# compiled stylesheet per supported record format, and the storage session.
# Finishes by registering one retrieval method per loaded format.
#
# Presumably invoked by OpenSRF once per child process (the caller is not
# visible in this file).  Returns 1.
sub child_init {
    # we need an XML parser
    $_parser = XML::LibXML->new;

    $logger->debug("Got here!");

    # and an xslt parser
    $_xslt = XML::LibXSLT->new;

    # Every stylesheet lives in the same configured directory, so ask the
    # settings client for it once instead of once per stylesheet.
    my $xsl_dir = OpenSRF::Utils::SettingsClient
        ->new
        ->config_value( dirs => 'xsl' );

    # MODS v3
    _load_record_xslt(
        mods3 => $xsl_dir, 'MARC21slim2MODS3.xsl',
        namespace_uri   => 'http://www.loc.gov/mods/v3',
        docs            => 'http://www.loc.gov/mods/',
        schema_location => 'http://www.loc.gov/standards/mods/v3/mods-3-1.xsd',
    );

    # MODS
    _load_record_xslt(
        mods => $xsl_dir, 'MARC21slim2MODS.xsl',
        namespace_uri   => 'http://www.loc.gov/mods/',
        docs            => 'http://www.loc.gov/mods/',
        schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
    );

    $logger->debug("Got here!");

    # ATOM entry
    _load_record_xslt(
        atom => $xsl_dir, 'MARC21slim2ATOM.xsl',
        namespace_uri => 'http://www.w3.org/2005/Atom',
        docs          => 'http://www.ietf.org/rfc/rfc4287.txt',
    );

    # RDFDC
    _load_record_xslt(
        rdf_dc => $xsl_dir, 'MARC21slim2RDFDC.xsl',
        namespace_uri   => 'http://purl.org/dc/elements/1.1/',
        schema_location => 'http://purl.org/dc/elements/1.1/',
    );

    $logger->debug("Got here!");

    # SRWDC
    _load_record_xslt(
        srw_dc => $xsl_dir, 'MARC21slim2SRWDC.xsl',
        namespace_uri   => 'info:srw/schema/1/dc-schema',
        schema_location => 'http://www.loc.gov/z3950/agency/zing/srw/dc-schema.xsd',
    );

    $logger->debug("Got here!");

    # OAIDC
    _load_record_xslt(
        oai_dc => $xsl_dir, 'MARC21slim2OAIDC.xsl',
        namespace_uri   => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        schema_location => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
    );

    $logger->debug("Got here!");

    # RSS 2 (no descriptive metadata is recorded for this format)
    _load_record_xslt( rss2 => $xsl_dir, 'MARC21slim2RSS2.xsl' );

    $logger->debug("Got here!");

    # and finally, a storage server session
    $_storage = OpenSRF::AppSession->create('open-ils.storage');

    register_record_transforms();

    return 1;
}
152
# Publish one "open-ils.supercat.record.<type>.retrieve" API method for each
# format currently present in %record_xslt.  All of them are backed by
# retrieve_record_transform, which works out the format from its api_name.
sub register_record_transforms {
    for my $type ( keys %record_xslt ) {

        # The signature text shows the format name in upper case.
        my $format = uc $type;

        my %signature = (
            desc     => "Returns the $format representation of the requested bibliographic record\n",
            params   => [
                {
                    name => 'bibId',
                    desc => 'An OpenILS biblio::record_entry id',
                    type => 'number',
                },
            ],
            'return' => {
                desc => "The bib record in $format",
                type => 'string',
            },
        );

        __PACKAGE__->register_method(
            method    => 'retrieve_record_transform',
            api_name  => "open-ils.supercat.record.$type.retrieve",
            api_level => 1,
            argc      => 1,
            signature => \%signature,
        );
    }
}
177
178
# Return the given text in Unicode Normalization Form C with every
# non-ASCII character replaced by a hexadecimal numeric character reference
# (e.g. U+00E9 becomes "&#xE9;"), so the result is plain ASCII.
#
# Characters above U+FFFF are entityized as well; U+FFFE and U+FFFF are
# left untouched.  An undefined argument yields the empty string without
# raising an "uninitialized" warning.
sub entityize {
    my $stuff = shift;
    return '' unless defined $stuff;

    $stuff = NFC($stuff);
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;', ord($1))/sge;

    return $stuff;
}
184
185
# API method: fetch a bibliographic record from the storage service and
# return its MARCXML, entityized.
#
# Params:  $rid - the biblio::record_entry id to look up.
# Returns: the entityized MARCXML string, or undef when the storage lookup
#          yields nothing (previously this died calling ->marc on undef).
sub retrieve_record_marcxml {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    my $record = $_storage
        ->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rid )
        ->gather(1);

    # Explicit undef so the caller still receives exactly one value, just
    # as on the success path.
    return undef unless ($record);

    return entityize( $record->marc );
}
199
# Publish retrieve_record_marcxml as open-ils.supercat.record.marcxml.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_record_marcxml',
    api_name  => 'open-ils.supercat.record.marcxml.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        desc     => "Returns the MARCXML representation of the requested bibliographic record\n",
        params   => [
            {
                name => 'bibId',
                desc => 'An OpenILS biblio::record_entry id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The bib record in MARCXML',
            type => 'string',
        },
    },
);
220
# API method backing every open-ils.supercat.record.<type>.retrieve call:
# fetch a bibliographic record, run its MARCXML through the stylesheet
# registered for <type>, and return the entityized result.
#
# Params:  $rid - the biblio::record_entry id to transform.
# Returns: the transformed record as a string, or undef when the storage
#          lookup yields nothing (previously this died calling ->marc on
#          undef).
# Dies:    when no stylesheet is registered for the format named in the
#          method's api_name.
sub retrieve_record_transform {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    # The format name is the path component between "record." and ".retrieve".
    (my $transform = $self->api_name) =~ s/^.+record\.([^\.]+)\.retrieve$/$1/o;

    # Check with exists first: dereferencing an unknown key would
    # autovivify a bogus entry that list_record_formats would then report.
    die "No XSLT transform registered for '$transform'\n"
        unless ( exists $record_xslt{$transform} && $record_xslt{$transform}{xslt} );

    my $record = $_storage->request(
        'open-ils.storage.direct.biblio.record_entry.retrieve',
        $rid
    )->gather(1);

    # Explicit undef so the caller still receives exactly one value, just
    # as on the success path.
    return undef unless ($record);

    my $marc_doc = $_parser->parse_string( $record->marc );

    return entityize( $record_xslt{$transform}{xslt}->transform($marc_doc)->toString );
}
235
236
# Assemble the MODS document for metarecord $rid.  Expects $_storage to be
# connected already; retrieve_metarecord_mods() owns the session lifetime.
#
# Returns the entityized MODS string, or undef when the metarecord lookup
# yields nothing.
sub _build_metarecord_mods {
    my ( $self, $rid ) = @_;

    my $mods_ns = 'http://www.loc.gov/mods/';

    # Get the metarecord in question
    my $mr = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord.retrieve' => $rid
    )->gather(1);

    return undef unless ($mr);

    # Now get the map of all bib records for the metarecord
    my $recs = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
        $rid
    )->gather(1) || [];

    $logger->debug("Adding ".scalar(@$recs)." bib record to the MODS of the metarecord");

    # and retrieve the lead (master) record as MODS
    my ($master) =
        $self->method_lookup('open-ils.supercat.record.mods.retrieve')
             ->run( $mr->master_record );
    my $master_mods = $_parser->parse_string($master)->documentElement;
    $master_mods->setNamespace( $mods_ns, 'mods', 1 );

    # ... and a MODS clone to populate, with guts removed.  It is parsed
    # separately so that emptying it leaves $master_mods intact.
    my $mods = $_parser->parse_string($master)->documentElement;
    $mods->setNamespace( $mods_ns, 'mods', 1 );
    ($mods) = $mods->findnodes('//mods:mods');
    $mods->removeChildNodes;

    my $doc = $mods->ownerDocument;

    # Put a node from another document into the mods namespace and import
    # it into the output document; returns the imported node.
    my $adopt = sub {
        my $node = shift;
        $node->setNamespace( $mods_ns, 'mods', 1 );
        return $doc->importNode($node);
    };

    # Every identifier minted here is a tag: URI stamped with today's
    # (local) date.  The ids are appended by concatenation so they can
    # never be misread as sprintf directives.
    my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
    my $tag_prefix = sprintf( 'tag:open-ils.org,%d-%02d-%02d',
        $year + 1900, $month + 1, $day );

    # Add the metarecord ID as a (locally defined) info URI
    my $recordInfo       = $doc->createElement("mods:recordInfo");
    my $recordIdentifier = $doc->createElement("mods:recordIdentifier");
    $recordIdentifier->appendTextNode(
        $tag_prefix . ':metabib-metarecord/' . $mr->id
    );
    $recordInfo->appendChild($recordIdentifier);
    $mods->appendChild($recordInfo);

    # Grab the title, author and ISBN for the master record and populate
    # the metarecord
    my ($title) = $master_mods->findnodes('./mods:titleInfo[not(@type)]');
    if ($title) {
        $mods->appendChild( $adopt->($title) );
    }

    my ($author) = $master_mods->findnodes('./mods:name[mods:role/mods:text[text()="creator"]]');
    if ($author) {
        $mods->appendChild( $adopt->($author) );
    }

    my ($isbn) = $master_mods->findnodes('./mods:identifier[@type="isbn"]');
    if ($isbn) {
        $isbn = $adopt->($isbn);
        $mods->appendChild($isbn);
    }

    # ... and loop over the constituent records
    for my $map (@$recs) {
        my $subid = $map->source;

        # get the MODS
        my ($rec) =
            $self->method_lookup('open-ils.supercat.record.mods.retrieve')
                 ->run($subid);

        my $part_doc = $_parser->parse_string($rec);
        $part_doc->documentElement->setNamespace( $mods_ns, 'mods', 1 );
        my ($part_mods) = $part_doc->findnodes('//mods:mods');

        # Subjects from every constituent go straight onto the metarecord.
        for my $subject ( $part_mods->findnodes('./mods:subject') ) {
            $mods->appendChild( $adopt->($subject) );
        }

        my $relatedItem = $doc->createElement("mods:relatedItem");
        $relatedItem->setAttribute( type => 'constituent' );

        my $subRecordInfo       = $doc->createElement("mods:recordInfo");
        my $subRecordIdentifier = $doc->createElement("mods:recordIdentifier");
        $subRecordIdentifier->appendTextNode(
            $tag_prefix . ':biblio-record_entry/' . $subid
        );
        $subRecordInfo->appendChild($subRecordIdentifier);
        $relatedItem->appendChild($subRecordInfo);

        my ($tor) = $part_mods->findnodes('./mods:typeOfResource');
        if ($tor) {
            $relatedItem->appendChild( $adopt->($tor) );
        }

        my ($part_isbn) = $part_mods->findnodes('./mods:identifier[@type="isbn"]');
        if ($part_isbn) {
            $part_isbn = $adopt->($part_isbn);
            $relatedItem->appendChild($part_isbn);

            # The master record had no ISBN: promote the first one found
            # on a constituent to the metarecord itself.
            if ( !$isbn ) {
                $isbn = $mods->appendChild( $part_isbn->cloneNode(1) );
            }
        }

        $mods->appendChild($relatedItem);
    }

    return entityize( $mods->toString );
}

# API method: build a MODS document describing a metarecord.  The document
# carries the metarecord's tag: URI, the master record's title, creator and
# ISBN, the subjects of every constituent record, and one
# <mods:relatedItem type="constituent"> per mapped bib record.
#
# Params:  $rid - the metabib::metarecord id.
# Returns: the entityized MODS string, or undef when the metarecord lookup
#          yields nothing.
# Dies:    re-raises whatever the build step throws, after the storage
#          session has been disconnected.
sub retrieve_metarecord_mods {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    # We want a session
    $_storage->connect;

    # Run the build under eval so the session is always released, even
    # when a lookup or an XML parse throws half way through.
    my $mods_xml;
    my $ok  = eval { $mods_xml = _build_metarecord_mods( $self, $rid ); 1 };
    my $err = $@;

    $_storage->disconnect;

    die( $err || "retrieve_metarecord_mods failed\n" ) unless ($ok);

    return $mods_xml;
}
# Publish retrieve_metarecord_mods as open-ils.supercat.metarecord.mods.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_metarecord_mods',
    api_name  => 'open-ils.supercat.metarecord.mods.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        desc     => "Returns the MODS representation of the requested metarecord\n",
        params   => [
            {
                name => 'metarecordId',
                desc => 'An OpenILS metabib::metarecord id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The metarecord in MODS',
            type => 'string',
        },
    },
);
410
# API method: list the metarecord formats this service can produce.
#
# Returns an array ref of single-key hashes, { <format> => { namespace_uri,
# docs, schema_location } }.  MODS always comes first; one entry follows
# for each key of %metarecord_xslt.  A detail the table lacks is reported
# as undef.
sub list_metarecord_formats {
    my @fields = qw( namespace_uri docs schema_location );

    my @formats = (
        {
            mods => {
                namespace_uri   => 'http://www.loc.gov/mods/',
                docs            => 'http://www.loc.gov/mods/',
                schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
            },
        },
    );

    for my $type ( keys %metarecord_xslt ) {
        my %details;
        @details{@fields} = @{ $metarecord_xslt{$type} }{@fields};
        push @formats, { $type => \%details };
    }

    return \@formats;
}
# Publish list_metarecord_formats as open-ils.supercat.metarecord.formats.
__PACKAGE__->register_method(
    method    => 'list_metarecord_formats',
    api_name  => 'open-ils.supercat.metarecord.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        desc     => "Returns the list of valid metarecord formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
447
448
# API method: list the bibliographic record formats this service can
# produce.
#
# Returns an array ref of single-key hashes, { <format> => { namespace_uri,
# docs, schema_location } }.  MARCXML always comes first; one entry follows
# for each key of %record_xslt.  A detail the table lacks is reported as
# undef.
sub list_record_formats {
    my @fields = qw( namespace_uri docs schema_location );

    my @formats = (
        {
            marcxml => {
                namespace_uri   => 'http://www.loc.gov/MARC21/slim',
                docs            => 'http://www.loc.gov/marcxml/',
                schema_location => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
            },
        },
    );

    for my $type ( keys %record_xslt ) {
        my %details;
        @details{@fields} = @{ $record_xslt{$type} }{@fields};
        push @formats, { $type => \%details };
    }

    return \@formats;
}
# Publish list_record_formats as open-ils.supercat.record.formats.
__PACKAGE__->register_method(
    method    => 'list_record_formats',
    api_name  => 'open-ils.supercat.record.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        desc     => "Returns the list of valid record formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
485
486
# Perform the storage lookups behind oISBN().  Expects $_storage to be
# connected already; oISBN() owns the session lifetime.
#
# Returns { metarecord => <id>, record_list => { <record id> => <ISBN> } },
# or an empty hash ref when no record carries the ISBN or the matching
# record is not mapped to any metarecord.
sub _oisbn_lookup {
    my $isbn = shift;

    # Find the record that has that ISBN.
    my $bibrec = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', value => { ilike => $isbn.'%'} }
    )->gather(1);

    # Go away if we don't have one.
    return {} unless ( $bibrec && @$bibrec );

    # Find the metarecord for that bib record.
    my $mr = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
        $bibrec->[0]->record
    )->gather(1);

    # Likewise if the record is not attached to any metarecord.
    return {} unless ( $mr && @$mr );

    my $metarecord = $mr->[0]->metarecord;

    # Find the other records for that metarecord.
    my $records = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
        $metarecord
    )->gather(1);

    # Just to be safe.  There's currently no unique constraint on sources...
    my %unique_recs = map { ( $_->source, 1 ) } @{ $records || [] };
    my @rec_list    = sort keys %unique_recs;

    # And now fetch the ISBNs for those records.
    my $recs = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', record => \@rec_list }
    )->gather(1);

    # Return the oISBN data structure.  This will be XMLized at a higher layer.
    return {
        metarecord  => $metarecord,
        record_list => { map { ( $_->record, $_->value ) } @{ $recs || [] } },
    };
}

# API method: given an ISBN, find a bib record whose 020$a starts with it
# and return the ISBNs of every record grouped under the same metarecord.
#
# Params:  $isbn - the ISBN to look up; must be at least 10 characters.
# Returns: { metarecord => <id>, record_list => { <record id> => <ISBN> } },
#          or an empty hash ref when nothing matches.
# Throws:  OpenSRF::EX::InvalidArg when $isbn is missing or too short;
#          re-raises any lookup failure after disconnecting the session.
sub oISBN {
    my $self   = shift;
    my $client = shift;
    my $isbn   = shift;

    OpenSRF::EX::InvalidArg->throw('I need an ISBN please')
        unless ( defined($isbn) && length($isbn) >= 10 );

    # Create a storage session, since we'll be making multiple requests.
    $_storage->connect;

    # Run the lookups under eval so the session is released on every path,
    # including the "nothing found" early exits and any exception.
    my $result;
    my $ok  = eval { $result = _oisbn_lookup($isbn); 1 };
    my $err = $@;

    # We're done with the storage server session.
    $_storage->disconnect;

    die( $err || "oISBN lookup failed\n" ) unless ($ok);

    return $result;
}
# Publish oISBN as open-ils.supercat.oisbn.
__PACKAGE__->register_method(
    method    => 'oISBN',
    api_name  => 'open-ils.supercat.oisbn',
    api_level => 1,
    argc      => 1,
    signature => {
        desc     => "Returns the ISBN list for the metarecord of the requested isbn\n",
        params   => [
            {
                name => 'isbn',
                desc => 'An ISBN.  Duh.',
                type => 'string',
            },
        ],
        'return' => {
            desc => 'record to isbn map',
            type => 'object',
        },
    },
);
558
559 1;