]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
adding atom format support and bookbag feeds (atom/rss2/mods/html(soon))
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / SuperCat.pm
1 package OpenILS::Application::SuperCat;
2
3 use strict;
4 use warnings;
5
6 # All OpenSRF applications must be based on OpenSRF::Application or
7 # a subclass thereof.  Makes sense, eh?
8 use OpenSRF::Application;
9 use base qw/OpenSRF::Application/;
10
11 # This is the client class, used for connecting to open-ils.storage
12 use OpenSRF::AppSession;
13
14 # This is an extension of Error.pm that supplies some error types to throw
15 use OpenSRF::EX qw(:try);
16
17 # This is a helper class for querying the OpenSRF Settings application ...
18 use OpenSRF::Utils::SettingsClient;
19
20 # ... and here we have the built in logging helper ...
21 use OpenSRF::Utils::Logger qw($logger);
22
23 # ... and this is our OpenILS object (en|de)coder and pseudo-ORM package.
24 use OpenILS::Utils::Fieldmapper;
25
26
27 # We'll be working with XML, so...
28 use XML::LibXML;
29 use XML::LibXSLT;
30 use Unicode::Normalize;
31
32 use JSON;
33
# Package-level state shared by every method in this module.  The first four
# are assigned in child_init(); no code visible in this file populates
# %metarecord_xslt.
our $_parser;            # XML::LibXML parser instance
our $_xslt;              # XML::LibXSLT instance used to compile stylesheets
our $_storage;           # OpenSRF::AppSession handle for open-ils.storage
our %record_xslt;        # format name => { xslt, namespace_uri, docs, schema_location }
our %metarecord_xslt;    # read by list_metarecord_formats() with the same key layout
41
# Initialisation hook: builds the shared XML and XSLT parsers, compiles one
# MARCXML stylesheet per supported record format into %record_xslt, creates
# the open-ils.storage session and registers the per-format
# "open-ils.supercat.record.<format>.retrieve" methods.
#
# Takes no arguments that it uses.  Returns 1.
sub child_init {
    # we need an XML parser ...
    $_parser = XML::LibXML->new;

    # ... and an xslt parser
    $_xslt = XML::LibXSLT->new;

    # Every stylesheet lives in the same directory, so ask the settings
    # client for it once rather than once per format.
    my $xsl_dir = OpenSRF::Utils::SettingsClient
        ->new
        ->config_value( dirs => 'xsl' );

    # [ format name, stylesheet file, metadata advertised by list_record_formats ]
    my @formats = (
        [ mods => 'MARC21slim2MODS.xsl',
            { namespace_uri   => 'http://www.loc.gov/mods/',
              docs            => 'http://www.loc.gov/mods/',
              schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
            },
        ],
        [ atom => 'MARC21slim2ATOM.xsl',
            { namespace_uri => 'http://www.w3.org/2005/Atom',
              docs          => 'http://www.ietf.org/rfc/rfc4287.txt',
            },
        ],
        [ rdf_dc => 'MARC21slim2RDFDC.xsl',
            { namespace_uri   => 'http://purl.org/dc/elements/1.1/',
              schema_location => 'http://purl.org/dc/elements/1.1/',
            },
        ],
        [ srw_dc => 'MARC21slim2SRWDC.xsl',
            { namespace_uri   => 'info:srw/schema/1/dc-schema',
              schema_location => 'http://www.loc.gov/z3950/agency/zing/srw/dc-schema.xsd',
            },
        ],
        [ oai_dc => 'MARC21slim2OAIDC.xsl',
            { namespace_uri   => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
              schema_location => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
            },
        ],
        # RSS2 advertises no namespace, docs or schema location.
        [ rss2 => 'MARC21slim2RSS2.xsl', {} ],
    );

    for my $format (@formats) {
        my ( $type, $file, $info ) = @$format;
        my $path = "$xsl_dir/$file";

        $logger->debug("Loading the $type stylesheet from $path");

        # parse the stylesheet source and stash a compiled transformer
        my $stylesheet = $_parser->parse_file($path);
        $record_xslt{$type} = {
            %$info,
            xslt => $_xslt->parse_stylesheet($stylesheet),
        };
    }

    # and finally, a storage server session
    $_storage = OpenSRF::AppSession->create('open-ils.storage');

    register_record_transforms();

    return 1;
}
139
# Registers one "open-ils.supercat.record.<format>.retrieve" API method for
# each key currently present in %record_xslt.  All of them are served by
# retrieve_record_transform.
sub register_record_transforms {
    foreach my $format ( keys %record_xslt ) {

        # Built fresh on every pass so each method gets its own signature.
        my %signature = (
            desc   => "Returns the \U$format\E representation of the requested bibliographic record\n",
            params => [
                {   name => 'bibId',
                    desc => 'An OpenILS biblio::record_entry id',
                    type => 'number',
                },
            ],
            'return' => {
                desc => "The bib record in \U$format\E",
                type => 'string',
            },
        );

        __PACKAGE__->register_method(
            method    => 'retrieve_record_transform',
            api_name  => "open-ils.supercat.record.$format.retrieve",
            api_level => 1,
            argc      => 1,
            signature => \%signature,
        );
    }
}
164
165
# Returns its argument normalised to Unicode NFC with every character
# outside 7-bit ASCII replaced by a hexadecimal numeric character reference
# (e.g. U+00E9 becomes "&#xE9;").
#
# The class covers U+0080..U+FFFD and also U+10000..U+10FFFF, so
# supplementary-plane characters are escaped as well.  U+FFFE and U+FFFF
# are deliberately left out of the class.
sub entityize {
    my $stuff = NFC(shift());
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/ge;
    return $stuff;
}
171
172
# API method: fetches a biblio::record_entry from open-ils.storage and
# returns its marc field, passed through entityize().
#
# Params:  $self, $client (unused here), $rid - the record id.
# Returns: the entityized MARCXML string, or nothing (undef to the caller)
#          when the storage server has no record with that id.
sub retrieve_record_marcxml {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    my $record = $_storage
        ->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rid )
        ->gather(1);

    # An unknown id gathers to undef; don't try to call ->marc on it.
    return unless $record;

    return entityize( $record->marc );
}
186
# Publish retrieve_record_marcxml as open-ils.supercat.record.marcxml.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_record_marcxml',
    api_name  => 'open-ils.supercat.record.marcxml.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        desc   => "Returns the MARCXML representation of the requested bibliographic record\n",
        params => [
            {   name => 'bibId',
                desc => 'An OpenILS biblio::record_entry id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The bib record in MARCXML',
            type => 'string',
        },
    },
);
207
# API method behind every "open-ils.supercat.record.<format>.retrieve" name
# registered by register_record_transforms().  The format is taken from the
# api_name the method was invoked as, and the record's MARCXML is run
# through the matching compiled stylesheet in %record_xslt.
#
# Params:  $self, $client (unused here), $rid - the record id.
# Returns: the entityized, transformed document as a string, or nothing
#          (undef to the caller) when no record with that id exists.
sub retrieve_record_transform {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    # "...record.mods.retrieve" -> "mods"
    ( my $transform = $self->api_name ) =~ s/^.+record\.([^\.]+)\.retrieve$/$1/;

    my $record = $_storage->request(
        'open-ils.storage.direct.biblio.record_entry.retrieve',
        $rid
    )->gather(1);

    # An unknown id gathers to undef; there is nothing to transform.
    return unless $record;

    my $marc_doc = $_parser->parse_string( $record->marc );

    return entityize(
        $record_xslt{$transform}{xslt}->transform($marc_doc)->toString
    );
}
222
223
# API method: builds a MODS document describing a metarecord.
#
# The result is a clone of the master record's MODS element with its
# children removed, then filled with:
#   * a recordInfo/recordIdentifier naming the metarecord,
#   * the master's untyped titleInfo, "creator" name and ISBN identifier,
#   * the subjects of every constituent bib record, and
#   * one relatedItem[@type="constituent"] per constituent record.
#
# Params:  $self, $client (unused here), $rid - the metarecord id.
# Returns: the entityized MODS string, or undef when the metarecord or the
#          MODS of its master record cannot be retrieved.
#
# The storage session is disconnected again even when building fails; the
# original error is then re-thrown unchanged.
sub retrieve_metarecord_mods {
    my $self   = shift;
    my $client = shift;
    my $rid    = shift;

    # We want a session
    $_storage->connect;

    my $xml;
    my $ok = eval { $xml = _build_metarecord_mods( $self, $rid ); 1 };
    my $error = $@;

    # Always release the session, whether or not the build succeeded.
    $_storage->disconnect;

    die $error unless $ok;
    return $xml;
}

# Formats the tag: URI used as a MODS recordIdentifier.
sub _supercat_tag_uri {
    my ( $year, $month, $day, $class, $id ) = @_;

    # Ids are passed as arguments rather than spliced into the format string.
    return sprintf( 'tag:open-ils.org,%d-%02d-%02d:%s/%s',
        $year, $month, $day, $class, $id );
}

# Does the real work for retrieve_metarecord_mods; expects $_storage to be
# connected already and leaves connection handling to its caller.
sub _build_metarecord_mods {
    my ( $self, $rid ) = @_;

    my $mods_ns = 'http://www.loc.gov/mods/';

    # Get the metarecord in question
    my $mr = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord.retrieve' => $rid
    )->gather(1);
    return unless $mr;

    # Now get the map of all bib records for the metarecord
    my $recs = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
        $rid
    )->gather(1) || [];

    $logger->debug("Adding ".scalar(@$recs)." bib record to the MODS of the metarecord");

    # Fetches one bib record as a MODS string via the sibling API method.
    my $fetch_mods = sub {
        my ($xml) = $self
            ->method_lookup('open-ils.supercat.record.mods.retrieve')
            ->run(shift);
        return $xml;
    };

    # Retrieve the lead (master) record as MODS
    my $master = $fetch_mods->( $mr->master_record );
    return unless $master;

    my $master_mods = $_parser->parse_string($master)->documentElement;
    $master_mods->setNamespace( $mods_ns, 'mods', 1 );

    # ... and a MODS clone to populate, with guts removed.
    my $mods = $_parser->parse_string($master)->documentElement;
    $mods->setNamespace( $mods_ns, 'mods', 1 );
    ($mods) = $mods->findnodes('//mods:mods');
    return unless $mods;
    $mods->removeChildNodes;

    my $doc = $mods->ownerDocument;

    # Puts $node in the mods: namespace, imports it into the output
    # document, appends it to $parent and returns the imported copy.
    my $adopt = sub {
        my ( $node, $parent ) = @_;
        $node->setNamespace( $mods_ns, 'mods', 1 );
        my $copy = $doc->importNode($node);
        $parent->appendChild($copy);
        return $copy;
    };

    my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
    $year  += 1900;
    $month += 1;

    # Add the metarecord ID as a (locally defined) tag URI.  It gets its own
    # "metabib-metarecord" namespace so it cannot be mistaken for the bib
    # record that happens to share the same numeric id.
    my $recordInfo       = $doc->createElement('mods:recordInfo');
    my $recordIdentifier = $doc->createElement('mods:recordIdentifier');
    $recordIdentifier->appendTextNode(
        _supercat_tag_uri( $year, $month, $day, 'metabib-metarecord', $mr->id )
    );
    $recordInfo->appendChild($recordIdentifier);
    $mods->appendChild($recordInfo);

    # Grab the title, author and ISBN for the master record and populate the metarecord
    my ($title) = $master_mods->findnodes('./mods:titleInfo[not(@type)]');
    $adopt->( $title, $mods ) if $title;

    my ($author) = $master_mods->findnodes('./mods:name[mods:role/mods:text[text()="creator"]]');
    $adopt->( $author, $mods ) if $author;

    my ($isbn) = $master_mods->findnodes('./mods:identifier[@type="isbn"]');
    $isbn = $adopt->( $isbn, $mods ) if $isbn;

    # ... and loop over the constituent records
    for my $map (@$recs) {
        my $source = $map->source;

        # get the MODS
        my $rec = $fetch_mods->($source);
        unless ($rec) {
            $logger->warn("No MODS available for constituent record $source of metarecord $rid; skipping it");
            next;
        }

        my $part_doc = $_parser->parse_string($rec);
        $part_doc->documentElement->setNamespace( $mods_ns, 'mods', 1 );
        my ($part_mods) = $part_doc->findnodes('//mods:mods');
        next unless $part_mods;

        # Every constituent contributes its subjects to the metarecord.
        for my $subject ( $part_mods->findnodes('./mods:subject') ) {
            $adopt->( $subject, $mods );
        }

        my $relatedItem = $doc->createElement('mods:relatedItem');
        $relatedItem->setAttribute( type => 'constituent' );

        my $subRecordInfo       = $doc->createElement('mods:recordInfo');
        my $subRecordIdentifier = $doc->createElement('mods:recordIdentifier');
        $subRecordIdentifier->appendTextNode(
            _supercat_tag_uri( $year, $month, $day, 'biblio-record_entry', $source )
        );
        $subRecordInfo->appendChild($subRecordIdentifier);
        $relatedItem->appendChild($subRecordInfo);

        my ($tor) = $part_mods->findnodes('./mods:typeOfResource');
        $adopt->( $tor, $relatedItem ) if $tor;

        my ($part_isbn) = $part_mods->findnodes('./mods:identifier[@type="isbn"]');
        if ($part_isbn) {
            my $copy = $adopt->( $part_isbn, $relatedItem );

            # If the master had no ISBN, the first constituent ISBN found
            # is promoted to the metarecord level.
            $isbn ||= $mods->appendChild( $copy->cloneNode(1) );
        }

        $mods->appendChild($relatedItem);
    }

    return entityize( $mods->toString );
}
# Publish retrieve_metarecord_mods as open-ils.supercat.metarecord.mods.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_metarecord_mods',
    api_name  => 'open-ils.supercat.metarecord.mods.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        desc   => "Returns the MODS representation of the requested metarecord\n",
        params => [
            {   name => 'metarecordId',
                desc => 'An OpenILS metabib::metarecord id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The metarecord in MODS',
            type => 'string',
        },
    },
);
400
# Returns an array ref of single-key hashes, one per metarecord format:
# { <format> => { namespace_uri, docs, schema_location } }.  MODS is always
# first; an entry follows for each key of %metarecord_xslt.
sub list_metarecord_formats {
    my %mods_info = (
        namespace_uri   => 'http://www.loc.gov/mods/',
        docs            => 'http://www.loc.gov/mods/',
        schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
    );

    my @extra = map {
        +{  $_ => {
                namespace_uri   => $metarecord_xslt{$_}{namespace_uri},
                docs            => $metarecord_xslt{$_}{docs},
                schema_location => $metarecord_xslt{$_}{schema_location},
            }
        }
    } keys %metarecord_xslt;

    return [ { mods => \%mods_info }, @extra ];
}
# Publish list_metarecord_formats as open-ils.supercat.metarecord.formats.
__PACKAGE__->register_method(
    method    => 'list_metarecord_formats',
    api_name  => 'open-ils.supercat.metarecord.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        desc     => "Returns the list of valid metarecord formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
437
438
# Returns an array ref of single-key hashes, one per bib record format:
# { <format> => { namespace_uri, docs, schema_location } }.  MARCXML is
# always first; an entry follows for each key of %record_xslt, with undef
# for any piece of metadata that format does not define.
sub list_record_formats {
    my %marcxml_info = (
        namespace_uri   => 'http://www.loc.gov/MARC21/slim',
        docs            => 'http://www.loc.gov/marcxml/',
        schema_location => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
    );

    my @extra = map {
        +{  $_ => {
                namespace_uri   => $record_xslt{$_}{namespace_uri},
                docs            => $record_xslt{$_}{docs},
                schema_location => $record_xslt{$_}{schema_location},
            }
        }
    } keys %record_xslt;

    return [ { marcxml => \%marcxml_info }, @extra ];
}
# Publish list_record_formats as open-ils.supercat.record.formats.
__PACKAGE__->register_method(
    method    => 'list_record_formats',
    api_name  => 'open-ils.supercat.record.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        desc     => "Returns the list of valid record formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
475
476
# API method: given an ISBN, finds a bib record carrying it (020$a, matched
# as a case-insensitive prefix), follows that record to its metarecord, and
# returns the 020$a values of every record grouped under that metarecord.
#
# Params:  $self, $client (unused here), $isbn - at least 10 characters.
# Returns: { metarecord => <id>, record_list => { <record id> => <020$a value> } },
#          or an empty hash ref when no record or no metarecord mapping
#          matches.
# Throws:  OpenSRF::EX::InvalidArg when $isbn is missing or shorter than 10.
#
# Every return path after the connect disconnects the storage session.
sub oISBN {
    my $self   = shift;
    my $client = shift;
    my $isbn   = shift;

    OpenSRF::EX::InvalidArg->throw('I need an ISBN please')
        unless ( defined($isbn) && length($isbn) >= 10 );

    # Create a storage session, since we'll be making multiple requests.
    $_storage->connect;

    # Find the record that has that ISBN.
    my $bibrec = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', value => { ilike => $isbn.'%'} }
    )->gather(1);

    # Go away if we don't have one.
    unless ( $bibrec && @$bibrec ) {
        $_storage->disconnect;
        return {};
    }

    # Find the metarecord for that bib record.
    my $mr = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
        $bibrec->[0]->record
    )->gather(1);

    # A record that is not mapped to any metarecord has nothing to report.
    unless ( $mr && @$mr ) {
        $_storage->disconnect;
        return {};
    }

    my $metarecord = $mr->[0]->metarecord;

    # Find the other records for that metarecord.
    my $records = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
        $metarecord
    )->gather(1);

    # Just to be safe.  There's currently no unique constraint on sources...
    my %unique_recs = map { ( $_->source, 1 ) } @$records;
    my @rec_list    = sort keys %unique_recs;

    # And now fetch the ISBNs for those records.
    my $recs = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', record => \@rec_list }
    )->gather(1);

    # We're done with the storage server session.
    $_storage->disconnect;

    # Return the oISBN data structure.  This will be XMLized at a higher layer.
    return {
        metarecord  => $metarecord,
        record_list => { map { ( $_->record, $_->value ) } @$recs },
    };
}
# Publish oISBN as open-ils.supercat.oisbn.
__PACKAGE__->register_method(
    method    => 'oISBN',
    api_name  => 'open-ils.supercat.oisbn',
    api_level => 1,
    argc      => 1,
    signature => {
        desc   => "Returns the ISBN list for the metarecord of the requested isbn\n",
        params => [
            {   name => 'isbn',
                desc => 'An ISBN.  Duh.',
                type => 'string',
            },
        ],
        'return' => {
            desc => 'record to isbn map',
            type => 'object',
        },
    },
);
548
549 1;