]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm
adding freshmeat feed support
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / SuperCat.pm
1 package OpenILS::Application::SuperCat;
2
3 use strict;
4 use warnings;
5
6 # All OpenSRF applications must be based on OpenSRF::Application or
7 # a subclass thereof.  Makes sense, eh?
8 use OpenSRF::Application;
9 use base qw/OpenSRF::Application/;
10
11 # This is the client class, used for connecting to open-ils.storage
12 use OpenSRF::AppSession;
13
14 # This is an extension of Error.pm that supplies some error types to throw
15 use OpenSRF::EX qw(:try);
16
17 # This is a helper class for querying the OpenSRF Settings application ...
18 use OpenSRF::Utils::SettingsClient;
19
20 # ... and here we have the built in logging helper ...
21 use OpenSRF::Utils::Logger qw($logger);
22
23 # ... and this is our OpenILS object (en|de)coder and pseudo-ORM package.
24 use OpenILS::Utils::Fieldmapper;
25
26
27 # We'll be working with XML, so...
28 use XML::LibXML;
29 use XML::LibXSLT;
30 use Unicode::Normalize;
31
32 use JSON;
33
# Package-level state shared by the methods below; the first three are
# filled in by child_init.
our $_parser;           # XML parser used for stylesheets and records
our $_xslt;             # XSLT processor that compiles the stylesheets
our %record_xslt;       # format name => { xslt, namespace_uri, docs, schema_location }
our %metarecord_xslt;   # same layout for metarecord formats (only read in the code visible here)
40
# Per-process initialisation: builds the shared XML parser and XSLT
# processor, compiles one stylesheet per supported bib-record format into
# %record_xslt, and registers a retrieve method for each format.
#
# Returns 1.
sub child_init {
    # we need an XML parser ...
    $_parser = XML::LibXML->new;

    # ... and an XSLT processor
    $_xslt = XML::LibXSLT->new;

    # Every stylesheet lives in the same configured directory, so ask the
    # settings server for it only once.
    my $xsl_dir = OpenSRF::Utils::SettingsClient
        ->new
        ->config_value( dirs => 'xsl' );

    # [ format name, stylesheet file, metadata advertised for the format ].
    # Metadata keys that a format does not define are simply left unset.
    my @transforms = (
        [ mods3 => 'MARC21slim2MODS3.xsl',
            { namespace_uri   => 'http://www.loc.gov/mods/v3',
              docs            => 'http://www.loc.gov/mods/',
              schema_location => 'http://www.loc.gov/standards/mods/v3/mods-3-1.xsd',
            } ],
        [ mods => 'MARC21slim2MODS.xsl',
            { namespace_uri   => 'http://www.loc.gov/mods/',
              docs            => 'http://www.loc.gov/mods/',
              schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
            } ],
        [ atom => 'MARC21slim2ATOM.xsl',
            { namespace_uri   => 'http://www.w3.org/2005/Atom',
              docs            => 'http://www.ietf.org/rfc/rfc4287.txt',
            } ],
        [ rdf_dc => 'MARC21slim2RDFDC.xsl',
            { namespace_uri   => 'http://purl.org/dc/elements/1.1/',
              schema_location => 'http://purl.org/dc/elements/1.1/',
            } ],
        [ srw_dc => 'MARC21slim2SRWDC.xsl',
            { namespace_uri   => 'info:srw/schema/1/dc-schema',
              schema_location => 'http://www.loc.gov/z3950/agency/zing/srw/dc-schema.xsd',
            } ],
        [ oai_dc => 'MARC21slim2OAIDC.xsl',
            { namespace_uri   => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
              schema_location => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
            } ],
        [ rss2 => 'MARC21slim2RSS2.xsl', {} ],
    );

    for my $spec (@transforms) {
        my ($type, $file, $info) = @$spec;

        # parse the stylesheet source and stash a compiled transformer
        my $stylesheet = $_parser->parse_file( $xsl_dir . "/" . $file );
        $record_xslt{$type}{xslt} = $_xslt->parse_stylesheet( $stylesheet );

        $record_xslt{$type}{$_} = $info->{$_} for keys %$info;
    }

    # expose open-ils.supercat.record.<format>.retrieve for every format
    register_record_transforms();

    return 1;
}
138
# Registers one "open-ils.supercat.record.<format>.retrieve" API method per
# key of %record_xslt; every one of them dispatches to
# retrieve_record_transform.
sub register_record_transforms {
    foreach my $format ( keys %record_xslt ) {
        # the descriptions show the format name in upper case
        my $label = uc($format);

        my $signature = {
            desc     => "Returns the $label representation of the requested bibliographic record",
            params   => [
                {
                    name => 'bibId',
                    desc => 'An OpenILS biblio::record_entry id',
                    type => 'number',
                },
            ],
            'return' => {
                desc => "The bib record in $label",
                type => 'string',
            },
        };

        __PACKAGE__->register_method(
            method    => 'retrieve_record_transform',
            api_name  => "open-ils.supercat.record.$format.retrieve",
            api_level => 1,
            argc      => 1,
            signature => $signature,
        );
    }
}
162
163
# Returns the given string normalised to NFC with every character above
# ASCII replaced by a hexadecimal numeric character reference (&#xHHHH;).
#
# The replaced range covers U+0080..U+FFFD and the supplementary planes
# U+10000..U+10FFFF, so characters outside the Basic Multilingual Plane are
# escaped as well. U+FFFE and U+FFFF are left alone.
sub entityize {
    my $stuff = NFC(shift());
    $stuff =~ s/([\x{0080}-\x{FFFD}\x{10000}-\x{10FFFF}])/sprintf('&#x%X;', ord($1))/ge;
    return $stuff;
}
169
170
# Returns the result of a storage id-list search for biblio or authority
# record entries created or edited after a given date, newest first.
#
# Params:
#   $when  - lower bound for the date column; defaults to today's local
#            date formatted as YYYY-MM-DD
#   $limit - maximum number of ids, passed through to the storage call
#
# The record class ("authority" vs. the default "biblio") and the date
# column ("edit_date" vs. the default "create_date") are picked from the
# API name this method was invoked under.
sub recent_changes {
    my $self = shift;
    my $client = shift;
    my $when = shift;
    my $limit = shift;

    if (!$when) {
        # localtime slots: 3 = day of month, 4 = month (0-based),
        # 5 = years since 1900
        my ($d, $m, $y) = (localtime)[3,4,5];
        $when = sprintf('%4d-%02d-%02d', $y + 1900, $m + 1, $d);
    }

    my $type = 'biblio';
    $type = 'authority' if ($self->api_name =~ /authority/);

    my $axis = 'create_date';
    $axis = 'edit_date' if ($self->api_name =~ /edit/);

    my $_storage = OpenSRF::AppSession->create( 'open-ils.storage' );

    return $_storage
        ->request(
            "open-ils.storage.id_list.$type.record_entry.search_where.atomic",
            { $axis => { ">" => $when } },
            { order_by => "$axis desc", limit => $limit } )
        ->gather(1);
}
197
# Register recent_changes once per record class and per kind of change:
#   open-ils.supercat.{biblio,authority}.record.{import,edit}.recent
for my $class ( qw/biblio authority/ ) {
    for my $action ( qw/import edit/ ) {

        my $signature = {
            desc     => "Returns a list of recently ${action}ed $class records",
            params   => [
                {
                    name    => 'when',
                    desc    => "Date to start looking for ${action}ed records",
                    default => 'today',
                    type    => 'string',
                },
                {
                    name => 'limit',
                    desc => "Maximum count to retrieve",
                    type => 'number',
                },
            ],
            'return' => {
                desc => "An id list of $class records",
                type => 'array',
            },
        };

        __PACKAGE__->register_method(
            method    => 'recent_changes',
            api_name  => "open-ils.supercat.$class.record.$action.recent",
            api_level => 1,
            argc      => 0,
            signature => $signature,
        );
    }
}
226
227
# Fetches the bib record entry with id $rid from open-ils.storage and
# returns its marc field run through entityize.
sub retrieve_record_marcxml {
    my ($self, $client, $rid) = @_;

    my $storage = OpenSRF::AppSession->create( 'open-ils.storage' );

    my $request = $storage->request(
        'open-ils.storage.direct.biblio.record_entry.retrieve',
        $rid
    );
    my $record = $request->gather(1);

    return entityize( $record->marc );
}
243
# Publish retrieve_record_marcxml as open-ils.supercat.record.marcxml.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_record_marcxml',
    api_name  => 'open-ils.supercat.record.marcxml.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        # the description carries a trailing newline
        desc     => "Returns the MARCXML representation of the requested bibliographic record\n",
        params   => [
            {
                name => 'bibId',
                desc => 'An OpenILS biblio::record_entry id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The bib record in MARCXML',
            type => 'string',
        },
    },
);
264
# Shared implementation behind every
# "open-ils.supercat.record.<format>.retrieve" method: fetches the bib
# record entry with id $rid from open-ils.storage, runs its marc field
# through the stylesheet stored in %record_xslt for <format>, and returns
# the serialized result passed through entityize.
#
# The format name is taken from the API name the method was invoked under.
sub retrieve_record_transform {
    my $self = shift;
    my $client = shift;
    my $rid = shift;

    (my $transform = $self->api_name) =~ s/^.+record\.([^\.]+)\.retrieve$/$1/;

    my $_storage = OpenSRF::AppSession->create( 'open-ils.storage' );

    # Trace through the application logger rather than STDERR.
    $logger->debug("Fetching record entry $rid");
    my $marc = $_storage->request(
        'open-ils.storage.direct.biblio.record_entry.retrieve',
        $rid
    )->gather(1)->marc;
    $logger->debug("Fetched record entry $rid");

    return entityize($record_xslt{$transform}{xslt}->transform( $_parser->parse_string( $marc ) )->toString);
}
283
284
# Builds a MODS description of the metarecord with id $rid and returns it
# as a string passed through entityize.
#
# The result is the master record's <mods:mods> element emptied out and
# refilled with:
#   - a recordInfo/recordIdentifier holding a tag: URI for the metarecord,
#   - the master record's untyped titleInfo, "creator" name and ISBN
#     identifier, when present,
#   - the subjects of every constituent bib record,
#   - one relatedItem (type "constituent") per bib record, carrying that
#     record's tag: URI, its typeOfResource and its ISBN, when present.
# If the master record has no ISBN, the first constituent ISBN found is
# also copied to the top level.
sub retrieve_metarecord_mods {
    my ($self, $client, $rid) = @_;

    my $mods_ns     = "http://www.loc.gov/mods/";
    my $mods_method = 'open-ils.supercat.record.mods.retrieve';

    # Several requests follow, so keep one connected storage session.
    my $_storage = OpenSRF::AppSession->create( 'open-ils.storage' );
    $_storage->connect;

    # The metarecord itself ...
    my $mr = $_storage
        ->request( 'open-ils.storage.direct.metabib.metarecord.retrieve', $rid )
        ->gather(1);

    # ... and the map rows tying bib records to it.
    my $recs = $_storage
        ->request(
            'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
            $rid )
        ->gather(1);

    $logger->debug("Adding ".scalar(@$recs)." bib record to the MODS of the metarecord");

    # The lead (master) record, rendered as MODS.
    my ($master) = $self->method_lookup($mods_method)->run($mr->master_record);

    my $master_mods = $_parser->parse_string($master)->documentElement;
    $master_mods->setNamespace( $mods_ns, "mods", 1 );

    # A second parse of the same MODS, gutted, becomes the output skeleton.
    my $mods = $_parser->parse_string($master)->documentElement;
    $mods->setNamespace( $mods_ns, "mods", 1 );
    ($mods) = $mods->findnodes('//mods:mods');
    $mods->removeChildNodes;

    my $doc = $mods->ownerDocument;

    # Put a foreign node into the mods namespace, then copy it into the
    # output document.
    my $adopt = sub {
        my $node = shift;
        $node->setNamespace( $mods_ns, "mods", 1 );
        my $copy = $doc->importNode($node);
        return $copy;
    };

    # The metarecord ID goes in as a (locally defined) tag: URI.
    my $recordInfo       = $doc->createElement("mods:recordInfo");
    my $recordIdentifier = $doc->createElement("mods:recordIdentifier");

    my ($day, $month, $year) = (localtime)[3,4,5];
    $year  += 1900;
    $month += 1;

    my $id = $mr->id;
    $recordIdentifier->appendTextNode(
        sprintf("tag:open-ils.org,$year-%0.2d-%0.2d:metabib-metarecord/$id", $month, $day)
    );

    $recordInfo->appendChild($recordIdentifier);
    $mods->appendChild($recordInfo);

    # Title, author and ISBN of the master record describe the metarecord.
    my ($title) = $master_mods->findnodes( './mods:titleInfo[not(@type)]' );
    if ($title) {
        $title = $adopt->($title);
        $mods->appendChild($title);
    }

    my ($author) = $master_mods->findnodes( './mods:name[mods:role/mods:text[text()="creator"]]' );
    if ($author) {
        $author = $adopt->($author);
        $mods->appendChild($author);
    }

    my ($isbn) = $master_mods->findnodes( './mods:identifier[@type="isbn"]' );
    if ($isbn) {
        $isbn = $adopt->($isbn);
        $mods->appendChild($isbn);
    }

    # Now walk the constituent records.
    for my $map ( @$recs ) {

        # MODS for this bib record
        my ($rec) = $self->method_lookup($mods_method)->run($map->source);

        my $part_mods = $_parser->parse_string($rec);
        $part_mods->documentElement->setNamespace( $mods_ns, "mods", 1 );
        ($part_mods) = $part_mods->findnodes('//mods:mods');

        # Subjects are hoisted to the top level of the metarecord.
        for my $subject ( $part_mods->findnodes( './mods:subject' ) ) {
            my $copy = $adopt->($subject);
            $mods->appendChild( $copy );
        }

        my $relatedItem = $doc->createElement("mods:relatedItem");
        $relatedItem->setAttribute( type => 'constituent' );

        # Created with type "uri" but never attached to the output.
        my $identifier = $doc->createElement("mods:identifier");
        $identifier->setAttribute( type => 'uri' );

        my $subRecordInfo       = $doc->createElement("mods:recordInfo");
        my $subRecordIdentifier = $doc->createElement("mods:recordIdentifier");

        my $subid = $map->source;
        $subRecordIdentifier->appendTextNode(
            sprintf("tag:open-ils.org,$year-%0.2d-%0.2d:biblio-record_entry/$subid",
                $month,
                $day
            )
        );
        $subRecordInfo->appendChild($subRecordIdentifier);

        $relatedItem->appendChild( $subRecordInfo );

        my ($tor) = $part_mods->findnodes( './mods:typeOfResource' );
        if ($tor) {
            $tor = $adopt->($tor);
            $relatedItem->appendChild($tor) if ($tor);
        }

        my ($part_isbn) = $part_mods->findnodes( './mods:identifier[@type="isbn"]' );
        if ($part_isbn) {
            $part_isbn = $adopt->($part_isbn);
            $relatedItem->appendChild( $part_isbn );

            # No ISBN on the master record: promote the first one found.
            if (!$isbn) {
                $isbn = $mods->appendChild( $part_isbn->cloneNode(1) );
            }
        }

        $mods->appendChild( $relatedItem );
    }

    $_storage->disconnect;

    return entityize($mods->toString);
}
# Publish retrieve_metarecord_mods as open-ils.supercat.metarecord.mods.retrieve.
__PACKAGE__->register_method(
    method    => 'retrieve_metarecord_mods',
    api_name  => 'open-ils.supercat.metarecord.mods.retrieve',
    api_level => 1,
    argc      => 1,
    signature => {
        # the description carries a trailing newline
        desc     => "Returns the MODS representation of the requested metarecord\n",
        params   => [
            {
                name => 'metarecordId',
                desc => 'An OpenILS metabib::metarecord id',
                type => 'number',
            },
        ],
        'return' => {
            desc => 'The metarecord in MODS',
            type => 'string',
        },
    },
);
460
# Returns an array ref of single-key hashes, one per metarecord format:
#   { <format> => { namespace_uri, docs, schema_location } }
# The built-in "mods" entry comes first, followed by one entry per key of
# %metarecord_xslt.
sub list_metarecord_formats {
    my %mods_info = (
        namespace_uri   => 'http://www.loc.gov/mods/',
        docs            => 'http://www.loc.gov/mods/',
        schema_location => 'http://www.loc.gov/standards/mods/mods.xsd',
    );

    my @formats = ( { mods => \%mods_info } );

    foreach my $format ( keys %metarecord_xslt ) {
        my $info = $metarecord_xslt{$format};

        my %entry = (
            $format => {
                namespace_uri   => $info->{namespace_uri},
                docs            => $info->{docs},
                schema_location => $info->{schema_location},
            },
        );

        push @formats, \%entry;
    }

    return \@formats;
}
# Publish list_metarecord_formats as open-ils.supercat.metarecord.formats.
__PACKAGE__->register_method(
    method    => 'list_metarecord_formats',
    api_name  => 'open-ils.supercat.metarecord.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        # the description carries a trailing newline
        desc     => "Returns the list of valid metarecord formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
497
498
# Returns an array ref of single-key hashes, one per bib-record format:
#   { <format> => { namespace_uri, docs, schema_location } }
# The built-in "marcxml" entry comes first, followed by one entry per key
# of %record_xslt. Metadata a format does not define comes back as undef.
sub list_record_formats {
    my %marcxml_info = (
        namespace_uri   => 'http://www.loc.gov/MARC21/slim',
        docs            => 'http://www.loc.gov/marcxml/',
        schema_location => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
    );

    my @formats = ( { marcxml => \%marcxml_info } );

    foreach my $format ( keys %record_xslt ) {
        my $info = $record_xslt{$format};

        my %entry = (
            $format => {
                namespace_uri   => $info->{namespace_uri},
                docs            => $info->{docs},
                schema_location => $info->{schema_location},
            },
        );

        push @formats, \%entry;
    }

    return \@formats;
}
# Publish list_record_formats as open-ils.supercat.record.formats.
__PACKAGE__->register_method(
    method    => 'list_record_formats',
    api_name  => 'open-ils.supercat.record.formats',
    api_level => 1,
    argc      => 0,
    signature => {
        # the description carries a trailing newline
        desc     => "Returns the list of valid record formats that supercat understands.\n",
        'return' => {
            desc => 'The format list',
            type => 'array',
        },
    },
);
535
536
# Given an ISBN, finds a bib record carrying it (MARC 020$a, matched as a
# case-insensitive prefix), then the metarecord that record belongs to,
# and returns the ISBNs of every bib record grouped under that metarecord.
#
# Params:
#   $isbn - the ISBN to look up; must be at least 10 characters long
#
# Returns:
#   { metarecord => <metarecord id>, record_list => { <bib id> => <020$a value> } }
#   or an empty hash ref when no record, or no metarecord mapping, matches.
#
# Throws OpenSRF::EX::InvalidArg when $isbn is missing or too short.
sub oISBN {
    my $self = shift;
    my $client = shift;
    my $isbn = shift;

    OpenSRF::EX::InvalidArg->throw('I need an ISBN please')
        unless (defined($isbn) && length($isbn) >= 10);

    my $_storage = OpenSRF::AppSession->create( 'open-ils.storage' );

    # Create a storage session, since we'll be making multiple requests.
    $_storage->connect;

    # Find the record that has that ISBN.
    my $bibrec = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', value => { ilike => $isbn.'%'} }
    )->gather(1);

    # Go away if we don't have one -- but release the session first.
    unless ($bibrec && @$bibrec) {
        $_storage->disconnect;
        return {};
    }

    # Find the metarecord for that bib record.
    my $mr = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
        $bibrec->[0]->record
    )->gather(1);

    # A bib record with no metarecord mapping is treated as "no match".
    unless ($mr && @$mr) {
        $_storage->disconnect;
        return {};
    }

    # Find the other records for that metarecord.
    my $records = $_storage->request(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
        $mr->[0]->metarecord
    )->gather(1);

    # Just to be safe.  There's currently no unique constraint on sources...
    my %unique_recs = map { ($_->source, 1) } @{ $records || [] };
    my @rec_list = sort keys %unique_recs;

    # And now fetch the ISBNs for those records.
    my $recs = $_storage->request(
        'open-ils.storage.direct.metabib.full_rec.search_where.atomic',
        { tag => '020', subfield => 'a', record => \@rec_list }
    )->gather(1);

    # We're done with the storage server session.
    $_storage->disconnect;

    # Return the oISBN data structure.  This will be XMLized at a higher layer.
    return
        { metarecord => $mr->[0]->metarecord,
          record_list => { map { ($_->record, $_->value) } @{ $recs || [] } } };
}
# Publish oISBN as open-ils.supercat.oisbn.
__PACKAGE__->register_method(
    method    => 'oISBN',
    api_name  => 'open-ils.supercat.oisbn',
    api_level => 1,
    argc      => 1,
    signature => {
        # the description carries a trailing newline
        desc     => "Returns the ISBN list for the metarecord of the requested isbn\n",
        params   => [
            {
                name => 'isbn',
                desc => 'An ISBN.  Duh.',
                type => 'string',
            },
        ],
        'return' => {
            desc => 'record to isbn map',
            type => 'object',
        },
    },
);
610
611 1;