39285ca389659d0b66f14b8c7aa9960792ba32db
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
11
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
15
16 use OpenILS::Utils::Fieldmapper;
17
18 use XML::LibXML;
19 use XML::LibXSLT;
20 use Time::HiRes qw(time);
21
# Metadata formats known to the ingest code, keyed by format name.  Every
# entry carries the XML namespace URI of the format under 'ns'; the RSS
# flavours listed with an empty string have no namespace recorded here.
our %supported_formats = (

    # MARC and the MODS renderings derived from it
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },

    # Dublin Core flavours
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },

    # Feed formats
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss092  => { ns => '' },
    rss093  => { ns => '' },
    rss094  => { ns => '' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },
    rss2    => { ns => '' },
);
38
39
# Logger class name; logging calls are made as class methods on it.
my $log = 'OpenSRF::Utils::Logger';

# XML parser and XSLT processor shared by every sub in this file.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

# Stylesheet slots; not referenced in the part of the file reviewed here.
my ($mods_sheet, $mads_sheet);

# Indexing definitions, filled in by post_init():
#   field class => field name => { xpath, id, format }
my $xpathset = {};

# Intentionally empty lifecycle hooks.
sub initialize { }
sub child_init { }
50
# Lazy, one-time setup of the shared ingest configuration.
#
# Nothing happens when $xpathset already has keys.  Otherwise the MODS and
# MODS v3 stylesheets are parsed from the configured xsl directory (each only
# if its 'xslt' slot in %supported_formats is still empty) and the metabib
# field definitions flagged search_field = 't' are fetched from
# open-ils.cstore and stored in $xpathset as
#   field_class => name => { xpath, id, format }.
sub post_init {

    unless (keys %$xpathset) {
        $log->debug("Running post_init", DEBUG);

        my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

        # format name, stylesheet file (relative to $xsldir), log message
        my @stylesheets = (
            [ 'mods',  "/MARC21slim2MODS.xsl",  "Loading MODS XSLT" ],
            [ 'mods3', "/MARC21slim2MODS3.xsl", "Loading MODS v3 XSLT" ],
        );

        for my $sheet (@stylesheets) {
            my ($format, $file, $message) = @$sheet;
            next if ($supported_formats{$format}{xslt});

            $log->debug($message, DEBUG);
            my $xslt_doc = $parser->parse_file( $xsldir . $file );
            $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
        }

        # XXX testing new metabib field use for faceting
        # (previously searched with { id => { '!=' => undef } })
        my $req = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request(
                'open-ils.cstore.direct.config.metabib_field.search.atomic',
                { search_field => 't' }
            )
            ->gather(1);

        if (ref $req and @$req) {
            for my $field (@$req) {
                my $class = $field->field_class;
                my $name  = $field->name;

                $xpathset->{$class}->{$name}->{xpath}  = $field->xpath;
                $xpathset->{$class}->{$name}->{id}     = $field->id;
                $xpathset->{$class}->{$name}->{format} = $field->format;

                $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$field->xpath, DEBUG);
            }
        }
    }
}
90
# Normalizes a string and replaces every non-ASCII character with a numeric
# character reference, so the result is plain ASCII.
#
# Arguments:
#   $stuff - the text to convert
#   $form  - optional; 'D' selects canonical decomposition (NFD), anything
#            else (including no argument) selects canonical composition (NFC)
#
# Returns the normalized string with each character in U+0080..U+FFFD or
# U+10000..U+10FFFF written as an upper-case hexadecimal reference (&#xHHHH;).
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # Callers usually omit the form; avoid comparing undef with eq.
    $form = '' unless (defined $form);

    if ($form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # The second range covers the supplementary planes, which the original
    # BMP-only class let through as raw characters.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10FFFF}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
104
105 # --------------------------------------------------------------------------------
106 # Biblio ingest
107
108 package OpenILS::Application::Ingest::Biblio;
109 use base qw/OpenILS::Application::Ingest/;
110 use Unicode::Normalize;
111
# Full read/write ingest of one bibliographic record object.
#
# Arguments (after $self and $client):
#   $bib - record object; its id, fingerprint and quality accessors are used
#          here and it is passed to open-ils.ingest.full.biblio.object.readonly
#
# Within a single open-ils.cstore transaction this:
#   * replaces the record's metabib.full_rec rows, record descriptor and
#     classed field entries with the freshly extracted ones,
#   * removes the record's metarecord source maps, deletes old metarecords
#     that no longer match the fingerprint and have no other sources, and
#     re-targets metarecord ('M') holds from those old metarecords,
#   * finds, creates or updates the metarecord for the new fingerprint and
#     maps the record to it,
#   * updates the record itself (new fingerprint and quality).
#
# Returns the record id on success; undef when the readonly ingest produced
# nothing or when the commit fails (the transaction is then rolled back).
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Derive the cstore class path from the object's Fieldmapper class,
        # e.g. Fieldmapper::a::b becomes a.b
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    #
    # Declared unconditionally: "my $x = ... if COND" has undefined behaviour
    # in Perl and can leave a value from an earlier call in the variable.
    my $old_mrs;
    if (@$tmp) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
    }

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # Only drop the metarecord when no other record maps to it.
            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # Flag it so its holds are picked up for re-targeting below.
            $m->isdeleted(1);
        }
    }

    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            $holds = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1) || [];
        }

        if ($mr) {
            for my $h (@$holds) {
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # No metarecord carries this fingerprint yet: create one.
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # Highest-quality record among those already mapped to the MR.
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'      => { bre => [ qw/id quality/ ] },
                  order_by      => { bre => "quality desc" },
                  limit         => 1,
                }
            )->gather(1);

            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    # Always release the connected session; roll back when the commit fails.
    unless ($cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1)) {
        $cstore->request( 'open-ils.cstore.transaction.rollback' )->gather(1);
        $cstore->disconnect;
        return undef;
    }
    $cstore->disconnect;

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
280
# Read/write ingest of one bibliographic record, looked up by id.
#
# Arguments (after $self and $client):
#   $rec - id handed to biblio.record_entry.retrieve
#
# The record is fetched inside a cstore transaction that is rolled back right
# after the read, then passed to open-ils.ingest.full.biblio.object.
# Returns the first result of that method, or undef when no record was found.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $entry = $cstore
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!($entry and @$entry)) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ($ingest->run($entry))[0];
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record",
    method    => "rw_biblio_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
305
# Read/write ingest of several bibliographic records.
#
# Arguments (after $self and $client): either a single array reference of
# record ids or a flat list of ids.
#
# The records are fetched inside a cstore transaction that is rolled back
# right after the read; each one is then passed to
# open-ils.ingest.full.biblio.object.
#
# Returns the number of records for which that method returned a true value,
# or undef when no ids were given or no records were found.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    return undef unless (@rec);

    OpenILS::Application::Ingest->post_init();
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # Search on the id list itself.  The original passed the undeclared
    # scalar $rec here, so the ids in @rec were never used.
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    # Count successes.  The original added up the returned record ids,
    # which does not give a count.
    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    my $count = 0;
    for my $bib (@$r) {
        $count++ if (($ingest->run($bib))[0]);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_list",
    method          => "rw_biblio_ingest_record_list",
    api_level       => 1,
    argc            => 1,
);
333
# Read-only ingest of one bibliographic record object.
#
# Arguments (after $self and $client):
#   $bib - record object providing marc and id accessors
#
# Entityizes and parses the record's MARC, runs the flat-MARC, field-entry,
# fingerprint and descriptor extraction methods, and stamps the record id on
# the extracted objects.  Nothing is written anywhere.
#
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # Run a registered method; the caller's context is passed through.
    my $run = sub {
        my ($api, $input) = @_;
        return $self->method_lookup($api)->run($input);
    };

    my @full_rows     = $run->("open-ils.ingest.flat_marc.biblio.xml", $document);
    my @field_entries = $run->("open-ils.ingest.extract.field_entry.all.xml", $document);
    my ($fingerprint) = $run->("open-ils.ingest.fingerprint.xml", $xml);
    my ($descriptor)  = $run->("open-ils.ingest.descriptor.xml", $xml);

    for my $entry (@field_entries) {
        $entry->source($bib->id);
    }
    for my $row (@full_rows) {
        $row->record($bib->id);
    }
    if ($descriptor) {
        $descriptor->record($bib->id);
    }

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object.readonly",
    method    => "ro_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
359
# Read-only ingest of one bibliographic record supplied as an XML string.
#
# Arguments (after $self and $client): the XML text, which is entityized
# before parsing.
#
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor; no record id is set on the extracted objects.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    # Run a registered method; the caller's context is passed through.
    my $run = sub {
        my ($api, $input) = @_;
        return $self->method_lookup($api)->run($input);
    };

    my @full_rows     = $run->("open-ils.ingest.flat_marc.biblio.xml", $document);
    my @field_entries = $run->("open-ils.ingest.extract.field_entry.all.xml", $document);
    my ($fingerprint) = $run->("open-ils.ingest.fingerprint.xml", $xml);
    my ($descriptor)  = $run->("open-ils.ingest.descriptor.xml", $xml);

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
    method    => "ro_biblio_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
380
# Read-only ingest of one bibliographic record, looked up by id.
#
# Arguments (after $self and $client):
#   $rec - id handed to biblio.record_entry.retrieve
#
# Runs open-ils.ingest.full.biblio.xml.readonly on the record's MARC and
# stamps $rec on the extracted field entries, full_rec rows and descriptor.
#
# Returns the result hash reference of that method, or undef when the record
# was not found.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; ro_biblio_ingest_single_object applies
    # the same guard before touching it.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
408
# Streaming read-only ingest of bibliographic records by id.
#
# Repeatedly receives one message from the client (5 second timeout), treats
# its content as a record id, runs
# open-ils.ingest.full.biblio.record.readonly on it and responds with the
# result.  The loop stops when nothing is received or the content is
# undefined.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.biblio.record.readonly")
            ->run($rec);

        for my $entry (@{ $res->{field_entries} }) {
            $entry->source($rec);
        }
        for my $row (@{ $res->{full_rec} }) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record_stream.readonly",
    method    => "ro_biblio_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
439
# Streaming read-only ingest of bibliographic records supplied as XML.
#
# Repeatedly receives one message from the client (5 second timeout), passes
# its content to open-ils.ingest.full.biblio.xml.readonly and responds with
# the result.  The loop stops when nothing is received or the content is
# undefined.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.biblio.xml.readonly")
            ->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method    => "ro_biblio_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
467
# Streaming ingest of bibliographic record objects.
#
# Repeatedly receives one message from the client (5 second timeout), treats
# its content as a record object, runs
# open-ils.ingest.full.biblio.xml.readonly on the object's MARC, stamps the
# object's id on the extracted field entries and full_rec rows, and responds
# with the result.  The loop stops when nothing is received or the content
# is undefined.  Nothing is written to the database in this sub.
# Always returns undef.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.biblio.xml.readonly")
            ->run($bib->marc);

        for my $entry (@{ $res->{field_entries} }) {
            $entry->source($bib->id);
        }
        for my $row (@{ $res->{full_rec} }) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.bib_stream.import",
    method    => "rw_biblio_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
498
499
500 # --------------------------------------------------------------------------------
501 # Authority ingest
502
503 package OpenILS::Application::Ingest::Authority;
504 use base qw/OpenILS::Application::Ingest/;
505 use Unicode::Normalize;
506
# Read-only ingest of one authority record object.
#
# Arguments (after $self and $client):
#   $bib - record object providing marc and id accessors
#
# Entityizes and parses the record's MARC, extracts the flat-MARC rows with
# open-ils.ingest.flat_marc.authority.xml and stamps the record id on them.
#
# Returns a hash reference with the single key full_rec.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @full_rows = $self
        ->method_lookup("open-ils.ingest.flat_marc.authority.xml")
        ->run($document);

    for my $row (@full_rows) {
        $row->record($bib->id);
    }

    return { full_rec => \@full_rows };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.object.readonly",
    method    => "ro_authority_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
527
# Read-only ingest of one authority record supplied as an XML string.
#
# Arguments (after $self and $client): the XML text, which is entityized
# before parsing.
#
# Returns a hash reference with the single key full_rec, holding the rows
# produced by open-ils.ingest.flat_marc.authority.xml.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @full_rows = $self
        ->method_lookup("open-ils.ingest.flat_marc.authority.xml")
        ->run($document);

    return { full_rec => \@full_rows };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml.readonly",
    method    => "ro_authority_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
545
# Read-only ingest of one authority record, looked up by id.
#
# Arguments (after $self and $client):
#   $rec - id handed to authority.record_entry.retrieve
#
# Runs open-ils.ingest.full.authority.xml.readonly on the record's MARC and
# stamps $rec on the extracted full_rec rows.
#
# Returns the result hash reference of that method, or undef when the record
# was not found.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority xml method in this file returns only full_rec, so an
    # unguarded call on the descriptor would die on an undefined value.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
572
# Streaming read-only ingest of authority records by id.
#
# Repeatedly receives one message from the client (5 second timeout), treats
# its content as a record id, runs
# open-ils.ingest.full.authority.record.readonly on it, stamps the id on the
# extracted full_rec rows and responds with the result.  The loop stops when
# nothing is received or the content is undefined.  Always returns undef.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.authority.record.readonly")
            ->run($rec);

        for my $row (@{ $res->{full_rec} }) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.record_stream.readonly",
    method    => "ro_authority_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
602
# Streaming read-only ingest of authority records supplied as XML.
#
# Repeatedly receives one message from the client (5 second timeout), passes
# its content to open-ils.ingest.full.authority.xml.readonly and responds
# with the result.  The loop stops when nothing is received or the content
# is undefined.  Always returns undef.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.authority.xml.readonly")
            ->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml_stream.readonly",
    method    => "ro_authority_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
630
# Streaming ingest of authority record objects.
#
# Repeatedly receives one message from the client (5 second timeout), treats
# its content as a record object, runs
# open-ils.ingest.full.authority.xml.readonly on the object's MARC, stamps
# the object's id on the extracted full_rec rows and responds with the
# result.  The loop stops when nothing is received or the content is
# undefined.  Nothing is written to the database in this sub.
# Always returns undef.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self
            ->method_lookup("open-ils.ingest.full.authority.xml.readonly")
            ->run($bib->marc);

        for my $row (@{ $res->{full_rec} }) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.bib_stream.import",
    method    => "rw_authority_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
660
661
662 # --------------------------------------------------------------------------------
663 # MARC index extraction
664
665 package OpenILS::Application::Ingest::XPATH;
666 use base qw/OpenILS::Application::Ingest/;
667 use Unicode::Normalize;
668
# Collects the text found by an XPath expression into one string.
#
# Arguments:
#   $xml       - node to search (an XML documentElement)
#   $xpath     - XPath expression
#   $ns_uri    - namespace URI   \ when both are true they are registered on
#   $ns_prefix - namespace prefix / $xml via setNamespace before the search
#   $unique    - when true, a child's text is skipped if the text gathered so
#                far already contains it
#
# For every matching node the text of each child node is appended, followed
# by a space; a node without children contributes its own text instead.
# Returns the accumulated string in NFD form.
sub xpath_to_string {
    my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

    if ($ns_uri && $ns_prefix) {
        $xml->setNamespace( $ns_uri, $ns_prefix, 1 );
    }

    my $buffer = "";

    for my $node ($xml->findnodes( $xpath )) {

        my @kids = $node->childNodes();

        # childless match: take the node's own text
        unless (@kids) {
            $buffer .= $node->textContent . " ";
            next;
        }

        for my $kid (@kids) {
            # escape the text so it is matched literally
            my $content = quotemeta($kid->textContent);
            if ($unique && $buffer =~ /$content/) {
                next;  # already present in the buffer
            }
            $buffer .= $kid->textContent . " ";
        }
    }

    return NFD($buffer);
}
700
# Streams metabib field entries extracted from a MARCXML record.
#
# Arguments (after $self and $client):
#   $xml     - parsed XML document, or an XML string (entityized and parsed)
#   @classes - field classes to extract
#
# For each field definition in $xpathset under the requested classes, the
# document (run through the format's stylesheet when one is loaded; each
# format is transformed at most once per call) is queried with the
# definition's XPath.  A non-empty value is decomposed (NFD), stripped of
# mark and control characters, trailing non-word characters and dots between
# word boundaries, lower-cased, and sent to the client as a
# Fieldmapper::metabib::<class>_field_entry with value and field set.
# Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => already transformed document
    my %transform_cache;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            my $document = $xml;
            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $def->{format},
                1
            );

            if (!$value) {
                next;
            }

            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $entry = $entry_class->new;
            $entry->value( $value );
            $entry->field( $def->{id} );
            $client->respond($entry);
        }
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
757
# Streams metabib field entries for one bibliographic record, by id.
#
# Arguments (after $self and $client):
#   $rec     - id of the record to index
#   @classes - field classes to extract; every remaining argument is used,
#              as in class_index_string_xml
#
# Each entry produced by open-ils.ingest.field_entry.class.xml gets $rec as
# its source and is sent to the client.  Returns undef, also when the record
# was not found.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    my @classes = @_;   # the original "= shift" kept only the first class

    OpenILS::Application::Ingest->post_init();

    # The entries built here are metabib field entries whose source is set
    # to $rec, so the record is read from biblio.record_entry, as in
    # all_index_string_record.  The original read authority.record_entry.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
785
# Streams metabib field entries of every known field class for one record
# supplied as XML.
#
# Arguments (after $self and $client):
#   $xml - passed unchanged to open-ils.ingest.field_entry.class.xml
#
# Each entry produced by that method is sent to the client.
# Always returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # keys(%$xpathset) below is evaluated before the delegated method can
    # call post_init itself, so the definitions must be loaded here;
    # otherwise the class list may be empty.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
803
# Streams the field entries of every configured class for one stored
# bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore biblio record_entry retrieve call
#
# The record's MARC is run through open-ils.ingest.field_entry.class.xml
# with all keys of $xpathset as the class list.  Each result has its source
# set to $rec before being sent with respond().  Always returns undef.
sub all_index_string_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
		->create('open-ils.cstore')
		->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
		->gather(1);

	# Nothing to stream when the record could not be fetched.
	if (!$r or !@$r) {
		return undef;
	}

	my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
	my @entries = $extractor->run($r->marc, keys(%$xpathset));

	foreach my $entry (@entries) {
		$entry->source($rec);
		$client->respond($entry);
	}

	return undef;
}
__PACKAGE__->register_method(
	method		=> "all_index_string_record",
	api_name	=> "open-ils.ingest.extract.field_entry.all.record",
	argc		=> 1,
	api_level	=> 1,
	stream		=> 1,
);
830
831 # --------------------------------------------------------------------------------
832 # Flat MARC
833
834 package OpenILS::Application::Ingest::FlatMARC;
835 use base qw/OpenILS::Application::Ingest/;
836 use Unicode::Normalize;
837
838
# Cleans one MARC text value before it is stored in a full_rec row:
# decompose with NFD, remove combining marks (\pM), remove characters in the
# Unicode "Other" category (\pC), then drop trailing non-word characters.
sub _normalize_marc_value {
	my $val = shift;

	$val = NFD($val);
	$val =~ s/\pM+//sg;
	$val =~ s/\pC+//sg;
	$val =~ s/\W+$//sg;

	return $val;
}

# Flattens a parsed MARC XML document into a list of full_rec objects.
#
# Arguments:
#   $marcxml - object answering findnodes(); the first element whose local
#              name is "record" is used as the root
#   $xmltype - middle part of the Fieldmapper class name; defaults to
#              'metabib', giving Fieldmapper::metabib::full_rec
#
# Returns one object per leader (tag 'LDR'), one per controlfield, and one
# per datafield subfield (with tag, ind1, ind2 and subfield code set).
# Subfield values are additionally lower-cased; leader and controlfield
# values are not.  Returns an empty list when no record element is found.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# Without a record element there is nothing to flatten; bail out rather
	# than calling methods on undef.
	unless ($root) {
		$log->error("No record element found in $xmltype xml; returning no rows");
		return @ns_list;
	}

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		# Tag and indicators belong to the datafield and are repeated on
		# every row generated from its subfields.
		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc(_normalize_marc_value($data->textContent)) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
	return @ns_list;
}
912
# Streams the flattened full_rec rows for a MARC XML document.
#
# Arguments (after $self and $client):
#   $xml - either an already parsed document (any reference) or a string,
#          which is passed through entityize() and parsed with $parser
#
# Rows are built as 'authority' rows when the invoked API name contains
# "authority", otherwise as 'metabib' rows.  Always returns undef; rows are
# delivered through $client->respond().
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	# Only plain strings need parsing; references are used as given.
	if (!ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	foreach my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}

	return undef;
}
# The same implementation is published once per record type; the API name
# decides which kind of row is produced.
__PACKAGE__->register_method(
	method		=> "flat_marc_xml",
	api_name	=> "open-ils.ingest.flat_marc.authority.xml",
	argc		=> 1,
	api_level	=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	method		=> "flat_marc_xml",
	api_name	=> "open-ils.ingest.flat_marc.biblio.xml",
	argc		=> 1,
	api_level	=> 1,
	stream		=> 1,
);
944
# Streams the flattened full_rec rows for one stored record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore record_entry retrieve call
#
# The record type is 'authority' when the invoked API name contains
# "authority", otherwise 'biblio'.  It selects both the cstore retrieve call
# and the open-ils.ingest.flat_marc.<type>.xml method that does the
# flattening.  Each row is sent with respond() and logged as JSON.
# Always returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();
	my $r = OpenSRF::AppSession
		->create('open-ils.cstore')
		->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
		->gather(1);

	# No record, or a record without MARC: nothing to flatten.
	if (!$r or !$r->marc) {
		return undef;
	}

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
	foreach my $row ($flattener->run($r->marc)) {
		$client->respond($row);
		$log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
	}

	return undef;
}
# Published once per record type; the API name selects the type at run time.
__PACKAGE__->register_method(
	method		=> "flat_marc_record",
	api_name	=> "open-ils.ingest.flat_marc.biblio.record_entry",
	argc		=> 1,
	api_level	=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	method		=> "flat_marc_record",
	api_name	=> "open-ils.ingest.flat_marc.authority.record_entry",
	argc		=> 1,
	api_level	=> 1,
	stream		=> 1,
);
983
984 # --------------------------------------------------------------------------------
985 # Fingerprinting
986
987 package OpenILS::Application::Ingest::Biblio::Fingerprint;
988 use base qw/OpenILS::Application::Ingest/;
989 use Unicode::Normalize;
990 use OpenSRF::EX qw/:try/;
991
# Computes the fingerprint of one stored bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore biblio record_entry retrieve call
#
# Returns the first value produced by open-ils.ingest.fingerprint.xml for
# the record's MARC, with its 'quality' entry truncated to an integer.
# Returns undef when the record (or its MARC) cannot be retrieved, or when
# the XML-level call yields no result.
sub biblio_fingerprint_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

	# Without this guard the assignment below would autovivify an undefined
	# $fp into { quality => 0 } and return it as if it were a fingerprint.
	return undef unless (ref $fp);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	$fp->{quality} = int($fp->{quality});
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
1017
# ScriptRunner for the fingerprint script; created on first use and reused
# for as long as this variable stays set.
our $fp_script;

# Runs the configured biblio fingerprint script over a MARC XML string.
#
# Arguments (after $self and $client):
#   the MARC XML, which is first passed through entityize()
#
# The script file and library paths are read from the open-ils.ingest
# app_settings ('scripts' => 'biblio_fingerprint' and 'script_path').
# The XML is inserted into the script as environment => { marc => ... }.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be configured as a single value or as a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Handle failure explicitly so the early return does not depend on the
	# value returned by the logger.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.xml",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
1055
# ScriptRunner for the record descriptor script; created on first use and
# reused for as long as this variable stays set.
our $rd_script;

# Runs the configured biblio descriptor script over a MARC XML string.
#
# Arguments (after $self and $client):
#   the MARC XML, which is first passed through entityize()
#
# The script file and library paths are read from the open-ils.ingest
# app_settings ('scripts' => 'biblio_descriptor' and 'script_path').
# The XML is inserted into the script under 'environment.marc'.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_descriptor {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$rd_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
		# script_path may be configured as a single value or as a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio descriptor extraction...");

		$rd_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$log->debug("Setting up environment for descriptor extraction script...");
	$rd_script->insert('environment.marc' => $xml => 1);
	$log->debug("Environment building complete...");

	# Handle failure explicitly so the early return does not depend on the
	# value returned by the logger.
	my $res = $rd_script->run;
	unless ($res) {
		$log->error( "Descriptor script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio descriptor extraction completed successfully");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.descriptor.xml",
	method		=> "biblio_descriptor",
	api_level	=> 1,
	argc		=> 1,
);
1095
1096
1097 1;
1098