]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
removing vestigial traces of the old "worm" stuff
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use OpenSRF::Utils::JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Formats this module knows about, keyed by format name.  Every entry has
# the format's XML namespace URI under "ns"; post_init() additionally stores
# a compiled stylesheet under "xslt" for the mods and mods3 entries.
our %supported_formats = (
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },
);

# These RSS flavours are registered with an empty namespace.
$supported_formats{$_} = { ns => '' } for qw/rss092 rss093 rss094 rss2/;

# The logger is only ever called through class methods, so the class name
# itself is kept here.
my $log = 'OpenSRF::Utils::Logger';

# Parser and XSLT processor shared by every package in this file.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

my ($mods_sheet, $mads_sheet);

# field_class => field name => { xpath, id, format }; filled by post_init().
my $xpathset = {};

# Both subs are empty.
sub initialize {}
sub child_init {}
49
# One-time setup: compile the MODS stylesheets and load the XPath index
# definitions from cstore into $xpathset.  Does nothing once $xpathset has
# at least one key.
sub post_init {

    return if (keys %$xpathset);

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, label used in the log message, stylesheet file name ]
    my @stylesheets = (
        [ mods  => 'MODS',    'MARC21slim2MODS.xsl'  ],
        [ mods3 => 'MODS v3', 'MARC21slim2MODS3.xsl' ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $label, $file) = @$sheet;

        # already compiled
        next if ($supported_formats{$format}{xslt});

        $log->debug("Loading $label XSLT", DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . "/" . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # XXX testing new metabib field use for faceting
    #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my $entry = ( $xpathset->{ $f->field_class }->{ $f->name } ||= {} );

        $entry->{xpath}  = $f->xpath;
        $entry->{id}     = $f->id;
        $entry->{format} = $f->format;

        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }
}
89
# Normalize a string and turn its non-ASCII characters into numeric entities.
#
# Params:
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalization; anything else selects NFC
# Returns: the normalized text with every character in the range
#          U+0080..U+FFFD replaced by an upper-case hex entity (&#xNN;)
sub entityize {
    my ($stuff, $form) = @_;

    my $normalized = ($form eq 'D') ? NFD($stuff) : NFC($stuff);

    $normalized =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
    return $normalized;
}
103
104 # --------------------------------------------------------------------------------
105 # Biblio ingest
106
107 package OpenILS::Application::Ingest::Biblio;
108 use base qw/OpenILS::Application::Ingest/;
109 use Unicode::Normalize;
110
# Ingest one bib record object and store the results through cstore.
#
# Runs the read-only ingest for $bib and then, inside one cstore
# transaction:
#   * replaces the record's full_rec rows, record descriptor and classed
#     field entries with the freshly extracted ones,
#   * removes the record's metarecord source maps and re-maps it to a
#     metarecord carrying the new fingerprint (re-using one it was mapped
#     to, finding an existing one, or creating one),
#   * retargets metarecord-level ('M') holds from the metarecords it left,
#   * updates the bib row with its new fingerprint and quality.
#
# Params:
#   $bib - record object; its id is read, and its fingerprint and quality
#          are overwritten
# Returns: the record id on success; undef when the read-only ingest
#          produces nothing or the commit does not return a true value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);
    $tmp = [] unless (ref $tmp);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);
    $tmp = [] unless (ref $tmp);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);
        $tmp = [] unless (ref $tmp);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry -> metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    my $old_maps = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);
    $old_maps = [] unless (ref $old_maps);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$old_maps);

    # get the old MRs.  Declared separately from the conditional assignment:
    # "my $x = ... if COND" has undefined behaviour in Perl.
    my $old_mrs;
    if (@$old_maps) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$old_maps] }
        )->gather(1);
    }
    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # nothing else maps to this MR any more, so remove it
            unless (ref $others and @$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # NOTE(review): the flag is set for every non-matching MR, even
            # one that still has other sources and was not deleted above;
            # holds on such an MR are moved below too -- confirm intended.
            $m->isdeleted(1);
        }
    }

    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            $holds = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1);
            $holds = [] unless (ref $holds);
        }

        if ($mr) {
            for my $h (@$holds) {
                # the hold target is the metarecord's id, not the object
                $h->target($mr->id);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # no metarecord carries this fingerprint yet: create one led by this bib
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr->id);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (ref $mrm and @$mrm) {
            # highest-quality record among those already mapped to the MR
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'  => { bre => [ qw/id quality/ ] },
                  order_by  => { bre => "quality desc" },
                  limit     => 1,
                }
            )->gather(1);

            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # release the connected session whether or not the commit succeeded
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object",
    method    => "rw_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
279
# Fetch one bib record by id and run the read/write object ingest on it.
#
# Params:  $rec - id handed to biblio.record_entry.retrieve
# Returns: the first value produced by open-ils.ingest.full.biblio.object,
#          or undef when no record comes back.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    # The record is read inside a transaction that is then rolled back.
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if ($r and @$r) {
        return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record",
    method    => "rw_biblio_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
304
# Run the read/write object ingest over a list of bib record ids.
#
# Params:  the record ids, either as one array reference or as a flat list
# Returns: the number of records for which
#          open-ils.ingest.full.biblio.object returned a true value;
#          undef when no ids were given or no records were found.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    return undef unless (@rec);

    OpenILS::Application::Ingest->post_init();
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # Search on the id list itself.  The scalar $rec is a different,
    # never-assigned variable, so using it here would filter on undef.
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    # Count successes.  The object ingest returns the record id, so adding
    # its result to the counter would sum ids instead of counting records.
    my $count = 0;
    for my $bib (@$r) {
        my ($id) = $self->method_lookup("open-ils.ingest.full.biblio.object")->run($bib);
        $count++ if ($id);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record_list",
    method    => "rw_biblio_ingest_record_list",
    api_level => 1,
    argc      => 1,
);
332
# Read-only ingest of a bib record object: nothing is written to cstore.
#
# Params:  $bib - record object; its marc and id are read
# Returns: hash ref with the keys full_rec (array ref), field_entries
#          (array ref), fingerprint and descriptor, where every row,
#          every entry and the descriptor (when there is one) is stamped
#          with the record id.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two extractors take the parsed document, the last two the
    # entityized string.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    foreach my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    foreach my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object.readonly",
    method    => "ro_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
358
# Read-only ingest of a MARCXML string; no record id is attached.
#
# Params:  the XML text (entityized here before it is parsed)
# Returns: hash ref with the keys full_rec (array ref), field_entries
#          (array ref), fingerprint and descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
    method    => "ro_biblio_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
379
# Read-only ingest of a bib record looked up by id.
#
# Params:  $rec - id handed to biblio.record_entry.retrieve
# Returns: the result hash of open-ils.ingest.full.biblio.xml.readonly with
#          the field entries, full_rec rows and (when present) descriptor
#          stamped with $rec; undef when no record comes back.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor can be missing; the object variant of this method
    # guards the same call, so do not call a method on undef here either.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record.readonly",
    method    => "ro_biblio_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
407
# Streaming read-only ingest by record id.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as a record id, runs
# open-ils.ingest.full.biblio.record.readonly on it and responds with the
# result.  The loop ends when recv returns nothing or a message has
# undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record_stream.readonly",
    method    => "ro_biblio_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
438
# Streaming read-only ingest of MARCXML strings.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as XML text, runs
# open-ils.ingest.full.biblio.xml.readonly on it and responds with the
# result.  The loop ends when recv returns nothing or a message has
# undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method    => "ro_biblio_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
466
# Streaming ingest of bib record objects.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as a record object, runs
# open-ils.ingest.full.biblio.xml.readonly on its marc, stamps the field
# entries and full_rec rows with the object's id and responds with the
# result.  The loop ends when recv returns nothing or a message has
# undefined content.  Always returns undef.
sub rw_biblio_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        $_->source($bib->id) for (@{$res->{field_entries}});
        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.bib_stream.import",
    method    => "rw_biblio_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
497
498
499 # --------------------------------------------------------------------------------
500 # Authority ingest
501
502 package OpenILS::Application::Ingest::Authority;
503 use base qw/OpenILS::Application::Ingest/;
504 use Unicode::Normalize;
505
# Read-only ingest of an authority record object.
#
# Params:  $bib - record object; its marc and id are read
# Returns: hash ref whose only key, full_rec, holds the flattened MARC rows
#          stamped with the record id.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    foreach my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.object.readonly",
    method    => "ro_authority_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
526
# Read-only ingest of an authority MARCXML string.
#
# Params:  the XML text (entityized here before it is parsed)
# Returns: hash ref whose only key, full_rec, holds the flattened MARC rows.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml.readonly",
    method    => "ro_authority_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
544
# Read-only ingest of an authority record looked up by id.
#
# Params:  $rec - id handed to authority.record_entry.retrieve
# Returns: the result hash of open-ils.ingest.full.authority.xml.readonly
#          with its full_rec rows stamped with $rec; undef when no record
#          comes back.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority xml ingest in this file returns only full_rec, so an
    # unconditional call on the descriptor would be a method call on undef.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.record.readonly",
    method    => "ro_authority_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
571
# Streaming read-only authority ingest by record id.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as a record id, runs
# open-ils.ingest.full.authority.record.readonly on it and responds with
# the result.  The loop ends when recv returns nothing or a message has
# undefined content.  Always returns undef.
sub ro_authority_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.record_stream.readonly",
    method    => "ro_authority_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
601
# Streaming read-only authority ingest of MARCXML strings.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as XML text, runs
# open-ils.ingest.full.authority.xml.readonly on it and responds with the
# result.  The loop ends when recv returns nothing or a message has
# undefined content.  Always returns undef.
sub ro_authority_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml_stream.readonly",
    method    => "ro_authority_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
629
# Streaming ingest of authority record objects.
#
# Receives messages from the client one at a time (recv with count 1 and a
# timeout of 5), treats each message's content as a record object, runs
# open-ils.ingest.full.authority.xml.readonly on its marc, stamps the
# full_rec rows with the object's id and responds with the result.  The
# loop ends when recv returns nothing or a message has undefined content.
# Always returns undef.
sub rw_authority_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.bib_stream.import",
    method    => "rw_authority_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
659
660
661 # --------------------------------------------------------------------------------
662 # MARC index extraction
663
664 package OpenILS::Application::Ingest::XPATH;
665 use base qw/OpenILS::Application::Ingest/;
666 use Unicode::Normalize;
667
# Collect the text selected by an XPath expression into a single string.
# Give this an XML documentElement and an XPATH expression.
#
# Params:
#   $xml       - element to search
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - namespace URI; when given together with $ns_prefix the
#                namespace is set on $xml before searching
#   $ns_prefix - prefix to bind to $ns_uri
#   $unique    - when true, a child whose text already occurs anywhere in
#                the string built so far is skipped
# Returns: the NFD-normalized text; each collected piece is followed by one
#          space.  A matched node without children contributes its own text.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;

            # Uniquify with a literal substring test.  Interpolating the
            # text into a regex turns empty text into the empty pattern,
            # which Perl treats as "reuse the last successful pattern".
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
699
# Build field-entry objects for the requested index classes of one record.
#
# Params:
#   $xml     - a parsed document, or XML text (entityized and parsed here)
#   @classes - index class names to extract
# For every field definition of every requested class, the definition's
# XPath is evaluated against the document -- run through the format's
# stylesheet first when the format has one -- and a non-empty result is
# normalized and sent to the client as a
# Fieldmapper::metabib::<class>_field_entry with value and field set.
# Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    unless (ref $xml) {
        $xml = $parser->parse_string( OpenILS::Application::Ingest::entityize($xml) );
    }

    # format name => transformed document, so each stylesheet runs only once
    my %transformed;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            # formats without a stylesheet are searched in the source document
            my $document = $xml;
            if ($sf->{xslt}) {
                $transformed{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transformed{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            next unless $value;

            # decompose, drop marks and control characters, trim trailing
            # non-word characters, remove dots between word boundaries,
            # then lowercase
            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
756
# Extract field entries of the given index classes for a stored record.
#
# Params:
#   $rec     - record id to retrieve
#   @classes - one or more index class names
# Each entry produced by open-ils.ingest.field_entry.class.xml is stamped
# with $rec as its source and sent to the client.  Returns undef, including
# when no record comes back.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, as the xml variant does; a bare shift
    # would keep only the first class and silently drop the rest.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves an authority record, while the sibling
    # all_index_string_record retrieves a biblio record -- confirm which
    # record type is intended here.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.record",
    method    => "class_index_string_record",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
784
# Extract field entries for every known index class from one document.
#
# Params:  $xml - passed through unchanged to
#                 open-ils.ingest.field_entry.class.xml
# Each resulting entry is sent to the client.  Always returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list is read from $xpathset right here, so the definitions
    # must be loaded first; otherwise a fresh process asks for no classes.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.xml",
    method    => "all_index_string_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
802
# Streams the field_entry index strings of one stored biblio record across
# every index class known to $xpathset.
#
# Params:
#   $rec - id of the biblio.record_entry row to fetch through open-ils.cstore
#
# Each object produced by open-ils.ingest.field_entry.class.xml gets its
# source set to $rec and is sent to the client.  Always returns undef; when
# the record cannot be retrieved nothing is sent.
sub all_index_string_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $record = OpenSRF::AppSession
		->create('open-ils.cstore')
		->request('open-ils.cstore.direct.biblio.record_entry.retrieve', $rec)
		->gather(1);

	# Nothing to index if the lookup came back empty.
	if (!$record or !@$record) {
		return undef;
	}

	my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
	my @entries = $extractor->run($record->marc, keys(%$xpathset));

	for my $entry (@entries) {
		$entry->source($rec);
		$client->respond($entry);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name  => 'open-ils.ingest.extract.field_entry.all.record',
	method    => 'all_index_string_record',
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
829
830 # --------------------------------------------------------------------------------
831 # Flat MARC
832
833 package OpenILS::Application::Ingest::FlatMARC;
834 use base qw/OpenILS::Application::Ingest/;
835 use Unicode::Normalize;
836
837
# Normalizes one piece of MARC text for storage in a full_rec row: decompose
# to NFD, remove mark characters (\pM) and "other"/control characters (\pC),
# then strip any run of non-word characters from the end of the value.
sub _normalize_marc_value {
	my $val = shift;

	$val = NFD($val);
	$val =~ s/\pM+//sg;
	$val =~ s/\pC+//sg;
	$val =~ s/\W+$//sg;

	return $val;
}

# Flattens a parsed MARCXML document into a list of full_rec Fieldmapper
# objects: one for each leader, one for each controlfield and one for each
# datafield subfield.
#
# Params:
#   $marcxml - parsed XML document (must support findnodes)
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib',
#              giving Fieldmapper::metabib::full_rec objects
#
# Returns the list of row objects.  The list is empty when the document
# contains no element whose local name is "record".
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# Without a record element there is nothing to flatten.  Return the empty
	# list rather than dying on a method call against an undefined $root.
	unless ($root) {
		$log->debug("No record element found in $xmltype xml; returning no rows");
		return @ns_list;
	}

	# The leader is stored under the pseudo-tag 'LDR'.
	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			# Only subfield values are lower-cased; leader and controlfield
			# values keep their original case.
			$ns->value( lc(_normalize_marc_value($data->textContent)) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
	return @ns_list;
}
911
# Streams the flattened full_rec rows of a MARCXML document.
#
# Params:
#   $xml - either a MARCXML string, which is entityized and parsed here, or
#          a reference (an already-parsed document), which is used as-is
#
# Rows are built as 'authority' full_rec objects when the invoked API name
# contains "authority", and as 'metabib' ones otherwise.  Every row is sent
# to the client; the method itself returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	if (!ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name  => 'open-ils.ingest.flat_marc.authority.xml',
	method    => 'flat_marc_xml',
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => 'open-ils.ingest.flat_marc.biblio.xml',
	method    => 'flat_marc_xml',
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
943
# Streams the flattened full_rec rows of one stored record.
#
# Params:
#   $rec - id of the record_entry row to fetch through open-ils.cstore
#
# The record is read from authority.record_entry when the invoked API name
# contains "authority" and from biblio.record_entry otherwise, then handed
# to the matching open-ils.ingest.flat_marc.<type>.xml method.  Every row is
# sent to the client and logged as JSON.  Always returns undef; nothing is
# sent when the record is missing or has no MARC.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $record = OpenSRF::AppSession
		->create('open-ils.cstore')
		->request("open-ils.cstore.direct.${type}.record_entry.retrieve", $rec)
		->gather(1);

	if (!$record or !$record->marc) {
		return undef;
	}

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");

	for my $row ($flattener->run($record->marc)) {
		$client->respond($row);
		$log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name  => 'open-ils.ingest.flat_marc.biblio.record_entry',
	method    => 'flat_marc_record',
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => 'open-ils.ingest.flat_marc.authority.record_entry',
	method    => 'flat_marc_record',
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
982
983 # --------------------------------------------------------------------------------
984 # Fingerprinting
985
986 package OpenILS::Application::Ingest::Biblio::Fingerprint;
987 use base qw/OpenILS::Application::Ingest/;
988 use Unicode::Normalize;
989 use OpenSRF::EX qw/:try/;
990
# Computes the fingerprint of one stored biblio record.
#
# Params:
#   $rec - id of the biblio.record_entry row to fetch through open-ils.cstore
#
# Returns the first result of open-ils.ingest.fingerprint.xml for the
# record's MARC, with its {quality} entry truncated to an integer.  Returns
# undef when the record is missing, has no MARC, or no fingerprint result
# was produced.
sub biblio_fingerprint_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

	# Stop here if fingerprinting produced nothing.  Assigning through an
	# undefined $fp below would autovivify it into { quality => 0 } and hand
	# the caller a result that looks valid but carries no fingerprint.
	return undef unless ($fp);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	$fp->{quality} = int($fp->{quality});
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
1016
# ScriptRunner for the biblio fingerprint script; created by the first call
# to biblio_fingerprint and reused by later calls.
our $fp_script;

# Runs the configured biblio fingerprint script against a MARC record.
#
# Params:
#   (third argument) - the MARC XML; it is passed through entityize() before
#                      being given to the script as environment.marc
#
# Returns whatever the script run returns, or undef (after logging an error)
# when the run yields a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be configured as a single value or as a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		# Direct method call instead of the ambiguous "new Class (...)" form.
		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Handle failure explicitly.  The previous "run || (error && return)"
	# form only returned when the logger call itself returned true; otherwise
	# it fell through, logged success and returned the logger's value.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.xml",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
1054
# ScriptRunner for the biblio descriptor script; created by the first call
# to biblio_descriptor and reused by later calls.
our $rd_script;

# Runs the configured biblio descriptor extraction script against a MARC
# record.
#
# Params:
#   (third argument) - the MARC XML; it is passed through entityize() before
#                      being given to the script as environment.marc
#
# Returns whatever the script run returns, or undef (after logging an error)
# when the run yields a false value.
sub biblio_descriptor {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$rd_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
		# script_path may be configured as a single value or as a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio descriptor extraction...");

		# Direct method call instead of the ambiguous "new Class (...)" form.
		$rd_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$log->debug("Setting up environment for descriptor extraction script...");
	$rd_script->insert('environment.marc' => $xml => 1);
	$log->debug("Environment building complete...");

	# Handle failure explicitly.  The previous "run || (error && return)"
	# form only returned when the logger call itself returned true; otherwise
	# it fell through, logged success and returned the logger's value.
	my $res = $rd_script->run;
	unless ($res) {
		$log->error( "Descriptor script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio descriptor extraction completed successfully");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.descriptor.xml",
	method		=> "biblio_descriptor",
	api_level	=> 1,
	argc		=> 1,
);
1094
1095
1096 1;
1097