# Source: git.evergreen-ils.org Git - working/Evergreen.git / blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
# Commit subject: disconnect to free up the cstore backend
# Path: [working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
11
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
15
16 use OpenILS::Utils::Fieldmapper;
17
18 use XML::LibXML;
19 use XML::LibXSLT;
20 use Time::HiRes qw(time);
21
# Known metadata formats, keyed by format name.  Every entry is a hash ref
# with an `ns` key holding the XML namespace URI for that format (empty for
# the namespace-less RSS flavours).  post_init() later adds an `xslt` key to
# the mods, mods3 and mods32 entries.
our %supported_formats = map { ( $_->[0] => { ns => $_->[1] } ) } (
        [ mods32  => 'http://www.loc.gov/mods/v3' ],
        [ mods3   => 'http://www.loc.gov/mods/v3' ],
        [ mods    => 'http://www.loc.gov/mods/' ],
        [ marcxml => 'http://www.loc.gov/MARC21/slim' ],
        [ srw_dc  => 'info:srw/schema/1/dc-schema' ],
        [ oai_dc  => 'http://www.openarchives.org/OAI/2.0/oai_dc/' ],
        [ rdf_dc  => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ],
        [ atom    => 'http://www.w3.org/2005/Atom' ],
        [ rss091  => 'http://my.netscape.com/rdf/simple/0.9/' ],
        [ rss092  => '' ],
        [ rss093  => '' ],
        [ rss094  => '' ],
        [ rss10   => 'http://purl.org/rss/1.0/' ],
        [ rss11   => 'http://purl.org/net/rss1.1#' ],
        [ rss2    => '' ],
);
39
40
# Logger class name; logging calls are made as class methods on it.
my $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT engine shared by every sub in this file.
my ($parser, $xslt) = ( XML::LibXML->new(), XML::LibXSLT->new() );

# Declared here but not referenced in the visible part of this file.
my ($mods_sheet, $mads_sheet);

# Index definitions filled in by post_init():
#   $xpathset->{field_class}{name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Deliberately empty.
# NOTE(review): presumably OpenSRF application lifecycle hooks -- confirm.
sub initialize { }
sub child_init { }
51
# Lazy one-time setup: compile the MODS stylesheets into %supported_formats
# and load the indexing XPath definitions from cstore into $xpathset.
# All work is skipped as soon as $xpathset holds at least one key, so this is
# cheap to call at the top of every API method.
sub post_init {

        return if (keys %$xpathset);

        $log->debug("Running post_init", DEBUG);

        my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

        # format key, log message, stylesheet path relative to $xsldir
        my @stylesheets = (
                [ 'mods',   "Loading MODS XSLT",     "/MARC21slim2MODS.xsl"   ],
                [ 'mods3',  "Loading MODS v3 XSLT",  "/MARC21slim2MODS3.xsl"  ],
                [ 'mods32', "Loading MODS v32 XSLT", "/MARC21slim2MODS32.xsl" ],
        );

        for my $sheet (@stylesheets) {
                my ($format, $message, $path) = @$sheet;

                # an already-compiled stylesheet is kept as is
                next if ($supported_formats{$format}{xslt});

                $log->debug($message, DEBUG);
                my $xslt_doc = $parser->parse_file( $xsldir . $path );
                $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
        }

        # XXX testing new metabib field use for faceting; the previous query was
        #   ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        my $fields = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
                        ->gather(1);

        return unless (ref $fields and @$fields);

        for my $f (@$fields) {
                my $slot = ( $xpathset->{ $f->field_class }->{ $f->name } ||= {} );
                $slot->{xpath}  = $f->xpath;
                $slot->{id}     = $f->id;
                $slot->{format} = $f->format;
                $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
        }
}
96
# Unicode-normalize a string and replace every character in the range
# U+0080..U+FFFD with an uppercase-hex numeric character reference (&#xNN;).
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD; anything else (including no value) selects NFC
#
# Returns the converted string.
sub entityize {
        my ($stuff, $form) = @_;

        my $normalized = ($form eq 'D') ? NFD($stuff) : NFC($stuff);

        $normalized =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;

        return $normalized;
}
110
111 # --------------------------------------------------------------------------------
112 # Biblio ingest
113
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
117
# Ingest one bib record object and store everything derived from it.
#
# The derived data comes from open-ils.ingest.full.biblio.object.readonly.
# Inside one cstore transaction this sub then:
#   * replaces the record's metabib.full_rec and record_descriptor rows,
#   * replaces its title/author/subject/keyword/series field entries,
#   * re-homes the record in the metarecord matching its fingerprint
#     (creating one when needed) and re-points metarecord holds,
#   * updates the bib row itself with the new fingerprint and quality.
#
#   $bib - bib record object providing id, marc, fingerprint and quality
#
# Returns the bib id on success, undef when the read-only ingest produced
# nothing or the commit failed.  The cstore session is disconnected on both
# the success and the commit-failure path.
sub rw_biblio_ingest_single_object {
        my $self = shift;
        my $client = shift;
        my $bib = shift;

        my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
        return undef unless ($blob);

        $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
        $bib->quality( $blob->{fingerprint}->{quality} );

        my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

        $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

        # update full_rec stuff ...
        my $tmp = $cstore->request(
                'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
                { record => $bib->id }
        )->gather(1);

        $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
        $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

        # update rec_descriptor stuff ...
        $tmp = $cstore->request(
                'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
                { record => $bib->id }
        )->gather(1);

        $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
        $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

        # deal with classed fields: clear every class, then store the new entries
        for my $class ( qw/title author subject keyword series/ ) {
                $tmp = $cstore->request(
                        "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
                        { source => $bib->id }
                )->gather(1);

                $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
        }
        for my $obj ( @{ $blob->{field_entries} } ) {
                # turn Fieldmapper::metabib::x_field_entry into metabib.x_field_entry
                my $class = $obj->class_name;
                $class =~ s/^Fieldmapper:://o;
                $class =~ s/::/./go;
                $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
        }

        # update MR map ...

        $tmp = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
                { source => $bib->id }
        )->gather(1);

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

        # get the old MRs.  Declared separately from the conditional assignment:
        # "my $x = ... if (...)" has undefined behaviour in Perl.
        my $old_mrs;
        if (ref($tmp) and @$tmp) {
                $old_mrs = $cstore->request(
                        'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
                )->gather(1);
        }

        $old_mrs = [] if (!ref($old_mrs));

        my $mr;
        for my $m (@$old_mrs) {
                if ($m->fingerprint eq $bib->fingerprint) {
                        $mr = $m;
                } else {
                        my $others = $cstore->request(
                                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
                        )->gather(1);

                        # drop the metarecord once no record maps to it any more
                        if (!@$others) {
                                $cstore->request(
                                        'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                                )->gather(1);
                        }

                        # Flagged even when other sources remain and nothing was
                        # deleted; holds on flagged metarecords are re-pointed below.
                        $m->isdeleted(1);
                }
        }

        # always an array ref, so the loops below never dereference undef
        my $holds = [];
        if (!$mr) {
                # Get the matching MR, if any.
                $mr = $cstore->request(
                        'open-ils.cstore.direct.metabib.metarecord.search',
                        { fingerprint => $bib->fingerprint }
                )->gather(1);

                if (@$old_mrs) {
                        $holds = $cstore->request(
                                'open-ils.cstore.direct.action.hold_request.search.atomic',
                                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
                        )->gather(1);
                        $holds = [] if (!ref($holds));
                }

                if ($mr) {
                        for my $h (@$holds) {
                                # NOTE(review): the metarecord object, not its id, is
                                # stored as the target -- presumably cstore reduces it
                                # to the primary key; confirm.
                                $h->target($mr);
                                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                                $h->ischanged(1);
                        }
                }
        }

        if (!$mr) {
                # no metarecord carries this fingerprint yet: create one led by this bib
                $mr = Fieldmapper::metabib::metarecord->new;
                $mr->fingerprint( $bib->fingerprint );
                $mr->master_record( $bib->id );
                $mr->id(
                        $cstore->request(
                                "open-ils.cstore.direct.metabib.metarecord.create",
                                $mr => { quiet => 'true' }
                        )->gather(1)
                );

                for my $h (grep { !$_->ischanged } @$holds) {
                        $h->target($mr);
                        $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                }
        } else {
                my $mrm = $cstore->request(
                        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
                        { metarecord => $mr->id }
                )->gather(1);

                if (@$mrm) {
                        # highest-quality record among those already mapped
                        my $best = $cstore->request(
                                "open-ils.cstore.direct.biblio.record_entry.search",
                                { id => [ map { $_->source } @$mrm ] },
                                { 'select'      => { bre => [ qw/id quality/ ] },
                                order_by        => { bre => "quality desc" },
                                limit           => 1,
                                }
                        )->gather(1);

                        # $best is undef when the search returns nothing
                        if ($best and $best->quality > $bib->quality) {
                                $mr->master_record($best->id);
                        } else {
                                $mr->master_record($bib->id);
                        }
                } else {
                        $mr->master_record($bib->id);
                }

                $mr->clear_mods;

                $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
        }

        my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
        $mrm->source($bib->id);
        $mrm->metarecord($mr->id);

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
        $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

        # Disconnect before looking at the commit result so that a failed commit
        # does not leave the cstore backend tied to this session.
        my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);
        $cstore->disconnect;

        return undef unless ($committed);

        return $bib->id;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.object",
        method          => "rw_biblio_ingest_single_object",
        api_level       => 1,
        argc            => 1,
);
287
# Fetch one bib record by id and run the full read/write ingest on it.
#
#   $rec - id passed to biblio.record_entry.retrieve
#
# The fetch runs inside a cstore transaction that is rolled back, and the
# session is disconnected before the ingest starts.  Returns undef when the
# record cannot be retrieved, otherwise the first value returned by
# open-ils.ingest.full.biblio.object.
sub rw_biblio_ingest_single_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
        $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

        my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

        $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
        $cstore->disconnect;

        if (!$r or !@$r) {
                return undef;
        }

        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
        return ($ingest->run($r))[0];
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.record",
        method          => "rw_biblio_ingest_single_record",
        api_level       => 1,
        argc            => 1,
);
312
# Run the full read/write ingest on a list of bib records.
#
# The ids may be given as one array ref or as a flat argument list.
# Returns the number of records for which
# open-ils.ingest.full.biblio.object returned a defined value, or undef when
# no ids were given or none of them could be fetched.
sub rw_biblio_ingest_record_list {
        my $self = shift;
        my $client = shift;
        my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

        # nothing to look up
        return undef unless (@rec);

        OpenILS::Application::Ingest->post_init();
        my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
        $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

        # Search on the id list itself.  The previous code passed the unrelated
        # (and never assigned) scalar $rec, so the requested ids were ignored.
        my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

        $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
        $cstore->disconnect;

        return undef unless ($r and @$r);

        # Count successes.  The ingest method returns the bib id, so adding its
        # return value (as before) produced a sum of ids rather than a count.
        my $count = 0;
        for my $bib (@$r) {
                my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.object")->run($bib);
                $count++ if (defined $ingested);
        }

        return $count;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.record_list",
        method          => "rw_biblio_ingest_record_list",
        api_level       => 1,
        argc            => 1,
);
340
# Read-only ingest of a bib record object: nothing is written to the database.
#
#   $bib - bib record object providing marc and id
#
# Returns a hash ref with
#   full_rec      - array ref of rows from open-ils.ingest.flat_marc.biblio.xml
#   field_entries - array ref of entries from
#                   open-ils.ingest.extract.field_entry.all.xml
#   fingerprint   - first result of open-ils.ingest.fingerprint.xml
#   descriptor    - first result of open-ils.ingest.descriptor.xml
# Rows, entries and the descriptor (when present) are stamped with the bib id.
sub ro_biblio_ingest_single_object {
        my ($self, $client, $bib) = @_;

        my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
        my $document = $parser->parse_string($xml);

        # the first two methods take the parsed document, the last two the XML string
        my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
        my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
        my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
        my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

        for my $entry (@mXfe) {
                $entry->source($bib->id);
        }
        for my $row (@mfr) {
                $row->record($bib->id);
        }
        if ($rd) {
                $rd->record($bib->id);
        }

        my %result = (
                full_rec      => \@mfr,
                field_entries => \@mXfe,
                fingerprint   => $fp,
                descriptor    => $rd,
        );
        return \%result;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.object.readonly",
        method          => "ro_biblio_ingest_single_object",
        api_level       => 1,
        argc            => 1,
);
366
# Read-only ingest of a raw MARCXML string.
#
# The third argument is the XML text; it is passed through entityize() and
# parsed once.  Returns a hash ref with the keys full_rec, field_entries,
# fingerprint and descriptor.  No record id is attached to any of the results.
sub ro_biblio_ingest_single_xml {
        my ($self, $client, $raw_xml) = @_;

        my $xml = OpenILS::Application::Ingest::entityize($raw_xml);
        my $document = $parser->parse_string($xml);

        # the first two methods take the parsed document, the last two the XML string
        my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
        my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
        my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
        my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

        my %result = (
                full_rec      => \@mfr,
                field_entries => \@mXfe,
                fingerprint   => $fp,
                descriptor    => $rd,
        );
        return \%result;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.xml.readonly",
        method          => "ro_biblio_ingest_single_xml",
        api_level       => 1,
        argc            => 1,
);
387
# Read-only ingest of a bib record identified by id.
#
#   $rec - id passed to biblio.record_entry.retrieve
#
# Returns undef when the record cannot be retrieved; otherwise the hash ref
# produced by open-ils.ingest.full.biblio.xml.readonly, with $rec stamped on
# the field entries, the full_rec rows and the descriptor (when there is one).
sub ro_biblio_ingest_single_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();
        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and @$r);

        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        # The descriptor can be undef; ro_biblio_ingest_single_object guards the
        # same call, so do the same here instead of dying on a method call.
        $res->{descriptor}->record($rec) if ($res->{descriptor});

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.record.readonly",
        method          => "ro_biblio_ingest_single_record",
        api_level       => 1,
        argc            => 1,
);
415
# Streaming form of open-ils.ingest.full.biblio.record.readonly.
#
# Reads messages from the client one at a time (recv with count => 1,
# timeout => 5), treats each message's content as a bib record id, ingests it
# read-only and responds with the result.  The loop stops when recv returns
# nothing or a message has undefined content.  Always returns undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub ro_biblio_ingest_stream_record {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $rec = $resp->content;
                last unless (defined $rec);

                $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

                $_->source($rec) for (@{$res->{field_entries}});
                $_->record($rec) for (@{$res->{full_rec}});

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
        method          => "ro_biblio_ingest_stream_record",
        api_level       => 1,
        stream          => 1,
);
446
# Streaming form of open-ils.ingest.full.biblio.xml.readonly.
#
# Reads messages from the client one at a time (recv with count => 1,
# timeout => 5), treats each message's content as MARCXML, ingests it
# read-only and responds with the result.  The loop stops when recv returns
# nothing or a message has undefined content.  Always returns undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub ro_biblio_ingest_stream_xml {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $xml = $resp->content;
                last unless (defined $xml);

                $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
        method          => "ro_biblio_ingest_stream_xml",
        api_level       => 1,
        stream          => 1,
);
474
# Streaming ingest of bib record objects sent by the client.
#
# Reads messages one at a time (recv with count => 1, timeout => 5); each
# message's content must provide marc and id.  The MARC is ingested with
# open-ils.ingest.full.biblio.xml.readonly, the bib id is stamped on the
# field entries and full_rec rows, and the result is sent back.  Despite the
# rw_/import naming, this sub itself writes nothing to the database.
# The loop stops when recv returns nothing or a message has undefined
# content.  Always returns undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub rw_biblio_ingest_stream_import {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $bib = $resp->content;
                last unless (defined $bib);

                $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

                $_->source($bib->id) for (@{$res->{field_entries}});
                $_->record($bib->id) for (@{$res->{full_rec}});

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
        method          => "rw_biblio_ingest_stream_import",
        api_level       => 1,
        stream          => 1,
);
505
506
507 # --------------------------------------------------------------------------------
508 # Authority ingest
509
510 package OpenILS::Application::Ingest::Authority;
511 use base qw/OpenILS::Application::Ingest/;
512 use Unicode::Normalize;
513
# Read-only ingest of an authority record object.
#
#   $bib - record object providing marc and id (the name is inherited from
#          the bib variant of this sub)
#
# Returns { full_rec => [...] } holding the rows produced by
# open-ils.ingest.flat_marc.authority.xml, each stamped with the record id.
sub ro_authority_ingest_single_object {
        my ($self, $client, $bib) = @_;

        my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
        my $document = $parser->parse_string($xml);

        my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
        my @mfr = $flatten->run($document);

        for my $row (@mfr) {
                $row->record($bib->id);
        }

        return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.object.readonly",
        method          => "ro_authority_ingest_single_object",
        api_level       => 1,
        argc            => 1,
);
534
# Read-only ingest of a raw authority MARCXML string.
#
# The third argument is the XML text; it is passed through entityize() and
# parsed.  Returns { full_rec => [...] } with the rows produced by
# open-ils.ingest.flat_marc.authority.xml.  No record id is attached.
sub ro_authority_ingest_single_xml {
        my ($self, $client, $raw_xml) = @_;

        my $xml = OpenILS::Application::Ingest::entityize($raw_xml);
        my $document = $parser->parse_string($xml);

        my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
        my @mfr = $flatten->run($document);

        return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.xml.readonly",
        method          => "ro_authority_ingest_single_xml",
        api_level       => 1,
        argc            => 1,
);
552
# Read-only ingest of an authority record identified by id.
#
#   $rec - id passed to authority.record_entry.retrieve
#
# Returns undef when the record cannot be retrieved; otherwise the hash ref
# produced by open-ils.ingest.full.authority.xml.readonly with $rec stamped
# on every full_rec row.
sub ro_authority_ingest_single_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();
        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and @$r);

        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

        $_->record($rec) for (@{$res->{full_rec}});

        # The authority XML ingest returns only full_rec, so there is normally no
        # descriptor; calling ->record on it unconditionally died on undef.
        $res->{descriptor}->record($rec) if ($res->{descriptor});

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.record.readonly",
        method          => "ro_authority_ingest_single_record",
        api_level       => 1,
        argc            => 1,
);
579
# Streaming form of open-ils.ingest.full.authority.record.readonly.
#
# Reads messages from the client one at a time (recv with count => 1,
# timeout => 5), treats each message's content as an authority record id,
# ingests it read-only and responds with the result.  The loop stops when
# recv returns nothing or a message has undefined content.  Always returns
# undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub ro_authority_ingest_stream_record {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $rec = $resp->content;
                last unless (defined $rec);

                $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

                $_->record($rec) for (@{$res->{full_rec}});

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
        method          => "ro_authority_ingest_stream_record",
        api_level       => 1,
        stream          => 1,
);
609
# Streaming form of open-ils.ingest.full.authority.xml.readonly.
#
# Reads messages from the client one at a time (recv with count => 1,
# timeout => 5), treats each message's content as authority MARCXML, ingests
# it read-only and responds with the result.  The loop stops when recv
# returns nothing or a message has undefined content.  Always returns undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub ro_authority_ingest_stream_xml {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $xml = $resp->content;
                last unless (defined $xml);

                $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
        method          => "ro_authority_ingest_stream_xml",
        api_level       => 1,
        stream          => 1,
);
637
# Streaming ingest of authority record objects sent by the client.
#
# Reads messages one at a time (recv with count => 1, timeout => 5); each
# message's content must provide marc and id.  The MARC is ingested with
# open-ils.ingest.full.authority.xml.readonly, the record id is stamped on
# the full_rec rows, and the result is sent back.  Despite the rw_/import
# naming, this sub itself writes nothing to the database.
# The loop stops when recv returns nothing or a message has undefined
# content.  Always returns undef.
#
# The cstore session that used to be created here was never used and has
# been removed.
sub rw_authority_ingest_stream_import {
        my $self = shift;
        my $client = shift;

        OpenILS::Application::Ingest->post_init();

        while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

                my $bib = $resp->content;
                last unless (defined $bib);

                $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
                my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

                $_->record($bib->id) for (@{$res->{full_rec}});

                $client->respond( $res );
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.full.authority.bib_stream.import",
        method          => "rw_authority_ingest_stream_import",
        api_level       => 1,
        stream          => 1,
);
667
668
669 # --------------------------------------------------------------------------------
670 # MARC index extraction
671
672 package OpenILS::Application::Ingest::XPATH;
673 use base qw/OpenILS::Application::Ingest/;
674 use Unicode::Normalize;
675
# Give this an XML documentElement and an XPath expression; it returns the
# text of every matching node as one space-separated, NFD-normalized string.
#
# Positional arguments:
#   $xml       - node to search from; must support setNamespace and findnodes
#   $xpath     - XPath expression handed to findnodes
#   $ns_uri    - namespace URI   } when both are true they are registered on
#   $ns_prefix - namespace prefix} $xml via setNamespace(uri, prefix, 1) first
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere (as a substring) in the text collected so far
#
# For each matched node the text of each child node is appended, followed by
# a space; a node without children contributes its own text instead.  Any
# "dddd-dddd" run is finally split into "dddd dddd".
sub xpath_to_string {
        my $xml = shift;
        my $xpath = shift;
        my $ns_uri = shift;
        my $ns_prefix = shift;
        my $unique = shift;

        $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

        my $string = "";

        # grab the set of matching nodes
        for my $value ($xml->findnodes( $xpath )) {

                # grab all children of the node
                my @children = $value->childNodes();

                if (!@children) {
                        $string .= $value->textContent . " ";
                        next;
                }

                for my $child (@children) {
                        my $content = $child->textContent;

                        # Uniquify with index() rather than /$quoted/: an empty text
                        # node produced an empty pattern, which Perl treats as "reuse
                        # the last successful pattern", making the check unpredictable.
                        next if ($unique && index($string, $content) >= 0);

                        # add the child's content to the growing buffer
                        $string .= $content . " ";
                }
        }

        $string =~ s/(\d{4})-(\d{4})/$1 $2/sg;

        return NFD($string);
}
710
# Build index field entries for the requested field classes from one record.
#
#   $xml     - MARCXML text, or an already parsed document (any reference is
#              used as is; plain text is passed through entityize() and parsed)
#   @classes - field class names; each must be a key of $xpathset to yield
#              anything
#
# For every field defined for a class, the record is transformed with the
# stylesheet of the field's format (when that format has one; each transform
# is done at most once per call), the field's XPath is evaluated and the
# resulting text is normalized.  A
# Fieldmapper::metabib::<class>_field_entry carrying the value and the field
# id is sent to the client for every non-empty result.  Returns undef.
sub class_index_string_xml {
        my ($self, $client, $xml, @classes) = @_;

        OpenILS::Application::Ingest->post_init();
        if (!ref $xml) {
                $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
        }

        # format name => transformed document
        my %transform_cache;

        for my $class (@classes) {
                my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

                for my $type ( keys %{ $xpathset->{$class} } ) {

                        my $def = $xpathset->{$class}->{$type};
                        my $format = $def->{format};
                        my $sf = $OpenILS::Application::Ingest::supported_formats{$format};

                        # formats without a stylesheet are queried on the original document
                        my $document = $xml;
                        if ($sf->{xslt}) {
                                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                                $document = $transform_cache{$format};
                        }

                        # the format name doubles as the namespace prefix
                        my $value = xpath_to_string(
                                $document->documentElement,
                                $def->{xpath},
                                $sf->{ns},
                                $format,
                                1
                        );

                        next unless $value;

                        # Decompose, then strip combining marks and control/other
                        # characters, trailing non-word characters and dot runs
                        # between word boundaries, and lowercase the rest.
                        $value = NFD($value);
                        for ($value) {
                                s/\pM+//sgo;
                                s/\pC+//sgo;
                                s/\W+$//sgo;

                                s/\b\.+\b//sgo;
                        }
                        $value = lc($value);

                        my $fm = $class_constructor->new;
                        $fm->value( $value );
                        $fm->field( $def->{id} );
                        $client->respond($fm);
                }
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.field_entry.class.xml",
        method          => "class_index_string_xml",
        api_level       => 1,
        argc            => 2,
        stream          => 1,
);
767
# Build index field entries for the requested field classes from a stored
# record.
#
#   $rec     - record id to retrieve
#   @classes - one or more field class names.  All remaining arguments are
#              used, matching open-ils.ingest.field_entry.class.xml; earlier
#              only the first class was kept and the rest silently dropped.
#
# Each entry produced by open-ils.ingest.field_entry.class.xml is stamped
# with $rec as its source and sent to the client.  Returns undef.
sub class_index_string_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;
        my @classes = @_;

        OpenILS::Application::Ingest->post_init();

        # NOTE(review): this reads authority.record_entry although the entries
        # built are metabib field entries and all_index_string_record reads
        # biblio.record_entry -- possibly a copy/paste slip; confirm before
        # changing.
        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and @$r);

        for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
                $fm->source($rec);
                $client->respond($fm);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.field_entry.class.record",
        method          => "class_index_string_record",
        api_level       => 1,
        argc            => 2,
        stream          => 1,
);
795
# Stream the index entries for every known class from a MARC XML string.
#
# Arguments (after the usual $self / $client pair):
#   $xml - the record XML, handed unchanged to
#          open-ils.ingest.field_entry.class.xml
#
# Each object produced by the class.xml method is sent to the client with
# respond().  Always returns undef.
sub all_index_string_xml {
    my $self   = shift;
    my $client = shift;
    my $xml    = shift;

    # The class list comes from %$xpathset, which post_init() fills on first
    # use.  Without this call the list is empty if this method is the first
    # one invoked, so no classes would be requested at all.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
813
# Stream the index entries for every known class of one stored biblio record.
#
# Arguments (after $self / $client):
#   $rec - id handed to the cstore biblio record_entry retrieve call
#
# Each entry gets its source() set to $rec before being sent with respond().
# Always returns undef; sends nothing when the record cannot be retrieved.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    # Bail out when nothing (or an empty object) was retrieved.
    if (!$r || !@$r) {
        return undef;
    }

    my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    my @entries   = $extractor->run($r->marc, keys(%$xpathset));

    foreach my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
840
841 # --------------------------------------------------------------------------------
842 # Flat MARC
843
844 package OpenILS::Application::Ingest::FlatMARC;
845 use base qw/OpenILS::Application::Ingest/;
846 use Unicode::Normalize;
847
848
# Normalise one raw MARC text value for storage in a full_rec row:
# decompose it (NFD), drop combining marks (\pM) and "other" characters
# (\pC), then trim trailing non-word characters.
sub _flat_marc_clean_value {
    my $val = NFD(shift);
    $val =~ s/\pM+//sg;
    $val =~ s/\pC+//sg;
    $val =~ s/\W+$//sg;
    return $val;
}

# Flatten a parsed MARC XML document into a list of full_rec objects.
#
# Arguments:
#   $marcxml - parsed XML document/node supporting findnodes()
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
#              (rows are Fieldmapper::${xmltype}::full_rec objects)
#
# Returns the list of rows, in document order within each group:
#   * one row per <leader>, tagged 'LDR'
#   * one row per <controlfield>, tagged with its "tag" attribute
#   * one row per <subfield> of every <datafield>, carrying the field's
#     tag/ind1/ind2 and the subfield code; these values additionally have
#     "dddd-dddd" rewritten to "dddd dddd" and are lower-cased
#
# Dies when the document contains no element whose local name is "record".
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    # Fail with a readable message instead of the opaque "Can't call method
    # on an undefined value" that the first loop would otherwise raise.
    die "No record element found in $xmltype MARC XML\n" unless $root;

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;
        $ns->tag( 'LDR' );
        $ns->value( _flat_marc_clean_value($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;
        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _flat_marc_clean_value($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag  = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
            next unless $data;

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );

            my $val = _flat_marc_clean_value($data->textContent);
            # Split hyphenated pairs of four-digit numbers into two tokens.
            $val =~ s/(\d{4})-(\d{4})/$1 $2/sg;
            $ns->value( lc($val) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
923
# Stream the flattened full_rec rows of a MARC XML record.
#
# Arguments (after $self / $client):
#   $xml - either an XML string (entityized and parsed here) or an already
#          parsed document, which is used as-is
#
# Rows are built as 'authority' rows when the invoked API name contains
# "authority", and as 'metabib' rows otherwise.  Always returns undef.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    # Only plain strings need parsing; references are assumed to be parsed.
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    my @rows = _marcxml_to_full_rows($xml, $type);
    foreach my $row (@rows) {
        $client->respond($row);
    }

    return undef;
}
# The same handler serves both record types; it tells them apart by API name.
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
955
# Stream the flattened full_rec rows of a stored record.
#
# Arguments (after $self / $client):
#   $rec - id handed to the cstore record_entry retrieve call
#
# The record type is 'authority' when the invoked API name contains
# "authority" and 'biblio' otherwise; it selects both the cstore retrieve
# method and the flat_marc.*.xml method that does the flattening.  Each row
# is sent with respond() and then logged as JSON.  Always returns undef;
# sends nothing when no record or no MARC is found.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    if (!$r || !$r->marc) {
        return undef;
    }

    my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
    foreach my $row ($flattener->run($r->marc)) {
        $client->respond($row);
        $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
    }

    return undef;
}
# One handler, two API names: the name decides which record type is read.
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
994
995 # --------------------------------------------------------------------------------
996 # Fingerprinting
997
998 package OpenILS::Application::Ingest::Biblio::Fingerprint;
999 use base qw/OpenILS::Application::Ingest/;
1000 use Unicode::Normalize;
1001 use OpenSRF::EX qw/:try/;
1002
# Compute the fingerprint of a stored biblio record.
#
# Arguments (after $self / $client):
#   $rec - id handed to the cstore biblio record_entry retrieve call
#
# Returns the result of open-ils.ingest.fingerprint.xml for the record's
# MARC, with its {quality} entry truncated to an integer.  Returns undef
# when the record or its MARC is missing, or when no fingerprint was
# produced.
sub biblio_fingerprint_record {
    my $self   = shift;
    my $client = shift;
    my $rec    = shift;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and $r->marc);

    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

    # Without this guard the {quality} assignment below autovivifies a hash
    # and the caller receives { quality => 0 } instead of a failure.
    return undef unless $fp;

    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    $fp->{quality} = int($fp->{quality});
    return $fp;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.fingerprint.record",
    method          => "biblio_fingerprint_record",
    api_level       => 1,
    argc            => 1,
);
1028
# ScriptRunner for the fingerprint script; built on first use and then
# reused by later calls.
our $fp_script;

# Run the configured biblio fingerprint script against a MARC XML string.
#
# Arguments (after $self / $client):
#   $xml - MARC XML; it is passed through entityize() before use
#
# The script file and library path(s) are read from the
# apps/open-ils.ingest/app_settings configuration section.  The XML is
# exposed to the script as environment.marc.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_fingerprint {
    my $self   = shift;
    my $client = shift;
    my $xml    = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$fp_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    # Handle failure explicitly.  The old "run || (error && return undef)"
    # form only returned when the logger call itself returned true;
    # otherwise it logged success and handed back the false result.
    my $res = $fp_script->run;
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.fingerprint.xml",
    method          => "biblio_fingerprint",
    api_level       => 1,
    argc            => 1,
);
1066
# ScriptRunner for the descriptor script; built on first use and then
# reused by later calls.
our $rd_script;

# Run the configured biblio descriptor script against a MARC XML string.
#
# Arguments (after $self / $client):
#   $xml - MARC XML; it is passed through entityize() before use
#
# The script file and library path(s) are read from the
# apps/open-ils.ingest/app_settings configuration section.  The XML is
# exposed to the script as environment.marc.
#
# Returns the script result after tidying its dates: in date1 every 'u' or
# 'x' becomes '0', in date2 every 'u' or 'x' becomes '9'.  A date that is
# empty/false or exactly four spaces is left untouched.  Returns undef
# (after logging an error) when the run yields a false value.
sub biblio_descriptor {
    my $self   = shift;
    my $client = shift;
    my $xml    = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$rd_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $log->debug("Setting up environment for descriptor extraction script...");
    $rd_script->insert('environment.marc' => $xml => 1);
    $log->debug("Environment building complete...");

    # Handle failure explicitly.  The old "run || (error && return undef)"
    # form only returned when the logger call itself returned true;
    # otherwise execution continued and called date1 on a false result.
    my $res = $rd_script->run;
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio descriptor extraction completed successfully");

    # Replace the placeholder characters 'u' and 'x' with the lowest digit
    # in date1 ...
    my $d1 = $res->date1;
    if ($d1 && $d1 ne '    ') {
        $d1 =~ tr/ux/00/;
        $res->date1( $d1 );
    }

    # ... and with the highest digit in date2.
    my $d2 = $res->date2;
    if ($d2 && $d2 ne '    ') {
        $d2 =~ tr/ux/99/;
        $res->date2( $d2 );
    }

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.descriptor.xml",
    method          => "biblio_descriptor",
    api_level       => 1,
    argc            => 1,
);
1118
1119
1120 1;
1121