]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
changed return object key names for consistency
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
11
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
15
16 use OpenILS::Utils::Fieldmapper;
17
18 use XML::LibXML;
19 use XML::LibXSLT;
20 use Time::HiRes qw(time);
21
# Output/index formats known to this module, keyed by format name.  Each entry
# carries the XML namespace URI of that format under {ns}; post_init() adds a
# compiled stylesheet under {xslt} for the MODS variants.
our %supported_formats = (

    # MODS flavours
    mods32  => { ns => 'http://www.loc.gov/mods/v3' },
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },

    # MARC and Dublin Core flavours
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },

    # Feed formats that declare a namespace
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },

    # Feed formats with an empty namespace; each gets its own hash
    ( map { ($_ => { ns => '' }) } qw/rss092 rss093 rss094 rss2/ ),
);
39
40
# Logger class; every diagnostic in this file is emitted through it.
my $log = 'OpenSRF::Utils::Logger';

# XML parser and XSLT processor shared by all packages in this file.
my ($parser, $xslt) = ( XML::LibXML->new(), XML::LibXSLT->new() );

my ($mods_sheet, $mads_sheet);

# Index definitions, filled by post_init():
#   $xpathset->{field_class}{field_name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Intentionally empty hooks; the real setup happens in post_init().
sub initialize { }
sub child_init { }
51
# Load the stylesheets and index definitions on first use.
#
# Does nothing once $xpathset holds at least one entry.  Otherwise it compiles
# the three MARC-to-MODS stylesheets from the configured xsl directory
# (skipping any format that already has one) and copies every
# config.metabib_field row whose search_field is 't' into $xpathset.
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, stylesheet file name, label used in the log message ]
    my @stylesheets = (
        [ 'mods',   'MARC21slim2MODS.xsl',   'MODS'     ],
        [ 'mods3',  'MARC21slim2MODS3.xsl',  'MODS v3'  ],
        [ 'mods32', 'MARC21slim2MODS32.xsl', 'MODS v32' ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $file, $label) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug("Loading $label XSLT", DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . "/" . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # XXX testing new metabib field use for faceting
    #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my ($class, $name, $xpath) = ($f->field_class, $f->name, $f->xpath);

        my $def = ($xpathset->{$class}->{$name} ||= {});
        $def->{xpath}  = $xpath;
        $def->{id}     = $f->id;
        $def->{format} = $f->format;

        $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$xpath, DEBUG);
    }
}
96
# Normalize a string and replace its non-ASCII characters with XML numeric
# character references.
#
#   $stuff - the text to convert
#   $form  - optional; 'D' selects canonical decomposition (NFD), anything
#            else, including no value at all, selects composition (NFC)
#
# Returns the converted string.  Characters in the range U+0080..U+FFFD are
# emitted as upper-case hexadecimal references such as &#xE9;.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # Callers usually omit the form, so check definedness before comparing
    # to avoid an "uninitialized value" warning.
    if (defined $form and $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
110
111 # --------------------------------------------------------------------------------
112 # Biblio ingest
113
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
117
# Ingest one bibliographic record object and store the results through cstore.
#
#   $bib - a bib record object; its marc, id, fingerprint and quality
#          accessors are used
#
# Steps, all inside one cstore transaction:
#   1. run the read-only ingest to obtain the derived rows and fingerprint
#   2. replace the record's full_rec, record_descriptor and classed
#      field_entry rows
#   3. drop the record's metarecord source maps, delete old metarecords that
#      no longer have any source, and find or create the metarecord matching
#      the new fingerprint (re-targeting metarecord holds found on the old
#      metarecords)
#   4. choose the metarecord's master record, map the record to the
#      metarecord and update the record itself
#
# Returns the record id on success, undef if the read-only ingest produced
# nothing or the commit did not return a true value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Turn e.g. Fieldmapper::metabib::title_field_entry into
        # metabib.title_field_entry to build the cstore method name.
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # Declared unconditionally: "my $x = ... if COND" leaves the variable in
    # an undefined state when COND is false.
    my $old_mrs = [];
    if (@$tmp) {
        my $found = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
        $old_mrs = $found if (ref($found));
    }

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # NOTE(review): the flag is set even when other sources still map
            # to this metarecord and it was not deleted above -- confirm that
            # moving its holds is intended in that case.
            $m->isdeleted(1);
        }
    }

    # Always an array ref so the hold loops below are safe.
    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            my $found = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1);
            $holds = $found if (ref($found));
        }

        if ($mr) {
            for my $h (@$holds) {
                # NOTE(review): target receives the metarecord object, not
                # $mr->id -- confirm cstore accepts an object here.
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # No metarecord carries this fingerprint yet: create one led by this bib.
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # Highest-quality record among those already mapped to the MR.
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'      => { bre => [ qw/id quality/ ] },
                  order_by      => { bre => "quality desc" },
                  limit         => 1,
                }
            )->gather(1);

            # The search may return nothing; fall back to this bib then.
            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # Release the connected session whether or not the commit succeeded.
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
286
# Fetch one bib record by id and run the read/write object ingest on it.
#
#   $rec - id of the biblio.record_entry row to ingest
#
# Returns the first value produced by open-ils.ingest.full.biblio.object, or
# undef when the record could not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    # The record is read inside a transaction that is rolled back right away.
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$r or !@$r) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ( $ingest->run($r) )[0];
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record",
    method          => "rw_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
311
# Fetch several bib records by id and run the read/write object ingest on
# each of them.
#
#   arguments - either one array ref of record ids or a flat list of ids
#
# Returns the number of records whose ingest returned a defined value, or
# undef when the search found no records.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    OpenILS::Application::Ingest->post_init();
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # Search on the collected id list; the scalar $rec does not exist in
    # this sub, so using it would search with an undefined id.
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");

    # The object ingest returns the record id on success; count successes
    # rather than adding the ids together.
    my $count = 0;
    for my $bib (@$r) {
        my ($id) = $ingest->run($bib);
        $count++ if (defined $id);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_list",
    method          => "rw_biblio_ingest_record_list",
    api_level       => 1,
    argc            => 1,
);
339
# Derive all ingest data for one bib record object without storing anything.
#
#   $bib - a bib record object; its marc and id accessors are used
#
# Returns a hash ref with the keys full_rec (array ref), field_entries
# (array ref), fingerprint and descriptor.  Every derived object that exists
# is stamped with the record id.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # Flat MARC rows and field entries work on the parsed document, the
    # fingerprint and descriptor on the entityized XML string.
    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object.readonly",
    method          => "ro_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
365
# Derive all ingest data from a MARCXML string without storing anything.
#
#   third argument - the record XML as a string
#
# Returns a hash ref with the keys full_rec (array ref), field_entries
# (array ref), fingerprint and descriptor.  No record id is set on the
# derived objects here.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw_xml) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw_xml);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml.readonly",
    method          => "ro_biblio_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
386
# Fetch one bib record by id and derive its ingest data without storing
# anything.
#
#   $rec - id of the biblio.record_entry row
#
# Returns the hash ref produced by open-ils.ingest.full.biblio.xml.readonly
# with the record id set on every field entry, full_rec row and, when one
# was produced, the descriptor.  Returns undef if the record is not found.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; the object-based variant of this
    # method applies the same guard.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
414
# Streaming read-only ingest keyed by record id.
#
# Reads record ids from the client one message at a time (5 second timeout
# per read), runs open-ils.ingest.full.biblio.record.readonly for each id and
# sends the result back.  The loop ends when no message arrives or a message
# has undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
    method          => "ro_biblio_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
445
# Streaming read-only ingest of raw MARCXML.
#
# Reads XML strings from the client one message at a time (5 second timeout
# per read), runs open-ils.ingest.full.biblio.xml.readonly on each and sends
# the result back.  The loop ends when no message arrives or a message has
# undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method          => "ro_biblio_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
473
# Streaming ingest of bib record objects.
#
# Reads bib objects from the client one message at a time (5 second timeout
# per read), runs open-ils.ingest.full.biblio.xml.readonly on each object's
# MARC, stamps the object's id on the field entries and full_rec rows, and
# sends the result back.  The loop ends when no message arrives or a message
# has undefined content.  Always returns undef.
#
# NOTE(review): despite the "rw"/"import" naming, nothing in this method
# stores the results -- confirm whether the caller is expected to do that.
sub rw_biblio_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        $_->source($bib->id) for (@{$res->{field_entries}});
        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
    method          => "rw_biblio_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
504
505
506 # --------------------------------------------------------------------------------
507 # Authority ingest
508
509 package OpenILS::Application::Ingest::Authority;
510 use base qw/OpenILS::Application::Ingest/;
511 use Unicode::Normalize;
512
# Derive the flat MARC rows for one authority record object without storing
# anything.
#
#   $bib - an authority record object; its marc and id accessors are used
#
# Returns a hash ref whose full_rec key holds the rows, each stamped with
# the record id.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.object.readonly",
    method          => "ro_authority_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
533
# Derive the flat MARC rows from an authority MARCXML string without storing
# anything.
#
#   third argument - the record XML as a string
#
# Returns a hash ref whose full_rec key holds the rows; no record id is set.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw_xml) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw_xml);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml.readonly",
    method          => "ro_authority_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
551
# Fetch one authority record by id and derive its ingest data without
# storing anything.
#
#   $rec - id of the authority.record_entry row
#
# Returns the hash ref produced by
# open-ils.ingest.full.authority.xml.readonly with the record id set on
# every full_rec row, or undef if the record is not found.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest returns only full_rec, so a descriptor is
    # normally absent; calling ->record on it unguarded would die.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
578
# Streaming read-only authority ingest keyed by record id.
#
# Reads record ids from the client one message at a time (5 second timeout
# per read), runs open-ils.ingest.full.authority.record.readonly for each id
# and sends the result back.  The loop ends when no message arrives or a
# message has undefined content.  Always returns undef.
sub ro_authority_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
    method          => "ro_authority_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
608
# Streaming read-only ingest of raw authority MARCXML.
#
# Reads XML strings from the client one message at a time (5 second timeout
# per read), runs open-ils.ingest.full.authority.xml.readonly on each and
# sends the result back.  The loop ends when no message arrives or a message
# has undefined content.  Always returns undef.
sub ro_authority_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
    method          => "ro_authority_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
636
# Streaming ingest of authority record objects.
#
# Reads record objects from the client one message at a time (5 second
# timeout per read), runs open-ils.ingest.full.authority.xml.readonly on each
# object's MARC, stamps the object's id on the full_rec rows, and sends the
# result back.  The loop ends when no message arrives or a message has
# undefined content.  Always returns undef.
#
# NOTE(review): despite the "rw"/"import" naming, nothing in this method
# stores the results -- confirm whether the caller is expected to do that.
sub rw_authority_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.bib_stream.import",
    method          => "rw_authority_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
666
667
668 # --------------------------------------------------------------------------------
669 # MARC index extraction
670
671 package OpenILS::Application::Ingest::XPATH;
672 use base qw/OpenILS::Application::Ingest/;
673 use Unicode::Normalize;
674
# Collect the text selected by an XPath expression into one string.
#
#   $xml       - an XML element node (findnodes and setNamespace are called
#                on it)
#   $xpath     - the XPath expression to evaluate against $xml
#   $ns_uri    - namespace URI to declare on $xml before evaluating
#   $ns_prefix - prefix for that namespace; the declaration is made only
#                when both URI and prefix are true
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the text collected so far
#
# For each matching node the text of every child node is appended, followed
# by a space; a node without children contributes its own text instead.
# Two four-digit numbers joined by a hyphen are split with a space.
# Returns the result in NFD form.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;
            $content = '' unless (defined $content);

            # Literal substring test.  A regex built from the text would
            # become the empty pattern for empty text, which Perl treats as
            # "repeat the last successful match" rather than "match anything".
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }

    $string =~ s/(\d{4})-(\d{4})/$1 $2/sg;

    return NFD($string);
}
709
# Extract index strings for the requested field classes from one record.
#
#   $xml     - a parsed XML document, or an XML string that is entityized
#              and parsed here
#   @classes - field class names to extract, looked up in $xpathset
#
# For every field definition of every requested class, the record is run
# through the stylesheet of the definition's format if one is loaded (at most
# once per format per call), the definition's XPath is evaluated and the
# text is normalized.  Each non-empty result is streamed to the client as a
# Fieldmapper::metabib::<class>_field_entry carrying value and field id.
# Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();

    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document
    my %transform_cache;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def = $xpathset->{$class}->{$type};
            my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};

            # Formats without a stylesheet are evaluated on the record itself.
            my $document = $xml;
            if ($sf->{xslt}) {
                $document = ($transform_cache{$def->{format}} ||= $sf->{xslt}->transform($xml));
            }

            # The format name doubles as the namespace prefix.
            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $def->{format},
                1
            );

            next unless $value;

            # Decompose, then drop combining marks and control characters,
            # trailing non-word characters and dots standing between word
            # characters, and lower-case what remains.
            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.xml",
    method          => "class_index_string_xml",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
766
# Extract index strings for the requested field classes from a stored record.
#
#   $rec     - id of the record to load
#   @classes - one or more field class names
#
# Streams each field entry produced by open-ils.ingest.field_entry.class.xml
# to the client with its source set to $rec.  Returns undef.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, matching the XML variant; a bare
    # "shift" here would silently keep only the first class.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this reads authority.record_entry while the produced
    # entries are metabib field entries and all_index_string_record reads
    # biblio.record_entry -- confirm which table is intended.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
794
# Streams index field entries for every class known to $xpathset, extracted
# from the supplied XML.
#
# Arguments (after $self and $client):
#   $xml - record XML, passed unchanged to
#          open-ils.ingest.field_entry.class.xml
#
# Always returns undef; each entry is sent to the client as a stream response.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list below is read from $xpathset, which starts out empty.
    # post_init only does its set-up work while $xpathset has no keys
    # (presumably filling it), so calling it here is cheap and keeps this
    # method from asking for zero classes on a first call.  This matches
    # what all_index_string_record already does.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
812
# Streams index field entries for every class in $xpathset, taken from the
# MARC of the biblio record whose id is $rec.  Each entry has its source set
# to $rec before it is sent.  Always returns undef.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $record = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    # Bail out when nothing usable came back from cstore.
    if (!$record or !@$record) {
        return undef;
    }

    my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    my @entries   = $extractor->run($record->marc, keys(%$xpathset));

    foreach my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
839
840 # --------------------------------------------------------------------------------
841 # Flat MARC
842
843 package OpenILS::Application::Ingest::FlatMARC;
844 use base qw/OpenILS::Application::Ingest/;
845 use Unicode::Normalize;
846
847
# Clean-up shared by every row value: decompose with NFD, remove mark (\pM)
# and \pC characters, then trim trailing non-word characters.
sub _clean_full_rec_value {
    my ($text) = @_;

    $text = NFD($text);
    $text =~ s/\pM+//sgo;
    $text =~ s/\pC+//sgo;
    $text =~ s/\W+$//sgo;

    return $text;
}

# Flattens a parsed MARCXML document into a list of full_rec Fieldmapper rows.
#
# Arguments:
#   $marcxml - parsed XML document (only findnodes is called on it)
#   $xmltype - Fieldmapper namespace for the rows; defaults to 'metabib'
#
# Row order is: one row per leader (tag 'LDR'), one per controlfield, then
# one per subfield of each datafield.  Subfield values additionally get a
# "dddd-dddd" hyphen turned into a space and are lower-cased.
# Returns the list of rows.
sub _marcxml_to_full_rows {

    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    # Work from the first element named "record", whatever its namespace.
    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    foreach my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $type->new;
        $row->tag( 'LDR' );

        my $raw = $leader->textContent;
        $row->value( _clean_full_rec_value($raw) );

        push @ns_list, $row;
    }

    foreach my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $type->new;
        $row->tag( $control->getAttribute( "tag" ) );

        my $raw = $control->textContent;
        $row->value( _clean_full_rec_value($raw) );

        push @ns_list, $row;
    }

    foreach my $field ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $field;

        # Tag and indicators are shared by every subfield row of this field.
        my $tag  = $field->getAttribute( "tag" );
        my $ind1 = $field->getAttribute( "ind1" );
        my $ind2 = $field->getAttribute( "ind2" );

        foreach my $subfield ( @{$field->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $type->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );

            my $raw  = $subfield->textContent;
            my $text = _clean_full_rec_value($raw);
            $text =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
            $row->value( lc($text) );

            push @ns_list, $row;
        }
    }

    my $count = scalar(@ns_list);
    $log->debug("Returning ".$count." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
922
# Streams the flattened full_rec rows for a MARCXML document.
#
# $xml may be an XML string (entityized and parsed here) or an already-parsed
# document reference.  Rows are built as 'authority' rows when the API name
# this method was invoked under contains "authority", otherwise as 'metabib'
# rows.  Always returns undef; rows go out as stream responses.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    foreach my $row (_marcxml_to_full_rows($xml, $type)) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
954
# Streams the flattened full_rec rows for a stored record.
#
# The record with id $rec is fetched from cstore as an authority record when
# the invoking API name contains "authority", otherwise as a biblio record.
# Its MARC is passed to the matching open-ils.ingest.flat_marc.*.xml method,
# and each resulting row is sent to the client and logged as JSON.
# Always returns undef.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    # No record, or a record without MARC: nothing to flatten.
    if (!($r and $r->marc)) {
        return undef;
    }

    my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
    foreach my $row ($flattener->run($r->marc)) {
        $client->respond($row);
        $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
993
994 # --------------------------------------------------------------------------------
995 # Fingerprinting
996
997 package OpenILS::Application::Ingest::Biblio::Fingerprint;
998 use base qw/OpenILS::Application::Ingest/;
999 use Unicode::Normalize;
1000 use OpenSRF::EX qw/:try/;
1001
# Computes the fingerprint for a stored biblio record.
#
# Arguments (after $self and $client):
#   $rec - id of the biblio record to retrieve from cstore
#
# The record's MARC is handed to open-ils.ingest.fingerprint.xml; the
# "quality" entry of the result is truncated to an integer before the result
# is returned.  Returns undef when the record (or its MARC) is missing or
# when no fingerprint was produced.
sub biblio_fingerprint_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and $r->marc);

    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

    # Without this guard the quality assignment below would autovivify an
    # undefined $fp into { quality => 0 } and hand that back as if it were
    # a real fingerprint.
    unless (ref $fp) {
        $log->error("No fingerprint produced for record $rec");
        return undef;
    }

    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    $fp->{quality} = int($fp->{quality});
    return $fp;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.fingerprint.record",
    method          => "biblio_fingerprint_record",
    api_level       => 1,
    argc            => 1,
);
1027
# ScriptRunner for the fingerprint script; created on first use and then
# reused by later calls.
our $fp_script;

# Runs the configured biblio fingerprint script against a MARC record.
#
# Arguments (after $self and $client):
#   $xml - MARC XML; it is passed through entityize before use
#
# Returns whatever the script run yields, or undef (after logging an error)
# when the run yields a false value.
sub biblio_fingerprint {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if(!$fp_script) {
        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    # Handle failure explicitly.  The former "run || (error && return)" form
    # only returned when the logger call itself returned true; otherwise it
    # fell through and reported success.
    my $res = $fp_script->run;
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.fingerprint.xml",
    method          => "biblio_fingerprint",
    api_level       => 1,
    argc            => 1,
);
1065
# ScriptRunner for the descriptor script; created on first use and then
# reused by later calls.
our $rd_script;

# Runs the configured biblio descriptor script against a MARC record and
# tidies the resulting date fields.
#
# Arguments (after $self and $client):
#   $xml - MARC XML; it is passed through entityize before use
#
# Returns the script result with date1/date2 adjusted, or undef (after
# logging an error) when the run yields a false value.
sub biblio_descriptor {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if(!$rd_script) {
        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $log->debug("Setting up environment for descriptor extraction script...");
    $rd_script->insert('environment.marc' => $xml => 1);
    $log->debug("Environment building complete...");

    # Handle failure explicitly.  The former "run || (error && return)" form
    # only returned when the logger call itself returned true; otherwise the
    # date handling below would be invoked on a false $res.
    my $res = $rd_script->run;
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio descriptor extraction completed successfully");

    # In a non-blank date1, 'u' and 'x' characters (presumably placeholders
    # for unknown digits -- confirm) become '0'.
    my $d1 = $res->date1;
    if ($d1 && $d1 ne '    ') {
        $d1 =~ tr/ux/00/;
        $res->date1( $d1 );
    }

    # In a non-blank date2 the same characters become '9'.
    my $d2 = $res->date2;
    if ($d2 && $d2 ne '    ') {
        $d2 =~ tr/ux/99/;
        $res->date2( $d2 );
    }

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.descriptor.xml",
    method          => "biblio_descriptor",
    api_level       => 1,
    argc            => 1,
);
1117
1118
1119 1;
1120