]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
pollute metabib.full_rec with non-filing trimmed versions of 245a
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
11
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
15
16 use OpenILS::Utils::Fieldmapper;
17
18 use XML::LibXML;
19 use XML::LibXSLT;
20 use Time::HiRes qw(time);
21
# Record formats known to the ingest code, keyed by format name.  Every entry
# holds the XML namespace URI of that format under 'ns'; post_init() adds a
# compiled stylesheet under 'xslt' for the three MODS entries.
our %supported_formats = (

    # MODS and MARCXML
    'mods32'  => { 'ns' => 'http://www.loc.gov/mods/v3' },
    'mods3'   => { 'ns' => 'http://www.loc.gov/mods/v3' },
    'mods'    => { 'ns' => 'http://www.loc.gov/mods/' },
    'marcxml' => { 'ns' => 'http://www.loc.gov/MARC21/slim' },

    # Dublin Core flavours
    'srw_dc'  => { 'ns' => 'info:srw/schema/1/dc-schema' },
    'oai_dc'  => { 'ns' => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    'rdf_dc'  => { 'ns' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },

    # Feed formats that have a namespace
    'atom'    => { 'ns' => 'http://www.w3.org/2005/Atom' },
    'rss091'  => { 'ns' => 'http://my.netscape.com/rdf/simple/0.9/' },
    'rss10'   => { 'ns' => 'http://purl.org/rss/1.0/' },
    'rss11'   => { 'ns' => 'http://purl.org/net/rss1.1#' },

    # Feed formats with an empty namespace; each one gets its own hash
    ( map { ( $_ => { 'ns' => '' } ) } qw/rss092 rss093 rss094 rss2/ ),
);
39
40
# The logger is addressed by class name, so each $log->... call in this file
# is a class-method call on OpenSRF::Utils::Logger.
my $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT processor; being file-scoped lexicals they are
# visible to every package declared further down in this file.
my ($parser, $xslt) = ( XML::LibXML->new(), XML::LibXSLT->new() );

# NOTE(review): neither of these is referenced in the part of the file
# reviewed here -- confirm they are still needed.
my ($mods_sheet, $mads_sheet);

# Field definitions cached by post_init():
#   $xpathset->{<field class>}{<field name>} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Hooks with empty bodies.
sub initialize { }
sub child_init { }
51
# Lazily prepare the shared ingest state.  Does nothing once $xpathset holds
# at least one field class.  Otherwise it compiles the MARC-to-MODS
# stylesheets from the configured XSL directory (skipping any that are already
# compiled) and caches the metabib field definitions returned by
# open-ils.cstore in $xpathset, keyed by field class and then by field name.
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, log message, stylesheet path below $xsldir ]
    my @stylesheets = (
        [ 'mods',   "Loading MODS XSLT",     "/MARC21slim2MODS.xsl"   ],
        [ 'mods3',  "Loading MODS v3 XSLT",  "/MARC21slim2MODS3.xsl"  ],
        [ 'mods32', "Loading MODS v32 XSLT", "/MARC21slim2MODS32.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $message, $path) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $path );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # XXX testing new metabib field use for faceting; the earlier query was
    #   { id => { '!=' => undef } }
    # instead of the search_field filter used below.
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my $slot = $xpathset->{ $f->field_class }->{ $f->name } ||= {};
        $slot->{xpath}  = $f->xpath;
        $slot->{id}     = $f->id;
        $slot->{format} = $f->format;
        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }
}
96
# Unicode-normalize a string and replace its non-ASCII characters with
# hexadecimal numeric character references (for example "&#xE9;").
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD; any other value, or no value, selects NFC
#
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # $form is optional, so test definedness before comparing it
    if (defined $form and $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD as before, plus the supplementary planes, which used to
    # pass through unescaped.  U+FFFE and U+FFFF stay excluded.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
110
111 # --------------------------------------------------------------------------------
112 # Biblio ingest
113
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
117
# Fully (re)ingest one bibliographic record inside a cstore transaction.
#
# Runs "open-ils.ingest.full.biblio.object.readonly" on $bib, copies the
# resulting fingerprint and quality onto $bib, and then, in order:
#   * replaces the record's metabib.full_rec rows,
#   * replaces its metabib.record_descriptor,
#   * replaces its title/author/subject/keyword/series field entries,
#   * drops its metarecord source maps and attaches it to the metarecord
#     matching the new fingerprint (creating that metarecord if needed, and
#     re-targeting metarecord holds from the old metarecords it left),
#   * updates the biblio.record_entry row itself.
#
#   $bib - biblio record_entry object; fingerprint and quality are overwritten
#
# Returns the bib id on success, undef when the read-only ingest yields
# nothing or when the commit request returns a false value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # turn Fieldmapper::metabib::foo into metabib.foo for the cstore method name
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # (declared unconditionally: "my ... if ..." has undefined behaviour)
    my $old_mrs = [];
    if (@$tmp) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
    }

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # no other record maps to this metarecord any more, so remove it
            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # NOTE(review): flagged even when the metarecord row was kept
            # because other records still map to it -- confirm that holds on
            # such a metarecord are really meant to be moved below.
            $m->isdeleted(1);
        }
    }

    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            $holds = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1) || [];
        }

        if ($mr) {
            for my $h (@$holds) {
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # no metarecord carries this fingerprint yet: create one led by this bib
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # highest-quality record among those already mapped to this MR
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'      => { bre => [ qw/id quality/ ] },
                order_by        => { bre => "quality desc" },
                limit           => 1,
                }
            )->gather(1);

            # if the search came back empty, fall back to this bib
            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    # disconnect whether or not the commit worked, so a failed commit does
    # not leave the connected session behind
    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);
    $cstore->disconnect;
    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
287
# Fetch the biblio record with id $rec from open-ils.cstore and hand it to
# "open-ils.ingest.full.biblio.object".  Returns the first value that method
# produces, or undef when the record could not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    # The retrieve runs inside a transaction that is rolled back right after.
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$r or !@$r) {
        return undef;
    }

    return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record",
    method          => "rw_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
312
# Ingest several biblio records by id.
#
# The ids may be passed either as one array reference or as a flat list.
# Each record found is handed to "open-ils.ingest.full.biblio.object".
#
# Returns the number of records for which that method returned a defined
# value, or undef when no ids were given or no records were found.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    OpenILS::Application::Ingest->post_init();

    # nothing to look up
    return undef unless (@rec);

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # search on the id list itself; the scalar $rec used here before was
    # never assigned in this sub
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    # The object ingest returns the bib id on success, so count successes
    # instead of adding the ids together.
    my $count = 0;
    for my $bib (@$r) {
        my $ingested = ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($bib))[0];
        $count++ if (defined $ingested);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_list",
    method          => "rw_biblio_ingest_record_list",
    api_level       => 1,
    argc            => 1,
);
340
# Read-only ingest of a biblio record object.  Entity-encodes and parses the
# record's MARC, then collects the flat MARC rows, the field entries, the
# fingerprint and the record descriptor, stamping each row, entry and
# descriptor with the record's id.
#
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # the first two methods get the parsed document, the last two the XML text
    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object.readonly",
    method          => "ro_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
366
# Read-only ingest of a MARC XML string.  Same extraction as the object
# variant, but nothing is stamped with a record id because none is known here.
#
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    # the first two methods get the parsed document, the last two the XML text
    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml.readonly",
    method          => "ro_biblio_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
387
# Read-only ingest of the biblio record with id $rec.  Retrieves the record
# from open-ils.cstore, runs "open-ils.ingest.full.biblio.xml.readonly" on
# its MARC and stamps the field entries, flat MARC rows and descriptor with
# $rec.
#
# Returns the result hash ref of the XML ingest, or undef when the record
# could not be retrieved.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; the object variant of this method
    # guards against that too, so do not call a method on undef here.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
415
# Streaming form of "open-ils.ingest.full.biblio.record.readonly".  Reads one
# message at a time from the client (recv is called with count => 1 and
# timeout => 5), treats its content as a record id, ingests that record and
# responds with the result.  The loop ends when recv returns nothing or a
# message has undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $id = $msg->content;
        defined($id) or last;

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($id);

        for my $entry (@{ $ingested->{field_entries} }) {
            $entry->source($id);
        }
        for my $row (@{ $ingested->{full_rec} }) {
            $row->record($id);
        }

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
    method          => "ro_biblio_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
446
# Streaming form of "open-ils.ingest.full.biblio.xml.readonly".  Reads one
# message at a time from the client (recv is called with count => 1 and
# timeout => 5), passes its content to the XML ingest and responds with the
# result.  The loop ends when recv returns nothing or a message has undefined
# content.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $marcxml = $msg->content;
        defined($marcxml) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($marcxml);

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method          => "ro_biblio_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
474
# Streaming ingest for import.  Reads one message at a time from the client
# (recv is called with count => 1 and timeout => 5); each message's content
# is used as a record object with marc and id accessors.  Runs the read-only
# XML ingest on its MARC, stamps the field entries and flat MARC rows with
# its id, and responds with the result.  Nothing is written to the database
# by this sub itself.  Always returns undef.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $record = $msg->content;
        defined($record) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($record->marc);

        for my $entry (@{ $ingested->{field_entries} }) {
            $entry->source($record->id);
        }
        for my $row (@{ $ingested->{full_rec} }) {
            $row->record($record->id);
        }

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
    method          => "rw_biblio_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
505
506
507 # --------------------------------------------------------------------------------
508 # Authority ingest
509
510 package OpenILS::Application::Ingest::Authority;
511 use base qw/OpenILS::Application::Ingest/;
512 use Unicode::Normalize;
513
# Read-only ingest of an authority record object.  Entity-encodes and parses
# the record's MARC, extracts the flat MARC rows and stamps each with the
# record's id.
#
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.object.readonly",
    method          => "ro_authority_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
534
# Read-only ingest of an authority MARC XML string: entity-encode, parse and
# extract the flat MARC rows.  No record id is applied.
#
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml.readonly",
    method          => "ro_authority_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
552
# Read-only ingest of the authority record with id $rec.  Retrieves the
# record from open-ils.cstore, runs
# "open-ils.ingest.full.authority.xml.readonly" on its MARC and stamps the
# flat MARC rows with $rec.
#
# Returns the result hash ref of the XML ingest, or undef when the record
# could not be retrieved.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest returns only full_rec, so there is normally no
    # descriptor; calling ->record on the missing value would die.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
579
# Streaming form of "open-ils.ingest.full.authority.record.readonly".  Reads
# one message at a time from the client (recv is called with count => 1 and
# timeout => 5), treats its content as a record id, ingests that record and
# responds with the result.  The loop ends when recv returns nothing or a
# message has undefined content.  Always returns undef.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $id = $msg->content;
        defined($id) or last;

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($id);

        for my $row (@{ $ingested->{full_rec} }) {
            $row->record($id);
        }

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
    method          => "ro_authority_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
609
# Streaming form of "open-ils.ingest.full.authority.xml.readonly".  Reads one
# message at a time from the client (recv is called with count => 1 and
# timeout => 5), passes its content to the XML ingest and responds with the
# result.  The loop ends when recv returns nothing or a message has undefined
# content.  Always returns undef.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $marcxml = $msg->content;
        defined($marcxml) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($marcxml);

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
    method          => "ro_authority_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
637
# Streaming authority ingest for import.  Reads one message at a time from
# the client (recv is called with count => 1 and timeout => 5); each
# message's content is used as a record object with marc and id accessors.
# Runs the read-only authority XML ingest on its MARC, stamps the flat MARC
# rows with its id, and responds with the result.  Always returns undef.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this session is created but never used in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($msg) = $client->recv( count => 1, timeout => 5 )) {

        my $record = $msg->content;
        defined($record) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($ingested) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($record->marc);

        for my $row (@{ $ingested->{full_rec} }) {
            $row->record($record->id);
        }

        $client->respond( $ingested );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.bib_stream.import",
    method          => "rw_authority_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
667
668
669 # --------------------------------------------------------------------------------
670 # MARC index extraction
671
672 package OpenILS::Application::Ingest::XPATH;
673 use base qw/OpenILS::Application::Ingest/;
674 use Unicode::Normalize;
675
# Give this an XML documentElement and an XPath expression; it returns the
# text found by that expression as one NFD-normalized string.
#
#   $xml       - node to search; must offer setNamespace and findnodes
#   $xpath     - XPath expression passed to findnodes
#   $ns_uri    - namespace URI, registered on $xml only when both it and
#   $ns_prefix   the prefix are true
#   $unique    - when true, skip any child text that already occurs in the
#                collected string (substring match)
#
# For every matching node the text of each child node is appended, followed
# by a space; a node without children contributes its own text instead.
# Pairs of four-digit numbers joined by a hyphen ("1990-1995") are split into
# two space-separated numbers.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        my @children = $value->childNodes();

        # childless node: use its own text
        if (!@children) {
            $string .= $value->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $content = $child->textContent;

            # Uniquify with a plain substring test.  Interpolating the text
            # into a regex misbehaves for empty text, because an empty
            # pattern makes Perl reuse the last successfully matched one.
            next if ($unique && index($string, $content) >= 0);

            $string .= $content . " ";
        }
    }

    $string =~ s/(\d{4})-(\d{4})/$1 $2/sg;

    return NFD($string);
}
710
# Build metabib field entries for the requested field classes.
#
#   $xml     - parsed XML document, or an XML string (which is entity-encoded
#              and parsed here)
#   @classes - field class names to extract
#
# For every field cached in $xpathset under each class, the document is run
# through that field's format stylesheet when one is compiled (one transform
# per format per call), the field's XPath is evaluated, and the text is
# normalized: NFD, mark and control characters removed, trailing non-word
# characters removed, dots between word characters removed, lowercased.
# Each non-empty value is sent to the client as a
# Fieldmapper::metabib::<class>_field_entry carrying the value and the field
# id.  Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    unless (ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # transformed documents, keyed by format name
    my %transformed;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf = $OpenILS::Application::Ingest::supported_formats{$format};

            # formats without a stylesheet are searched in the source document
            my $document = $xml;
            if ($sf->{xslt}) {
                $transformed{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transformed{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            next unless $value;

            $value = NFD($value);
            for ($value) {
                s/\pM+//sgo;
                s/\pC+//sgo;
                s/\W+$//sgo;

                s/\b\.+\b//sgo;
            }
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.xml",
    method          => "class_index_string_xml",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
767
# Extract field entries of the given classes for the record with id $rec.
#
#   $rec     - record id to retrieve
#   @classes - one or more field class names
#
# Runs "open-ils.ingest.field_entry.class.xml" on the record's MARC, sets
# each entry's source to $rec and sends it to the client.  Returns undef.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # take every remaining argument; a bare shift kept only the first class
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves an authority record, whereas the
    # all-classes variant of this method retrieves a biblio record and the
    # entries built are metabib field entries -- confirm which is intended.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
795
# Stream field_entry index strings for a MARC XML document across every
# class currently present in $xpathset.
#
# Params:
#   $xml - the MARC XML, handed unchanged to
#          open-ils.ingest.field_entry.class.xml
#
# Each object returned by the class-level method is sent to the client.
# Always returns undef.
sub all_index_string_xml {
    my $self   = shift;
    my $client = shift;
    my $xml    = shift;

    # keys(%$xpathset) below is evaluated before the class-level method gets
    # a chance to run, so make sure the definitions are loaded first.
    # post_init skips its work once $xpathset has keys, so repeated calls
    # are cheap.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.xml",
    method    => "all_index_string_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
813
# Stream field_entry index strings for a stored bibliographic record across
# every class present in $xpathset.
#
# Params:
#   $rec - id handed to the cstore biblio.record_entry retrieve call
#
# Each resulting object has its source set to $rec before it is sent to the
# client.  Always returns undef; returns early when the lookup yields nothing.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $record = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$record or !@$record) {
        return undef;
    }

    my $indexer = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    foreach my $entry ($indexer->run($record->marc, keys(%$xpathset))) {
        $entry->source($rec);
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
840
841 # --------------------------------------------------------------------------------
842 # Flat MARC
843
844 package OpenILS::Application::Ingest::FlatMARC;
845 use base qw/OpenILS::Application::Ingest/;
846 use Unicode::Normalize;
847
848
# Shared clean-up for every value stored in a full_rec row: decompose to
# NFD, drop combining marks (\pM) and "other"/control characters (\pC), then
# trim trailing non-word characters.
sub _full_rec_clean_value {
    my $val = shift;

    $val = NFD($val);
    $val =~ s/\pM+//g;
    $val =~ s/\pC+//g;
    $val =~ s/\W+$//g;

    return $val;
}

# Subfield flavour of the clean-up: on top of _full_rec_clean_value it turns
# NNNN-NNNN into "NNNN NNNN" and lowercases the result.
sub _full_rec_clean_subfield {
    my $val = _full_rec_clean_value(shift);

    $val =~ s/(\d{4})-(\d{4})/$1 $2/g;

    return lc($val);
}

# Flatten a parsed MARC XML document into a list of full_rec objects.
#
# Params:
#   $marcxml - parsed document; must support findnodes()
#   $xmltype - 'metabib' (default) or another Fieldmapper namespace; rows are
#              built from the class "Fieldmapper::${xmltype}::full_rec"
#
# Returns a list with:
#   * one row per <leader>, tagged 'LDR'
#   * one row per <controlfield>
#   * one row per <subfield> of every <datafield> (value lowercased)
#   * for metabib only, one extra row tagged 'tnf' per 245 $a, whose value is
#     the subfield text with the first ind2 characters removed
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( 'LDR' );
        $ns->value( _full_rec_clean_value( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _full_rec_clean_value( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag  = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        # Fetched once; both passes below walk the same subfields.
        my @subfields = @{$tagline->getChildrenByTagName('subfield')};

        for my $data (@subfields) {
            next unless $data;

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( _full_rec_clean_subfield( $data->textContent ) );

            push @ns_list, $ns;
        }

        next unless ($xmltype eq 'metabib' and defined $tag and $tag eq '245');

        # ind2 is used as a character count to strip from the front of 245 $a.
        # It may be blank, missing or otherwise non-numeric; treat all of
        # those as "strip nothing" instead of feeding them to substr().
        my $nonfiling = (defined $ind2 and $ind2 =~ /\A[0-9]\z/) ? $ind2 : 0;

        for my $data (@subfields) {
            next unless ($data and ($data->getAttribute( "code" ) || '') eq 'a');

            my $text = $data->textContent;

            # An offset past the end of the string makes substr() return
            # undef, so clamp it to the text length.
            my $skip = $nonfiling > length($text) ? length($text) : $nonfiling;

            # Lexical here: this used to assign to an undeclared $ns, which
            # leaked into a package global.
            my $ns = $type->new;

            $ns->tag( 'tnf' );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( _full_rec_clean_subfield( substr( $text, $skip ) ) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
947
# Stream the flattened full_rec rows for a MARC XML document.
#
# Params:
#   $xml - MARC XML as a string, or an already-parsed document (any
#          reference is used as-is)
#
# The row type is 'authority' when this method was invoked through an API
# name containing "authority", otherwise 'metabib'.  Always returns undef.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    my $type = ($self->api_name =~ /authority/) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    foreach my $row (_marcxml_to_full_rows($xml, $type)) {
        $client->respond($row);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
979
# Stream the flattened full_rec rows for a stored record.
#
# Params:
#   $rec - id handed to the cstore record_entry retrieve call
#
# Works on authority records when invoked through an API name containing
# "authority", otherwise on biblio records.  The record's MARC is passed to
# the matching open-ils.ingest.flat_marc.<type>.xml method; every row it
# yields is sent to the client and logged as JSON.  Always returns undef;
# returns early when the record or its MARC is missing.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();

    my $record = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    if (!$record or !$record->marc) {
        return undef;
    }

    my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
    my @full_recs = $flattener->run($record->marc);

    foreach my $full_rec (@full_recs) {
        $client->respond($full_rec);
        $log->debug(OpenSRF::Utils::JSON->perl2JSON($full_rec), DEBUG);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1018
1019 # --------------------------------------------------------------------------------
1020 # Fingerprinting
1021
1022 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1023 use base qw/OpenILS::Application::Ingest/;
1024 use Unicode::Normalize;
1025 use OpenSRF::EX qw/:try/;
1026
# Compute the fingerprint for a stored bibliographic record.
#
# Params:
#   $rec - id handed to the cstore biblio.record_entry retrieve call
#
# Returns the hash produced by open-ils.ingest.fingerprint.xml with its
# "quality" entry truncated to an integer, or undef when the record (or its
# MARC) is missing or no fingerprint could be generated.
sub biblio_fingerprint_record {
    my $self   = shift;
    my $client = shift;
    my $rec    = shift;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and $r->marc);

    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

    # The XML-level method can come back empty (e.g. when its script fails).
    # Stop here: dereferencing $fp below would otherwise autovivify a hash
    # and hand the caller a bogus { quality => 0 } fingerprint.
    unless ($fp) {
        $log->error("No fingerprint generated for record $rec");
        return undef;
    }

    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    $fp->{quality} = int($fp->{quality});
    return $fp;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.record",
    method    => "biblio_fingerprint_record",
    api_level => 1,
    argc      => 1,
);
1052
# ScriptRunner for the fingerprint script; built on first use and then
# reused by later calls.
our $fp_script;

# Run the configured biblio_fingerprint script against a MARC XML string.
#
# Params:
#   $xml - MARC XML; passed through entityize() before use
#
# The script sees the XML as environment.marc.  Returns whatever the script
# run yields, or undef (after logging an error) when the run returns a false
# value.
sub biblio_fingerprint {
    my $self   = shift;
    my $client = shift;
    my $xml    = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$fp_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    # Spelled out rather than "run || (error && return)": that form only
    # returned when the logger call happened to be truthy, and otherwise fell
    # through to the success message below.
    my $res = $fp_script->run;
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.xml",
    method    => "biblio_fingerprint",
    api_level => 1,
    argc      => 1,
);
1090
# ScriptRunner for the record-descriptor script; built on first use and then
# reused by later calls.
our $rd_script;

# Run the configured biblio_descriptor script against a MARC XML string and
# tidy up the dates on the result.
#
# Params:
#   $xml - MARC XML; passed through entityize() before use
#
# Returns the object produced by the script, or undef (after logging an
# error) when the run returns a false value.  On success, any 'u' or 'x'
# characters in a non-blank date1 are replaced with '0', and in a non-blank
# date2 with '9'.
sub biblio_descriptor {
    my $self   = shift;
    my $client = shift;
    my $xml    = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$rd_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $log->debug("Setting up environment for descriptor extraction script...");
    $rd_script->insert('environment.marc' => $xml => 1);
    $log->debug("Environment building complete...");

    # Spelled out rather than "run || (error && return)": that form only
    # returned when the logger call happened to be truthy, and otherwise fell
    # through to the success message and the date handling below.
    my $res = $rd_script->run;
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }
    $log->debug("Script for biblio descriptor extraction completed successfully");

    # Placeholder characters in the dates are filled in low for date1 and
    # high for date2 -- presumably so the pair brackets the unknown digits;
    # confirm against the consumers of these fields.
    my $d1 = $res->date1;
    if ($d1 && $d1 ne '    ') {
        $d1 =~ tr/ux/00/;
        $res->date1( $d1 );
    }

    my $d2 = $res->date2;
    if ($d2 && $d2 ne '    ') {
        $d2 =~ tr/ux/99/;
        $res->date2( $d2 );
    }

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.descriptor.xml",
    method    => "biblio_descriptor",
    api_level => 1,
    argc      => 1,
);
1142
1143
1144 1;
1145