]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
ingest of dates thinko
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
11
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
15
16 use OpenILS::Utils::Fieldmapper;
17
18 use XML::LibXML;
19 use XML::LibXSLT;
20 use Time::HiRes qw(time);
21
# Record/feed formats known to the ingest code, keyed by format name.
# Each entry holds the XML namespace URI for that format; post_init()
# additionally stores a compiled stylesheet under {xslt} for mods and mods3.
our %supported_formats = (
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss092  => { ns => '' },
    rss093  => { ns => '' },
    rss094  => { ns => '' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },
    rss2    => { ns => '' },
);

# Logger class name; logging methods are invoked on it as class methods.
my $log = 'OpenSRF::Utils::Logger';

# File-wide XML parser and XSLT processor shared by every package below.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

my $mods_sheet;
my $mads_sheet;

# field_class => field name => { xpath, id, format }; filled by post_init().
my $xpathset = {};

# Intentionally empty hooks.
sub initialize {}
sub child_init {}
50
# One-time lazy setup, skipped entirely once $xpathset has any keys:
#   * compiles the MODS and MODS v3 stylesheets found in the configured
#     "xsl" directory and stores them in %supported_formats, and
#   * loads the search-enabled config.metabib_field rows from cstore into
#     $xpathset as field_class => name => { xpath, id, format }.
sub post_init {

    unless (keys %$xpathset) {
        $log->debug("Running post_init", DEBUG);

        my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

        # [ format key, label used in the log line, stylesheet file name ]
        my @sheets = (
            [ mods  => 'MODS',    'MARC21slim2MODS.xsl'  ],
            [ mods3 => 'MODS v3', 'MARC21slim2MODS3.xsl' ],
        );

        for my $sheet (@sheets) {
            my ($format, $label, $file) = @$sheet;
            next if ($supported_formats{$format}{xslt});

            $log->debug("Loading $label XSLT", DEBUG);
            my $xslt_doc = $parser->parse_file( $xsldir . "/" . $file );
            $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
        }

        # XXX testing new metabib field use for faceting
        #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        my $req = OpenSRF::AppSession
                ->create('open-ils.cstore')
                ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
                ->gather(1);

        if (ref $req and @$req) {
            for my $field (@$req) {
                my $class = $field->field_class;
                my $name  = $field->name;

                @{ $xpathset->{$class}->{$name} }{qw/xpath id format/}
                    = ( $field->xpath, $field->id, $field->format );

                $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$field->xpath, DEBUG);
            }
        }
    }
}
90
# Normalize a string and replace every character in the range
# U+0080..U+FFFD with an upper-case hexadecimal numeric character
# reference (&#xNN;).
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalization; anything else selects NFC
#
# Returns the converted string.
sub entityize {
    my ($stuff, $form) = @_;

    $stuff = ($form eq 'D') ? NFD($stuff) : NFC($stuff);

    $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
    return $stuff;
}
104
105 # --------------------------------------------------------------------------------
106 # Biblio ingest
107
108 package OpenILS::Application::Ingest::Biblio;
109 use base qw/OpenILS::Application::Ingest/;
110 use Unicode::Normalize;
111
# Ingest one bib record object and write the results to the database.
#
# Runs the read-only ingest for $bib, then, inside a single cstore
# transaction:
#   * replaces the record's metabib.full_rec rows,
#   * replaces its metabib.record_descriptor row,
#   * replaces its title/author/subject/keyword/series field entries,
#   * re-maps the record to a metarecord matching its new fingerprint
#     (creating one if needed, deleting old metarecords that no longer have
#     sources, and re-targeting metarecord holds), and
#   * updates the bib record itself with the new fingerprint and quality.
#
# Params:  $bib - a biblio record object providing id, marc, fingerprint and
#          quality accessors.
# Returns: the bib id on success; undef when the read-only ingest yields
#          nothing or the transaction commit does not return a true value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Turn Fieldmapper::metabib::foo into the cstore name metabib.foo
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # Declared separately from the conditional fetch: "my $x = ... if COND"
    # has undefined behaviour in Perl.
    my $old_mrs;
    if (@$tmp) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
    }

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # No other bib maps to this metarecord any more, so remove it.
            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # Flag every non-matching old MR so its holds get moved below.
            $m->isdeleted(1);
        }
    }

    my $holds;
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        $holds = $cstore->request(
            'open-ils.cstore.direct.action.hold_request.search.atomic',
            { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
        )->gather(1) if (@$old_mrs);

        if ($mr) {
            for my $h (@$holds) {
                # NOTE(review): target is set to the metarecord object, not
                # $mr->id -- confirm cstore accepts an object here.
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # No metarecord carries this fingerprint yet: create one led by this bib.
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # Highest-quality record among those already mapped to this MR.
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'  => { bre => [ qw/id quality/ ] },
                  order_by  => { bre => "quality desc" },
                  limit     => 1,
                }
            )->gather(1);

            if ($best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # Always release the connected session, whether or not the commit worked.
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object",
    method      => "rw_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
280
# Fetch the bib record identified by $rec from cstore and hand it to
# open-ils.ingest.full.biblio.object for a full read/write ingest.
#
# The fetch runs inside a transaction that is rolled back straight
# afterwards, and the session is disconnected before the ingest starts.
#
# Returns the first result of the object ingest, or undef when the record
# could not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    my @ingested = $self->method_lookup("open-ils.ingest.full.biblio.object")->run($r);
    return (@ingested)[0];
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record",
    method      => "rw_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
305
# Run a full read/write ingest for a list of bib record ids.
#
# Params:  either one array reference of record ids, or the ids as a flat
#          argument list.
# Returns: the number of records for which the object ingest returned a
#          true value, or undef when the search returned no records.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    OpenILS::Application::Ingest->post_init();
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # Search on the collected id list. The scalar $rec used previously was
    # never declared or assigned, so the search ran with id => undef.
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    # The object ingest returns the bib id on success, so count successes
    # rather than adding the ids together.
    my $count = 0;
    for my $bib (@$r) {
        my ($ok) = $self->method_lookup("open-ils.ingest.full.biblio.object")->run($bib);
        $count++ if ($ok);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_list",
    method      => "rw_biblio_ingest_record_list",
    api_level   => 1,
    argc        => 1,
);
333
# Read-only ingest of a bib record object: nothing is written to the
# database here.
#
# Entityizes and parses $bib->marc, then collects flat MARC rows, field
# entries, the fingerprint and the record descriptor, stamping each result
# with $bib->id.
#
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two extractors take the parsed document, the last two the XML text.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object.readonly",
    method      => "ro_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
359
# Read-only ingest of a MARCXML string.
#
# Entityizes and parses the XML, then collects flat MARC rows, field
# entries, the fingerprint and the record descriptor. No record id is
# attached to the results here.
#
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw_xml) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw_xml);
    my $document = $parser->parse_string($xml);

    # The first two extractors take the parsed document, the last two the XML text.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml.readonly",
    method      => "ro_biblio_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
380
# Read-only ingest of the bib record with id $rec.
#
# Retrieves the record from cstore, runs the XML read-only ingest on its
# MARC, and stamps the field entries, full_rec rows and descriptor with $rec.
#
# Returns the ingest result hash reference, or undef when the record could
# not be retrieved.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; the object-based ingest guards this the
    # same way, and an unguarded call would die on undef.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record.readonly",
    method      => "ro_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
408
# Streaming read-only ingest keyed by record id.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is passed as a record id to
# open-ils.ingest.full.biblio.record.readonly and the result is sent back.
# The loop ends when recv returns nothing or a message has undefined content.
#
# Always returns undef; results are delivered through $client->respond.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined($rec) or last;

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        for my $entry (@{ $res->{field_entries} }) {
            $entry->source($rec);
        }
        for my $row (@{ $res->{full_rec} }) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_stream.readonly",
    method      => "ro_biblio_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
439
# Streaming read-only ingest of raw MARCXML.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is passed as XML to
# open-ils.ingest.full.biblio.xml.readonly and the result is sent back.
# The loop ends when recv returns nothing or a message has undefined content.
#
# Always returns undef; results are delivered through $client->respond.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined($xml) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method      => "ro_biblio_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
467
# Streaming ingest of bib record objects for import.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is treated as a bib object: its marc is run
# through open-ils.ingest.full.biblio.xml.readonly, the resulting field
# entries and full_rec rows are stamped with the bib id, and the result is
# sent back. The loop ends when recv returns nothing or a message has
# undefined content.
#
# Always returns undef; results are delivered through $client->respond.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined($bib) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        for my $entry (@{ $res->{field_entries} }) {
            $entry->source($bib->id);
        }
        for my $row (@{ $res->{full_rec} }) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.bib_stream.import",
    method      => "rw_biblio_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
498
499
500 # --------------------------------------------------------------------------------
501 # Authority ingest
502
503 package OpenILS::Application::Ingest::Authority;
504 use base qw/OpenILS::Application::Ingest/;
505 use Unicode::Normalize;
506
# Read-only ingest of an authority record object.
#
# Entityizes and parses $bib->marc, extracts the flat MARC rows through
# open-ils.ingest.flat_marc.authority.xml and stamps each row with $bib->id.
#
# Returns a hash reference whose only key is full_rec.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.object.readonly",
    method      => "ro_authority_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
527
# Read-only ingest of an authority MARCXML string.
#
# Entityizes and parses the XML and extracts the flat MARC rows through
# open-ils.ingest.flat_marc.authority.xml. No record id is attached here.
#
# Returns a hash reference whose only key is full_rec.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw_xml) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw_xml);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml.readonly",
    method      => "ro_authority_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
545
# Read-only ingest of the authority record with id $rec.
#
# Retrieves the record from cstore, runs the authority XML read-only ingest
# on its MARC and stamps the resulting full_rec rows with $rec.
#
# Returns the ingest result hash reference, or undef when the record could
# not be retrieved.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest returns only full_rec, so there is normally no
    # descriptor; calling ->record on the missing value would die.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record.readonly",
    method      => "ro_authority_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
572
# Streaming read-only authority ingest keyed by record id.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is passed as a record id to
# open-ils.ingest.full.authority.record.readonly, the resulting full_rec
# rows are stamped with that id, and the result is sent back. The loop ends
# when recv returns nothing or a message has undefined content.
#
# Always returns undef; results are delivered through $client->respond.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined($rec) or last;

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        for my $row (@{ $res->{full_rec} }) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record_stream.readonly",
    method      => "ro_authority_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
602
# Streaming read-only authority ingest of raw MARCXML.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is passed as XML to
# open-ils.ingest.full.authority.xml.readonly and the result is sent back.
# The loop ends when recv returns nothing or a message has undefined content.
#
# Always returns undef; results are delivered through $client->respond.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined($xml) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml_stream.readonly",
    method      => "ro_authority_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
630
# Streaming ingest of authority record objects for import.
#
# Repeatedly receives one message from the client (5 second timeout). The
# content of each message is treated as a record object: its marc is run
# through open-ils.ingest.full.authority.xml.readonly, the resulting
# full_rec rows are stamped with the object's id, and the result is sent
# back. The loop ends when recv returns nothing or a message has undefined
# content.
#
# Always returns undef; results are delivered through $client->respond.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not used by the rest of this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined($bib) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        for my $row (@{ $res->{full_rec} }) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.bib_stream.import",
    method      => "rw_authority_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
660
661
662 # --------------------------------------------------------------------------------
663 # MARC index extraction
664
665 package OpenILS::Application::Ingest::XPATH;
666 use base qw/OpenILS::Application::Ingest/;
667 use Unicode::Normalize;
668
# Give this an XML documentElement and an XPath expression; it returns the
# text of the matching nodes as one space-separated, NFD-normalized string.
#
#   $xml       - node to search; must support setNamespace and findnodes
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - namespace URI to register on $xml (optional)
#   $ns_prefix - prefix for $ns_uri; the namespace is registered only when
#                both the URI and the prefix are true
#   $unique    - when true, skip a child whose text already occurs anywhere
#                in the string built so far
#
# For each matching node the text of every child node is appended, followed
# by a space; a node without children contributes its own text instead.
# Four-digit ranges such as "1990-1995" are split into "1990 1995".
sub xpath_to_string {
    my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;

            # Uniquify with a literal substring test. A regex built from the
            # text becomes the empty pattern when the text is empty, and Perl
            # then reuses the last successfully matched regex instead.
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }

        if (!@children) {
            $string .= $value->textContent . " ";
        }
    }

    $string =~ s/(\d{4})-(\d{4})/$1 $2/sg;

    return NFD($string);
}
703
# Build field-entry objects for the requested index classes from a MARCXML
# record.
#
#   $xml     - an already parsed document, or an XML string that is
#              entityized and parsed here
#   @classes - index class names; every field defined for a class in
#              $xpathset is extracted
#
# For each field definition the record is transformed with the stylesheet
# of the definition's format when one is loaded (at most once per format
# per call), the definition's XPath is evaluated, and the text is
# normalized: NFD, combining marks and control characters removed, trailing
# non-word characters trimmed, dots between word boundaries removed,
# lower-cased. Fields that yield no value are skipped.
#
# Each resulting Fieldmapper::metabib::<class>_field_entry object is sent
# through $client->respond; the method itself returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();

    unless (ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document, valid for this call only
    my %transform_cache;

    for my $class (@classes) {
        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            my $document = $xml;

            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            next unless $value;

            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $fm = $class_constructor->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.xml",
    method      => "class_index_string_xml",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
760
# Extract field entries of the given index classes for the record with id
# $rec.
#
#   $rec     - record id to retrieve
#   @classes - one or more index class names
#
# Retrieves the record from cstore, runs open-ils.ingest.field_entry.class.xml
# on its MARC, sets the source of every resulting field entry to $rec and
# sends each one through $client->respond.
#
# Returns undef (also when the record could not be retrieved).
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument as a class name; a plain shift kept only
    # the first one and silently dropped the rest.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves an authority record, while the similar
    # all-classes method retrieves a biblio record -- confirm which is intended.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.record",
    method      => "class_index_string_record",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
788
# Extract field entries for every known index class from a MARCXML record.
#
#   $xml - an XML string or an already parsed document; it is passed
#          unchanged to open-ils.ingest.field_entry.class.xml
#
# Each resulting field entry is sent through $client->respond.
# Returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list comes from $xpathset, which is only filled by
    # post_init(). Without this call the first request in a fresh process
    # would see no classes and produce no field entries.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.xml",
    method      => "all_index_string_xml",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
806
# Stream the field entries for every class in $xpathset for one stored
# biblio record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore biblio.record_entry retrieve call
#
# Every entry gets its source set to $rec before being streamed.  Nothing is
# streamed when the record cannot be retrieved.  Always returns undef.
sub all_index_string_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $record = OpenSRF::AppSession
                ->create('open-ils.cstore')
                ->request('open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec)
                ->gather(1);

        # Bail out on a missing or empty record object.
        if (!$record || !@$record) {
                return undef;
        }

        my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
        my @entries   = $extractor->run($record->marc, keys %$xpathset);

        foreach my $entry (@entries) {
                $entry->source($rec);
                $client->respond($entry);
        }

        return undef;
}
__PACKAGE__->register_method(
        api_name  => "open-ils.ingest.extract.field_entry.all.record",
        method    => "all_index_string_record",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);
833
834 # --------------------------------------------------------------------------------
835 # Flat MARC
836
837 package OpenILS::Application::Ingest::FlatMARC;
838 use base qw/OpenILS::Application::Ingest/;
839 use Unicode::Normalize;
840
841
# Text clean-up shared by leader, controlfield and subfield values:
# decompose to NFD, drop combining marks (\pM) and "other"/control characters
# (\pC), then trim trailing non-word characters.  Returns the cleaned string.
sub _normalize_marc_value {
        my $val = NFD(shift);
        $val =~ s/\pM+//sg;
        $val =~ s/\pC+//sg;
        $val =~ s/\W+$//sg;
        return $val;
}

# Flatten a parsed MARC XML document into a list of full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document object (must support findnodes)
#   $xmltype - Fieldmapper namespace segment used to build the row class
#              "Fieldmapper::<xmltype>::full_rec"; defaults to 'metabib'
#
# Returns the rows as a list, in this order: one row per leader (tag 'LDR'),
# one per controlfield, then one per subfield of every datafield.  Only the
# first element whose local name is "record" is processed.
#
# Dies when the document contains no "record" element.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $xmltype = shift || 'metabib';

        my $type = "Fieldmapper::${xmltype}::full_rec";

        my @ns_list;

        my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

        # Fail with a meaningful message instead of an opaque
        # "Can't call method ... on an undefined value" further down.
        die "No MARC record element found in $xmltype xml" unless $root;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( _normalize_marc_value($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( _normalize_marc_value($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );

                        my $val = _normalize_marc_value($data->textContent);
                        # Split "NNNN-NNNN" (e.g. a year range) into two
                        # space-separated four-digit groups.
                        $val =~ s/(\d{4})-(\d{4})/$1 $2/sg;
                        # Unlike leader/controlfield values, subfield values
                        # are stored lower-cased.
                        $ns->value( lc($val) );

                        push @ns_list, $ns;
                }
        }

        $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
        return @ns_list;
}
916
# Streaming API: flatten a MARC XML document into full_rec rows.
#
# Arguments (after $self and $client):
#   $xml - either a string, which is entityized and parsed here, or a
#          reference, which is handed to _marcxml_to_full_rows unchanged
#
# The row type is 'authority' when this method was invoked under an API name
# containing "authority", otherwise 'metabib'.  Every row is streamed to the
# client.  Always returns undef.
sub flat_marc_xml {
        my ($self, $client, $xml) = @_;

        $log->debug("processing [$xml]");

        # Only plain strings still need parsing.
        if (!ref $xml) {
                my $entityized = OpenILS::Application::Ingest::entityize($xml);
                $xml = $parser->parse_string($entityized);
        }

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

        OpenILS::Application::Ingest->post_init();

        foreach my $row (_marcxml_to_full_rows($xml, $type)) {
                $client->respond($row);
        }

        return undef;
}
# The same implementation is published twice; the API name used for the call
# decides the row type.
__PACKAGE__->register_method(
        api_name  => "open-ils.ingest.flat_marc.authority.xml",
        method    => "flat_marc_xml",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);
__PACKAGE__->register_method(
        api_name  => "open-ils.ingest.flat_marc.biblio.xml",
        method    => "flat_marc_xml",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);
948
# Streaming API: flatten the MARC of one stored record into full_rec rows.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore <type>.record_entry retrieve call
#
# <type> is 'authority' when this method was invoked under an API name
# containing "authority", otherwise 'biblio'.  The record's MARC is run
# through the matching open-ils.ingest.flat_marc.<type>.xml method; each row
# is streamed to the client and also logged as JSON.  Nothing is streamed
# when the record is missing or has no MARC.  Always returns undef.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();

        my $record = OpenSRF::AppSession
                ->create('open-ils.cstore')
                ->request("open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec)
                ->gather(1);

        if (!$record || !$record->marc) {
                return undef;
        }

        my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
        my @rows      = $flattener->run($record->marc);

        foreach my $row (@rows) {
                $client->respond($row);
                $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
        }

        return undef;
}
# Published once per record type; the API name used for the call selects
# biblio or authority handling.
__PACKAGE__->register_method(
        api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
        method    => "flat_marc_record",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);
__PACKAGE__->register_method(
        api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
        method    => "flat_marc_record",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);
987
988 # --------------------------------------------------------------------------------
989 # Fingerprinting
990
991 package OpenILS::Application::Ingest::Biblio::Fingerprint;
992 use base qw/OpenILS::Application::Ingest/;
993 use Unicode::Normalize;
994 use OpenSRF::EX qw/:try/;
995
# Fingerprint one stored biblio record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore biblio.record_entry retrieve call
#
# Returns the first result of open-ils.ingest.fingerprint.xml for the
# record's MARC, with its "quality" entry truncated to an integer.  Returns
# undef when the record is missing, has no MARC, or the fingerprint method
# produced no result.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and $r->marc);

        my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

        # Without this guard, the quality assignment below would autovivify a
        # hash out of an undefined $fp and hand back a bogus { quality => 0 },
        # hiding the fact that fingerprinting failed.
        return undef unless ($fp);

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        $fp->{quality} = int($fp->{quality});
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
1021
# Cached ScriptRunner for the biblio fingerprint script; created on the first
# call to biblio_fingerprint and reused by later calls.
our $fp_script;

# Run the configured biblio fingerprint script against one MARC document.
#
# Arguments (after $self and $client):
#   the MARC XML, which is passed through
#   OpenILS::Application::Ingest::entityize before use
#
# Returns whatever the script run returns, or undef (after logging an error)
# when the run yields a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                # Direct method call instead of indirect-object "new Class (...)".
                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 100,
                );
        }

        $fp_script->insert('environment' => {marc => $xml} => 1);

        # Handle failure explicitly: the former
        # "run || (log->error(...) && return undef)" only returned when the
        # logger call happened to return a true value, and otherwise went on
        # to report success.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.xml",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
1059
# Cached ScriptRunner for the biblio descriptor script; created on the first
# call to biblio_descriptor and reused by later calls.
our $rd_script;

# Run the configured biblio descriptor script against one MARC document and
# tidy the dates on its result.
#
# Arguments (after $self and $client):
#   the MARC XML, which is passed through
#   OpenILS::Application::Ingest::entityize before use
#
# Returns the script result with date1/date2 adjusted, or undef (after
# logging an error) when the run yields a false value.
sub biblio_descriptor {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$rd_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
                # script_path may be configured as a single value or a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio descriptor extraction...");

                # Direct method call instead of indirect-object "new Class (...)".
                $rd_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 100,
                );
        }

        $log->debug("Setting up environment for descriptor extraction script...");
        $rd_script->insert('environment.marc' => $xml => 1);
        $log->debug("Environment building complete...");

        # Handle failure explicitly: the former
        # "run || (log->error(...) && return undef)" only returned when the
        # logger call happened to return a true value; otherwise execution
        # fell through and called ->date1 on the false result.
        my $res = $rd_script->run;
        unless ($res) {
                $log->error( "Descriptor script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio descriptor extraction completed successfully");

        # In a non-blank first date, 'u' and 'x' become '0' ...
        my $d1 = $res->date1;
        if ($d1 && $d1 ne '    ') {
                $d1 =~ tr/ux/00/;
                $res->date1( $d1 );
        }

        # ... and in a non-blank second date they become '9'.
        my $d2 = $res->date2;
        if ($d2 && $d2 ne '    ') {
                $d2 =~ tr/ux/99/;
                $res->date2( $d2 );
        }

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.descriptor.xml",
        method          => "biblio_descriptor",
        api_level       => 1,
        argc            => 1,
);
1111
1112
1113 1;
1114