]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
44dc795d5eeaefbf2e1a34300db57f3f7f00832f
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use OpenSRF::Utils::JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Record formats known to the ingest code, keyed by format name.  Each entry
# holds the XML namespace URI ("ns") that is bound before XPath expressions
# are evaluated against a document of that format.  post_init() additionally
# stores a compiled stylesheet under {xslt} for the mods and mods3 entries.
our %supported_formats = (
    # formats that declare a namespace
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },

    # RSS flavours registered with an empty namespace
    ( map { ( $_ => { ns => '' } ) } qw/rss092 rss093 rss094 rss2/ ),
);
37
38
# Logger class name; logging methods are invoked on it as class methods.
my $log = 'OpenSRF::Utils::Logger';

# XML parser and XSLT processor shared by every package in this file.
my ($parser, $xslt) = ( XML::LibXML->new(), XML::LibXSLT->new() );

# Stylesheet slots; nothing in the visible part of this file assigns them.
my ($mods_sheet, $mads_sheet);

# Index definitions loaded by post_init():
#   $xpathset->{field_class}{name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Empty initialize/child_init hooks (presumably the OpenSRF::Application
# lifecycle callbacks -- confirm); real setup happens lazily in post_init().
sub initialize { }
sub child_init { }
49
# Lazily load the shared ingest configuration.
#
# Does nothing once $xpathset has at least one key.  Otherwise it:
#   * compiles the MARC21 -> MODS and MARC21 -> MODS v3 stylesheets found in
#     the configured xsl directory (unless already compiled) and stores them
#     in %supported_formats, and
#   * asks open-ils.cstore for the config.metabib_field rows with
#     search_field = 't' and records each row's xpath, id and format in
#     $xpathset under {field_class}{name}.
sub post_init {

    # Already populated by an earlier call.
    return if (keys %$xpathset);

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, log message, stylesheet path relative to $xsldir ]
    my @stylesheets = (
        [ 'mods',  "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ 'mods3', "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $message, $file) = @$sheet;
        next if ($supported_formats{$format}{xslt});

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # XXX testing new metabib field use for faceting: the search used to be
    # { id => { '!=' => undef } }, i.e. every row, and is now limited to
    # rows flagged as search fields.
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my ($class, $name, $xpath) = ($f->field_class, $f->name, $f->xpath);

        @{ $xpathset->{$class}->{$name} }{qw/xpath id format/} = ($xpath, $f->id, $f->format);

        $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$xpath, DEBUG);
    }
}
89
# Unicode-normalize a string and replace its non-ASCII characters with
# numeric XML character references (&#xHEX;).
#
#   $stuff - the text to convert
#   $form  - optional normalization form: 'D' selects NFD; any other value,
#            or no value at all, selects NFC
#
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # $form is optional (the callers in this file never pass it), so test for
    # definedness before comparing to avoid an uninitialized-value warning.
    if (defined($form) && $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD plus the supplementary planes U+10000..U+10FFFF; the
    # latter were previously left in the string as raw characters.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
103
104 # --------------------------------------------------------------------------------
105 # Biblio ingest
106
107 package OpenILS::Application::Ingest::Biblio;
108 use base qw/OpenILS::Application::Ingest/;
109 use Unicode::Normalize;
110
# Fully (re)ingest one bibliographic record object and store the result.
#
# Runs the read-only ingest for $bib, then, inside one cstore transaction:
#   * replaces the record's full_rec rows, record descriptor and classed
#     field entries,
#   * removes the record's metarecord source map rows and deletes old
#     metarecords that have no remaining sources,
#   * finds or creates the metarecord matching the new fingerprint, moves
#     metarecord-level holds over to it and picks its master record,
#   * maps the record to that metarecord and updates the record itself.
#
#   $self   - method object, used for method_lookup
#   $client - unused
#   $bib    - record object providing id, marc, fingerprint and quality
#
# Returns the record id on success; undef when the read-only ingest yields
# nothing or the commit fails.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry -> metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # Declared separately from the conditional fetch: "my $x = ... if COND"
    # has undefined behaviour in Perl.
    my $old_mrs;
    if (@$tmp) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
    }

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # NOTE(review): the flag is set even when the metarecord still
            # has other sources and was not deleted above; holds on it are
            # moved further down either way -- confirm this is intended.
            $m->isdeleted(1);
        }
    }

    # Always an array ref so the loops below never dereference undef.
    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            $holds = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1) || [];
        }

        if ($mr) {
            for my $h (@$holds) {
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # No metarecord carries this fingerprint yet: create one with this
        # record as its master.
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # Highest-quality record among those already mapped to the MR.
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'  => { bre => [ qw/id quality/ ] },
                  order_by  => { bre => "quality desc" },
                  limit     => 1,
                }
            )->gather(1);

            if ($best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # Release the connected session on every path; on a failed commit also
    # ask for a rollback so the transaction is not left open.
    unless ($committed) {
        $cstore->request( 'open-ils.cstore.transaction.rollback' )->gather(1);
        $cstore->disconnect;
        return undef;
    }

    $cstore->disconnect;

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object",
    method      => "rw_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
279
# Fully ingest the bibliographic record with the given id.
#
# The record is fetched from open-ils.cstore inside a transaction that is
# rolled back straight after the read, then handed to
# open-ils.ingest.full.biblio.object.
#
#   $rec - id of the biblio record entry
#
# Returns the first result of the object-level method, or undef when the
# record could not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $fetch = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec );
    my $r = $fetch->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$r or !@$r) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ($ingest->run($r))[0];
}
__PACKAGE__->register_method(
    method      => "rw_biblio_ingest_single_record",
    api_name    => "open-ils.ingest.full.biblio.record",
    argc        => 1,
    api_level   => 1,
);
304
# Read-only ingest of a bibliographic record object: nothing is stored.
#
# The record's MARC is entityized and parsed, then passed through the flat
# MARC, field entry, fingerprint and descriptor methods.  Every resulting
# row is stamped with the record id.
#
#   $bib - record object providing marc and id
#
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two methods take the parsed document, the last two the XML text.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    return {
        fingerprint   => $fp,
        descriptor    => $rd,
        full_rec      => \@mfr,
        field_entries => \@mXfe,
    };
}
__PACKAGE__->register_method(
    method      => "ro_biblio_ingest_single_object",
    api_name    => "open-ils.ingest.full.biblio.object.readonly",
    argc        => 1,
    api_level   => 1,
);
330
# Read-only ingest of a MARCXML string: nothing is stored and no record id
# is attached to the results.
#
# The third argument is the XML text.  It is entityized and parsed, then
# passed through the flat MARC, field entry, fingerprint and descriptor
# methods.
#
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    # The first two methods take the parsed document, the last two the XML text.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    method      => "ro_biblio_ingest_single_xml",
    api_name    => "open-ils.ingest.full.biblio.xml.readonly",
    argc        => 1,
    api_level   => 1,
);
351
# Read-only ingest of the bibliographic record with the given id.
#
# Retrieves the record from open-ils.cstore, runs
# open-ils.ingest.full.biblio.xml.readonly on its MARC and stamps the
# record id onto the field entries, full_rec rows and descriptor.
#
#   $rec - id of the biblio record entry
#
# Returns the result hash ref, or undef when the record could not be
# retrieved.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; the object-level variant guards this
    # the same way.  Calling ->record on undef would abort the request.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record.readonly",
    method      => "ro_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
379
# Streaming form of open-ils.ingest.full.biblio.record.readonly.
#
# Receives record ids from the client one message at a time
# (count => 1, timeout => 5), runs the read-only record ingest for each and
# responds with the result.  The loop ends when recv returns nothing or a
# message carries undefined content.
#
# Always returns undef; the results travel through $client->respond.
sub ro_biblio_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_stream.readonly",
    method      => "ro_biblio_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
410
# Streaming form of open-ils.ingest.full.biblio.xml.readonly.
#
# Receives MARCXML strings from the client one message at a time
# (count => 1, timeout => 5), runs the read-only XML ingest on each and
# responds with the result.  The loop ends when recv returns nothing or a
# message carries undefined content.
#
# Always returns undef; the results travel through $client->respond.
sub ro_biblio_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method      => "ro_biblio_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
438
# Streaming ingest of bibliographic record objects for import.
#
# Receives record objects from the client one message at a time
# (count => 1, timeout => 5), runs open-ils.ingest.full.biblio.xml.readonly
# on each object's MARC, stamps the object's id onto the field entries and
# full_rec rows, and responds with the result.  Nothing is written to the
# database by this method itself.
#
# Always returns undef; the results travel through $client->respond.
sub rw_biblio_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        $_->source($bib->id) for (@{$res->{field_entries}});
        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.bib_stream.import",
    method      => "rw_biblio_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
469
470
471 # --------------------------------------------------------------------------------
472 # Authority ingest
473
474 package OpenILS::Application::Ingest::Authority;
475 use base qw/OpenILS::Application::Ingest/;
476 use Unicode::Normalize;
477
# Read-only ingest of an authority record object: nothing is stored.
#
# The record's MARC is entityized, parsed and flattened through
# open-ils.ingest.flat_marc.authority.xml; each resulting row is stamped
# with the record id.
#
#   $bib - authority record object providing marc and id
#
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    method      => "ro_authority_ingest_single_object",
    api_name    => "open-ils.ingest.full.authority.object.readonly",
    argc        => 1,
    api_level   => 1,
);
498
# Read-only ingest of an authority MARCXML string: nothing is stored and no
# record id is attached to the rows.
#
# The third argument is the XML text.  It is entityized, parsed and
# flattened through open-ils.ingest.flat_marc.authority.xml.
#
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    my %result = ( full_rec => \@mfr );
    return \%result;
}
__PACKAGE__->register_method(
    method      => "ro_authority_ingest_single_xml",
    api_name    => "open-ils.ingest.full.authority.xml.readonly",
    argc        => 1,
    api_level   => 1,
);
516
# Read-only ingest of the authority record with the given id.
#
# Retrieves the record from open-ils.cstore, runs
# open-ils.ingest.full.authority.xml.readonly on its MARC and stamps the
# record id onto the returned full_rec rows.
#
#   $rec - id of the authority record entry
#
# Returns the result hash ref, or undef when the record could not be
# retrieved.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest in this file returns only full_rec, so there
    # is normally no descriptor; calling ->record on the missing value used
    # to abort every request.  Stamp it only if one is ever supplied.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record.readonly",
    method      => "ro_authority_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
543
# Streaming form of open-ils.ingest.full.authority.record.readonly.
#
# Receives authority record ids from the client one message at a time
# (count => 1, timeout => 5), runs the read-only record ingest for each and
# responds with the result.  The loop ends when recv returns nothing or a
# message carries undefined content.
#
# Always returns undef; the results travel through $client->respond.
sub ro_authority_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record_stream.readonly",
    method      => "ro_authority_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
573
# Streaming form of open-ils.ingest.full.authority.xml.readonly.
#
# Receives authority MARCXML strings from the client one message at a time
# (count => 1, timeout => 5), runs the read-only XML ingest on each and
# responds with the result.  The loop ends when recv returns nothing or a
# message carries undefined content.
#
# Always returns undef; the results travel through $client->respond.
sub ro_authority_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml_stream.readonly",
    method      => "ro_authority_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
601
# Streaming ingest of authority record objects for import.
#
# Receives record objects from the client one message at a time
# (count => 1, timeout => 5), runs
# open-ils.ingest.full.authority.xml.readonly on each object's MARC, stamps
# the object's id onto the full_rec rows and responds with the result.
# Nothing is written to the database by this method itself.
#
# Always returns undef; the results travel through $client->respond.
sub rw_authority_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The cstore session that used to be created here was never used and
    # has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.bib_stream.import",
    method      => "rw_authority_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
631
632
633 # --------------------------------------------------------------------------------
634 # MARC index extraction
635
636 package OpenILS::Application::Ingest::XPATH;
637 use base qw/OpenILS::Application::Ingest/;
638 use Unicode::Normalize;
639
# Collect the text found by an XPath expression into one string.
#
#   $xml       - node to search (callers pass a documentElement); must
#                provide setNamespace and findnodes
#   $xpath     - XPath expression to evaluate
#   $ns_uri    - namespace URI to bind before searching (optional)
#   $ns_prefix - prefix for that namespace (optional); the binding is only
#                made when both URI and prefix are true
#   $unique    - when true, skip any child text that already occurs
#                somewhere in the text collected so far
#
# For each matching node the text of every child node is appended, followed
# by a single space; a match without children contributes its own text.
# Returns the collected string in NFD form.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;

            # Uniquify with a plain substring search.  The former regex
            # match broke on empty text: an empty pattern makes Perl reuse
            # the last successfully matched regex instead.
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
671
# Build metabib field entries for the requested index classes from one MARC
# record.
#
#   $xml     - a parsed XML document, or an XML string (entityized and
#              parsed here)
#   @classes - index class names, used as keys into $xpathset
#
# For every field definition of every requested class the record is run
# through that format's stylesheet when one is loaded (at most once per
# format per call), the definition's XPath is evaluated, and the text is
# normalized: NFD, strip \pM and \pC characters, strip trailing non-word
# characters, strip dots sitting between word boundaries, lowercase.  Each
# non-empty value is sent to the client as a
# Fieldmapper::metabib::<class>_field_entry carrying value and field.
#
# Always returns undef; the entries travel through $client->respond.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();

    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => document transformed by that format's stylesheet
    my %transform_cache;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            # Without a stylesheet the XPath runs against the source itself.
            my $document = $xml;
            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            # The format name doubles as the namespace prefix.
            my $value = xpath_to_string(
                $document->documentElement, $def->{xpath},
                $sf->{ns}, $format,
                1
            );

            next unless $value;

            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }

    return undef;
}
__PACKAGE__->register_method(
    method      => "class_index_string_xml",
    api_name    => "open-ils.ingest.field_entry.class.xml",
    argc        => 2,
    api_level   => 1,
    stream      => 1,
);
728
# Build metabib field entries for the requested index classes from a stored
# record.
#
#   $rec     - id of the record to retrieve
#   @classes - one or more index class names
#
# Retrieves the record, runs open-ils.ingest.field_entry.class.xml on its
# MARC and responds with each field entry after setting its source to $rec.
#
# Always returns undef (also when the record could not be retrieved); the
# entries travel through $client->respond.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, as the .xml variant does; a bare
    # "shift" here silently discarded all classes after the first.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this reads authority.record_entry although the entries
    # produced are metabib field entries and the "all" variant reads
    # biblio.record_entry -- possibly a copy/paste slip; confirm before
    # changing.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.record",
    method      => "class_index_string_record",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
756
# Build metabib field entries for every configured index class from one
# MARC record.
#
#   $xml - XML string or parsed document, passed straight through to
#          open-ils.ingest.field_entry.class.xml
#
# Always returns undef; the entries travel through $client->respond.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list is read from $xpathset right here, so the index
    # definitions must be loaded first; otherwise the first call in a fresh
    # process would ask for no classes at all and return nothing.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.xml",
    method      => "all_index_string_xml",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
774
# Build metabib field entries for every configured index class from a
# stored bibliographic record.
#
#   $rec - id of the biblio record entry
#
# Retrieves the record, runs open-ils.ingest.field_entry.class.xml on its
# MARC with every class known to $xpathset, and responds with each entry
# after setting its source to $rec.
#
# Always returns undef (also when the record could not be retrieved); the
# entries travel through $client->respond.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $session = OpenSRF::AppSession->create('open-ils.cstore');
    my $r = $session
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$r or !@$r) {
        return undef;
    }

    my $extract = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    my @entries = $extract->run($r->marc, keys(%$xpathset));

    for my $fm (@entries) {
        $fm->source($rec);
        $client->respond($fm);
    }

    return undef;
}
__PACKAGE__->register_method(
    method      => "all_index_string_record",
    api_name    => "open-ils.ingest.extract.field_entry.all.record",
    argc        => 1,
    api_level   => 1,
    stream      => 1,
);
801
802 # --------------------------------------------------------------------------------
803 # Flat MARC
804
805 package OpenILS::Application::Ingest::FlatMARC;
806 use base qw/OpenILS::Application::Ingest/;
807 use Unicode::Normalize;
808
809
# Normalize one MARC value for a full_rec row: decompose to NFD, drop
# combining marks (\pM) and "other"/control characters (\pC), then trim
# trailing non-word characters.
sub _normalize_marc_value {
	my $val = NFD(shift);
	$val =~ s/\pM+//sg;
	$val =~ s/\pC+//sg;
	$val =~ s/\W+$//sg;
	return $val;
}

# Flatten a parsed MARCXML document into a list of full_rec objects.
#
# Params:
#   $marcxml - parsed XML document (must support findnodes)
#   $xmltype - Fieldmapper class hint, e.g. 'metabib' (the default) or
#              'authority'; rows are built as Fieldmapper::<xmltype>::full_rec
#
# Returns a list with one row per leader (tag 'LDR'), one per controlfield,
# and one per datafield subfield.  Only subfield values are lower-cased.
# Returns an empty list when the document contains no record element.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	# local-name() lets this match a record element in any namespace.
	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# Without a record element there is nothing to flatten; bail out rather
	# than calling methods on undef below.
	unless ($root) {
		$log->debug("No record element found in $xmltype xml");
		return @ns_list;
	}

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		# One row per subfield, each carrying its parent's tag and indicators.
		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc(_normalize_marc_value($data->textContent)) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
	return @ns_list;
}
883
# Stream the flattened full_rec rows for a MARCXML document.
#
# Params:
#   $xml - MARCXML as a string, or an already parsed document (any
#          reference is used as-is)
#
# Rows are built as 'authority' when the invoked API name contains
# "authority", otherwise as 'metabib'.  Always returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	# Strings are run through entityize() and parsed; references pass through.
	if (!ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.authority.xml",
	method		=> "flat_marc_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.biblio.xml",
	method		=> "flat_marc_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
915
# Stream the flattened full_rec rows for a stored record.
#
# Params:
#   $rec - id of the record_entry to retrieve through open-ils.cstore
#
# The record class is 'authority' when the invoked API name contains
# "authority", otherwise 'biblio'.  The record's MARC is handed to the
# matching open-ils.ingest.flat_marc.<class>.xml method; each row is sent
# to the client and logged as JSON.  Always returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $entry = $session
		->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
		->gather(1);

	# No record, or a record without MARC: nothing to flatten.
	return undef if (!$entry or !$entry->marc);

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
	my @flat = $flattener->run($entry->marc);

	foreach my $full_rec (@flat) {
		$client->respond($full_rec);
		$log->debug(OpenSRF::Utils::JSON->perl2JSON($full_rec), DEBUG);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.biblio.record_entry",
	method		=> "flat_marc_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.authority.record_entry",
	method		=> "flat_marc_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
954
955 # --------------------------------------------------------------------------------
956 # Fingerprinting
957
958 package OpenILS::Application::Ingest::Biblio::Fingerprint;
959 use base qw/OpenILS::Application::Ingest/;
960 use Unicode::Normalize;
961 use OpenSRF::EX qw/:try/;
962
# Fingerprint a stored bibliographic record.
#
# Params:
#   $rec - id passed to open-ils.cstore.direct.biblio.record_entry.retrieve
#
# Returns the structure produced by open-ils.ingest.fingerprint.xml with its
# quality member truncated to an integer, or undef when the record (or its
# MARC) cannot be retrieved or no fingerprint was produced.
sub biblio_fingerprint_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

	# The fingerprint method returns undef when its script fails.  Stop here:
	# assigning to $fp->{quality} on undef would autovivify a hash and hand
	# the caller a bogus { quality => 0 } instead of a failure.
	unless (ref $fp) {
		$log->debug("No fingerprint generated for record $rec", INFO);
		return undef;
	}

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	$fp->{quality} = int($fp->{quality});
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
988
# Cached ScriptRunner for the fingerprint script; built on first use.
our $fp_script;

# Run the configured biblio_fingerprint script against a MARC document.
#
# Params:
#   (third argument) - the MARC XML; it is passed through entityize() first
#
# The script file and library path come from the open-ils.ingest
# app_settings (scripts/biblio_fingerprint and script_path).  The XML is
# exposed to the script as environment.marc.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if (!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be a single value or a list; always pass a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Handle failure explicitly so the early return never depends on what
	# the logger call happens to return.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.xml",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
1026
# Cached ScriptRunner for the record descriptor script; built on first use.
our $rd_script;

# Run the configured biblio_descriptor script against a MARC document.
#
# Params:
#   (third argument) - the MARC XML; it is passed through entityize() first
#
# The script file and library path come from the open-ils.ingest
# app_settings (scripts/biblio_descriptor and script_path).  The XML is
# inserted into the script as environment.marc.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_descriptor {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if (!$rd_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
		# script_path may be a single value or a list; always pass a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio descriptor extraction...");

		$rd_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$log->debug("Setting up environment for descriptor extraction script...");
	$rd_script->insert('environment.marc' => $xml => 1);
	$log->debug("Environment building complete...");

	# Handle failure explicitly so the early return never depends on what
	# the logger call happens to return.
	my $res = $rd_script->run;
	unless ($res) {
		$log->error( "Descriptor script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio descriptor extraction completed successfully");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.descriptor.xml",
	method		=> "biblio_descriptor",
	api_level	=> 1,
	argc		=> 1,
);
1066
1067
1068 1;
1069
1070 __END__
1071
# Report the current storage transaction, if any.
# Returns the result of open-ils.storage.transaction.current, which callers
# use as a boolean.
# NOTE: this sub sits below the file's __END__ marker, so perl does not
# compile it.
sub in_transaction {
	OpenILS::Application::Ingest->post_init();

	my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current;
}
1076
# Start a storage transaction unless one is already open.
#
# Params:
#   $client - forwarded to open-ils.storage.transaction.begin
#
# A failed BEGIN triggers a rollback request and a PANIC, which the
# surrounding handler catches and only logs.  Returns whatever
# open-ils.storage.transaction.current reports afterwards.
sub begin_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

			my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
			if (!$started) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
			}
		}
	} otherwise {
		$log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
	};

	my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current;
}
1101
# Roll back the current storage transaction, if there is one.
# When no transaction is open this only logs the fact.  Any Error raised
# while rolling back is converted into a PANIC.  Returns 1 otherwise.
sub rollback_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		}
		else {
			__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
	};

	return 1;
}
1121
# Commit the current storage transaction, if there is one.
# When no transaction is open this only logs the fact.  A commit that
# reports failure triggers a rollback request and a PANIC; any Error raised
# inside the block is re-thrown as a PANIC.  Returns 1 otherwise.
sub commit_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		}
		else {
			my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
			if (!$committed) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
			}
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
	};

	return 1;
}
1147
# Look up $method on this package, run it with the remaining arguments and
# return only the first value it produced (undef when it produced none).
sub storage_req {
	my ($self, $method, @args) = @_;

	my ($first) = __PACKAGE__->method_lookup( $method )->run( @args );
	return $first;
}
1154
# Delete the derived rows (full_rec and record_descriptor) of an authority
# record.
#
# Params:
#   $rec - value matched against the 'record' column of both tables
#
# The deletes run inside the 'scrub_authority_record' savepoint.  If no
# transaction is open, one is started here and committed on success or
# rolled back on failure.  Returns 1 on success and 0 on failure.
sub scrub_authority_record {
	my ($self, $client, $rec) = @_;

	my $ingest    = 'OpenILS::Application::Ingest';
	my $savepoint = 'scrub_authority_record';

	# Only manage the transaction ourselves when we were the ones to open it.
	my $commit = 0;
	unless ($ingest->in_transaction) {
		$ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		$ingest->storage_req( 'open-ils.storage.savepoint.set', $savepoint );

		for my $method (
			'open-ils.storage.direct.authority.full_rec.mass_delete',
			'open-ils.storage.direct.authority.record_descriptor.mass_delete',
		) {
			$ingest->storage_req( $method, { record => $rec } );
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.release', $savepoint );
	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', $savepoint );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			$ingest->commit_transaction;
		}
		else {
			$ingest->rollback_transaction;
		}
	}

	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.scrub.authority",
	method		=> "scrub_authority_record",
	api_level	=> 1,
	argc		=> 1,
);
1190
1191
# Delete the derived metabib rows of a bibliographic record and repair any
# metarecord that used it as its master.
#
# Params:
#   $rec - a record id, or a hash reference that is first resolved through
#          open-ils.storage.id_list.biblio.record_entry.search_where
#
# A metarecord whose master was $rec is re-pointed at the first remaining
# mapped source, or deleted when no mapping is left.  All of this runs
# inside the 'scrub_metabib_record' savepoint.  If no transaction is open,
# one is started here and committed on success or rolled back on failure.
# Returns 1 on success and 0 on failure.
sub scrub_metabib_record {
	my ($self, $client, $rec) = @_;

	my $ingest    = 'OpenILS::Application::Ingest';
	my $savepoint = 'scrub_metabib_record';

	if ( ref($rec) && ref($rec) =~ /HASH/o ) {
		$rec = $ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	# Only manage the transaction ourselves when we were the ones to open it.
	my $commit = 0;
	unless ($ingest->in_transaction) {
		$ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	# Delete method => column that identifies the record, in execution order.
	my @purges = (
		[ 'open-ils.storage.direct.metabib.full_rec.mass_delete'              => 'record' ],
		[ 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete' => 'source' ],
		[ 'open-ils.storage.direct.metabib.record_descriptor.mass_delete'     => 'record' ],
		[ 'open-ils.storage.direct.metabib.title_field_entry.mass_delete'     => 'source' ],
		[ 'open-ils.storage.direct.metabib.author_field_entry.mass_delete'    => 'source' ],
		[ 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete'   => 'source' ],
		[ 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete'   => 'source' ],
		[ 'open-ils.storage.direct.metabib.series_field_entry.mass_delete'    => 'source' ],
	);

	my $success = 1;
	try {
		$ingest->storage_req( 'open-ils.storage.savepoint.set', $savepoint );

		for my $purge (@purges) {
			my ($method, $column) = @$purge;
			$ingest->storage_req( $method, +{ $column => $rec } );
		}

		$log->debug( "Looking for metarecords whose master is $rec", DEBUG);
		my $masters = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

		for my $mr (@$masters) {
			$log->debug( "Found metarecord whose master is $rec", DEBUG);
			my $others = $ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id
			);

			if (!@$others) {
				# No mapped source left: the metarecord itself goes away.
				warn "Removing metarecord whose master is $rec";
				$log->debug( "Removing metarecord whose master is $rec", DEBUG);
				$ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
				warn "Metarecord removed";
				$log->debug( "Metarecord removed", DEBUG);
			}
			else {
				# Promote the first remaining source to master.
				$log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
				$mr->master_record($others->[0]->source);
				$ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $others->[0]->source, mods => undef }
				);
			}
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.release', $savepoint );

	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', $savepoint );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			$ingest->commit_transaction;
		}
		else {
			$ingest->rollback_transaction;
		}
	}

	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.scrub.biblio",
	method		=> "scrub_metabib_record",
	api_level	=> 1,
	argc		=> 1,
);
1265
# Re-ingest every bibliographic record mapped to a metarecord.
#
# Params:
#   $mrec - metarecord id used to search metarecord_source_map
#
# For each mapped source, wormize_biblio_record is called with this method
# object (so the .nomap / .noscrub variants of the API name carry through)
# and a hash is streamed to the client:
#   { record, metarecord, success }           on normal completion
#   { record, metarecord, success, error }    when an Error was caught
# Always returns undef.
sub wormize_biblio_metarecord {
	my $self = shift;
	my $client = shift;
	my $mrec = shift;

	my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

	for my $r (@$recs) {
		my $success = 0;
		try {
			$success = wormize_biblio_record($self => $client => $r->source);
			# The metarecord id comes from the map row itself; there is no
			# $rec variable in this sub.
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				}
			);
		} catch Error with {
			my $e = shift;
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				  error   => $e,
				}
			);
		};
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.nomap",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.noscrub",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.nomap.noscrub",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1325
1326
1327 sub wormize_biblio_record {
1328         my $self = shift;
1329         my $client = shift;
1330         my $rec = shift;
1331
1332         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1333                 $rec = OpenILS::Application::Ingest->storage_req(
1334                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1335                 );
1336         }
1337
1338
1339         my $commit = 0;
1340         if (!OpenILS::Application::Ingest->in_transaction) {
1341                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1342                 $commit = 1;
1343         }
1344
1345         my $success = 1;
1346         try {
1347                 # clean up the cruft
1348                 unless ($self->api_name =~ /noscrub/o) {
1349                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1350                 }
1351
1352                 # now redo 'em
1353                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1354
1355                 my @full_rec = ();
1356                 my @rec_descriptor = ();
1357                 my %field_entry = (
1358                         title   => [],
1359                         author  => [],
1360                         subject => [],
1361                         keyword => [],
1362                         series  => [],
1363                 );
1364                 my %metarecord = ();
1365                 my @source_map = ();
1366                 for my $r (@$bibs) {
1367                         try {
1368                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1369
1370                                 my $xml = $parser->parse_string($r->marc);
1371
1372                                 #update the fingerprint
1373                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1374                                 OpenILS::Application::Ingest->storage_req(
1375                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1376                                         { id => $r->id },
1377                                         { fingerprint => $fp->{fingerprint},
1378                                           quality     => int($fp->{quality}) }
1379                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1380
1381                                 # the full_rec stuff
1382                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1383                                         $fr->record( $r->id );
1384                                         push @full_rec, $fr;
1385                                 }
1386
1387                                 # the rec_descriptor stuff
1388                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1389                                 $rd->record( $r->id );
1390                                 push @rec_descriptor, $rd;
1391                         
1392                                 # the indexing field entry stuff
1393                                 for my $class ( qw/title author subject keyword series/ ) {
1394                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1395                                                 $fe->source( $r->id );
1396                                                 push @{$field_entry{$class}}, $fe;
1397                                         }
1398                                 }
1399
1400                                 unless ($self->api_name =~ /nomap/o) {
1401                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
1402                                 
1403                                         unless ($mr) {
1404                                                 $mr = Fieldmapper::metabib::metarecord->new;
1405                                                 $mr->fingerprint( $fp->{fingerprint} );
1406                                                 $mr->master_record( $r->id );
1407                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1408                                         }
1409
1410                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1411                                         $mr_map->metarecord( $mr->id );
1412                                         $mr_map->source( $r->id );
1413                                         push @source_map, $mr_map;
1414
1415                                         $metarecord{$mr->id} = $mr;
1416                                 }
1417                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1418                         } otherwise {
1419                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1420                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1421                         };
1422                 }
1423                 
1424
1425                 if (@rec_descriptor) {
1426                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1427
1428                         OpenILS::Application::Ingest->storage_req(
1429                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1430                                 @source_map
1431                         ) if (@source_map);
1432
1433                         for my $mr ( values %metarecord ) {
1434                                 my $sources = OpenILS::Application::Ingest->storage_req(
1435                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1436                                         $mr->id
1437                                 );
1438
1439                                 my $bibs = OpenILS::Application::Ingest->storage_req(
1440                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1441                                         [ map { $_->source } @$sources ]
1442                                 );
1443
1444                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1445
1446                                 OpenILS::Application::Ingest->storage_req(
1447                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1448                                         { id => $mr->id },
1449                                         { master_record => $master->id, mods => undef }
1450                                 );
1451                         }
1452
1453                         OpenILS::Application::Ingest->storage_req(
1454                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1455                                 @rec_descriptor
1456                         ) if (@rec_descriptor);
1457
1458                         OpenILS::Application::Ingest->storage_req(
1459                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
1460                                 @full_rec
1461                         ) if (@full_rec);
1462
1463                         OpenILS::Application::Ingest->storage_req(
1464                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1465                                 @{ $field_entry{title} }
1466                         ) if (@{ $field_entry{title} });
1467
1468                         OpenILS::Application::Ingest->storage_req(
1469                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1470                                 @{ $field_entry{author} }
1471                         ) if (@{ $field_entry{author} });
1472                         
1473                         OpenILS::Application::Ingest->storage_req(
1474                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1475                                 @{ $field_entry{subject} }
1476                         ) if (@{ $field_entry{subject} });
1477
1478                         OpenILS::Application::Ingest->storage_req(
1479                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1480                                 @{ $field_entry{keyword} }
1481                         ) if (@{ $field_entry{keyword} });
1482
1483                         OpenILS::Application::Ingest->storage_req(
1484                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1485                                 @{ $field_entry{series} }
1486                         ) if (@{ $field_entry{series} });
1487
1488                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1489                 } else {
1490                         $success = 0;
1491                 }
1492
1493         } otherwise {
1494                 $log->debug('Wormization failed : '.shift(), ERROR);
1495                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1496                 $success = 0;
1497         };
1498
1499         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1500         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1501         return $success;
1502 }
# Publish wormize_biblio_record under each of its four API names.  All of
# them share the same handler, API level and argument count.
for my $api_name (qw/
	open-ils.worm.wormize.biblio
	open-ils.worm.wormize.biblio.nomap
	open-ils.worm.wormize.biblio.noscrub
	open-ils.worm.wormize.biblio.nomap.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_biblio_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1527
# Rebuild the derived full_rec rows for the authority record(s) found by
# searching authority.record_entry on id with $rec.
#
# Unless the method was invoked under an API name containing "noscrub",
# open-ils.worm.scrub.authority is run first and a false result is fatal.
# The MARC of every record found is then parsed and flattened through
# open-ils.worm.flat_marc.authority.xml, and the resulting rows are
# batch-created inside the "wormize_authority_record" savepoint.
#
# If no storage transaction is active on entry, one is begun here and is
# committed (success) or rolled back (failure) before returning; an
# already-open transaction is left for the caller to finish.
#
# Returns 1 on success and 0 when any step threw an exception.
sub wormize_authority_record {
	my ($self, $client, $rec) = @_;

	# Only the call that opened the transaction may finish it.
	my $owns_transaction = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$owns_transaction = 1;
	}

	my $ok = 1;
	try {
		# throw away what an earlier ingest left behind, unless told not to
		if ($self->api_name !~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
		}

		# fetch the record(s) and regenerate their rows
		my $authorities = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

		my @full_rec;
		my @rec_descriptor;
		for my $auth (@$authorities) {
			my $doc = $parser->parse_string($auth->marc);

			# flattened MARC rows, each pointed back at its source record
			my @rows = $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $doc );
			$_->record( $auth->id ) for (@rows);
			push @full_rec, @rows;

			# the rec_descriptor stuff -- XXX What does this mean for authority records?
			#my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $doc );
			#$rd->record( $auth->id );
			#push @rec_descriptor, $rd;
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );

		#OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
		if (@full_rec) {
			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec );
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

	} otherwise {
		# the exception arrives as the handler's first argument
		my $error = shift();
		$log->debug('Wormization failed : '.$error, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
		$ok = 0;
	};

	if ($owns_transaction) {
		if ($ok) {
			OpenILS::Application::Ingest->commit_transaction;
		} else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}

	return $ok;
}
# Both API names share the handler; the handler checks its own api_name
# for "noscrub" to decide whether to scrub first.
for my $api_name (qw/
	open-ils.worm.wormize.authority
	open-ils.worm.wormize.authority.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_authority_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1596
1597
1598 # --------------------------------------------------------------------------------
1599 # MARC index extraction
1600
1601 package OpenILS::Application::Ingest::XPATH;
1602 use base qw/OpenILS::Application::Ingest/;
1603 use Unicode::Normalize;
1604
# Gather the text of every node matched by an XPath expression into one
# space-separated, NFD-normalized string.
#
# Arguments (positional):
#   $xml       - element to search (e.g. a MODS documentElement); must
#                support setNamespace and findnodes
#   $xpath     - XPath expression evaluated against $xml
#   $ns_uri    - namespace URI to declare on $xml (optional)
#   $ns_prefix - prefix to bind to $ns_uri (optional); the namespace is
#                only declared when both URI and prefix are true
#   $unique    - when true, a child whose text already occurs anywhere in
#                the text gathered so far is skipped
#
# Each matched node contributes the textContent of each of its child
# nodes, or its own textContent when it has no children.  Every
# contribution is followed by a single space.
sub _xpath_to_string {
	my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# walk the set of matching nodes
	for my $node ( $xml->findnodes( $xpath ) ) {
		my @children = $node->childNodes();

		# a childless node contributes its own text
		unless (@children) {
			$string .= $node->textContent . " ";
			next;
		}

		for my $child (@children) {
			my $text = $child->textContent;

			# Uniquify with a literal substring test.  index() is used
			# instead of interpolating the text into a regex: an empty
			# text would produce an empty pattern, which Perl silently
			# replaces with the last successfully matched regex.
			next if ($unique && index($string, $text) >= 0);

			$string .= $text . " ";
		}
	}

	return NFD($string);
}
1636
# Stream one metabib field entry per configured index type of a class.
#
# Arguments (after $self and $client):
#   $xml   - MARCXML, either as a string (parsed here) or as an already
#            parsed document (anything that is a reference)
#   $class - index class name; selects both the entries of $xpathset to
#            evaluate and the Fieldmapper::metabib::<class>_field_entry
#            class used to build each result
#
# The record is transformed to MODS and, for every type configured under
# $xpathset->{$class}, the type's XPath is evaluated with unique-value
# extraction.  Types that yield no text are skipped.  Each remaining
# value is normalized and sent to the client in a new field entry whose
# field is the type's configured id.
#
# Always returns undef; results are delivered through $client->respond.
sub class_all_index_string_xml {
	my ($self, $client, $xml, $class) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	my @types = keys %{ $xpathset->{$class} };
	return undef unless (@types);

	# The MODS transform does not depend on the index type, so run the
	# stylesheet once instead of once per type.
	my $mods_doc = $mods_sheet->transform($xml);
	my $mods_root = $mods_doc->documentElement;

	for my $type (@types) {
		my $index_def = $xpathset->{$class}->{$type};

		my $value = _xpath_to_string(
			$mods_root,
			$index_def->{xpath},
			"http://www.loc.gov/mods/",
			"mods",
			1
		);

		next unless $value;

		$value = NFD($value);
		$value =~ s/\pM+//sgo;		# drop combining marks
		$value =~ s/\pC+//sgo;		# drop control / unassigned characters
		$value =~ s/\W+$//sgo;		# drop trailing non-word characters

		$value =~ s/(\w)\./$1/sgo;	# drop periods that follow a word character
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $index_def->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.xml",
	method		=> "class_all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1680
# Record-id flavour of open-ils.worm.field_entry.class.xml.
#
# Retrieves biblio record $rec from storage, runs its MARC through
# open-ils.worm.field_entry.class.xml for index class $class, stamps every
# resulting field entry with $rec as its source, and streams it to the
# client.  Always returns undef.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $extractor = $self->method_lookup("open-ils.worm.field_entry.class.xml");
	for my $entry ( $extractor->run($bib->marc, $class) ) {
		$entry->source($rec);
		$client->respond($entry);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.record",
	method		=> "class_all_index_string_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1703
1704
# Extract the index string of a single class/type pair from MARCXML.
#
# Arguments (after $self and $client):
#   $xml   - MARCXML string (parsed here) or an already parsed document
#   $class - index class; first-level key into $xpathset
#   $type  - index type;  second-level key into $xpathset
#
# The record is transformed to MODS and the XPath configured for
# $class/$type is evaluated against it with unique-value extraction.
# Returns the string built by _xpath_to_string.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $mods_doc = $mods_sheet->transform($xml);
	my $mods_root = $mods_doc->documentElement;
	my $xpath = $xpathset->{$class}->{$type}->{xpath};

	return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.xml",
	method		=> "class_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
);
1722
# Record-id flavour of open-ils.worm.class.type.xml.
#
# Retrieves biblio record $rec from storage, asks
# open-ils.worm.class.type.xml for the $class/$type index string of its
# MARC, logs the result at debug level and returns it.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $extractor = $self->method_lookup("open-ils.worm.class.type.xml");

	# only the first value returned by the lookup is of interest
	my ($d) = $extractor->run($bib->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.record",
	method		=> "class_index_string_record",
	api_level	=> 1,
	argc		=> 1,
);
1743
# Evaluate an arbitrary XPath expression against an XML document.
#
# Arguments (after $self and $client):
#   $xml    - XML string (parsed here) or an already parsed document
#   $xpath  - XPath expression to evaluate
#   $uri    - namespace URI to declare on the document element (optional)
#   $prefix - prefix to bind to $uri (optional)
#   $unique - passed through to _xpath_to_string as its uniqueness flag
#
# Returns the string built by _xpath_to_string from the document element.
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $root = $xml->documentElement;
	return _xpath_to_string( $root, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.xml",
	method		=> "xml_xpath",
	api_level	=> 1,
	argc		=> 1,
);
1763
# Record-id flavour of open-ils.worm.xpath.xml.
#
# Retrieves biblio record $rec from storage and evaluates $xpath against
# its MARC through open-ils.worm.xpath.xml, forwarding the namespace URI,
# prefix and uniqueness flag unchanged.  Logs the result at debug level
# and returns it.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $evaluator = $self->method_lookup("open-ils.worm.xpath.xml");

	# only the first value returned by the lookup is of interest
	my ($d) = $evaluator->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.record",
	method		=> "record_xpath",
	api_level	=> 1,
	argc		=> 1,
);
1786
1787
1788 # --------------------------------------------------------------------------------
1789 # MARC Descriptor
1790
1791 package OpenILS::Application::Ingest::Biblio::Leader;
1792 use base qw/OpenILS::Application::Ingest/;
1793 use Unicode::Normalize;
1794
# Record-type codes (the character the descriptor extractors read from
# leader position 6), grouped under a short group name.  Each value is a
# regex fragment matching exactly one code of that group.
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# Build a compiled regex that matches a single type code belonging to any
# of the named groups.
#
# Arguments: one or more keys of %marc_type_groups.  Names that are not
# keys of the hash are ignored.
#
# The fragments are looked up one per name.  (The earlier
# $marc_type_groups{@_} form was a single-element lookup keyed by the
# NUMBER of arguments, so it never found a fragment.)  The alternation is
# wrapped in a group so that both anchors apply to every alternative.
sub _type_re {
	my @fragments = map { exists $marc_type_groups{$_} ? $marc_type_groups{$_} : () } @_;
	my $re = '^(?:'. join('|', @fragments) .')$';
	return qr/$re/;
}
1810
# Extractors for the record descriptor fields, keyed by descriptor field
# name.  Each extractor takes no arguments and reads the package variables
# $ldr and $oo8, which the caller must set (normally with local) before
# calling it.  Offsets are zero-based substr positions.
our %biblio_descriptor_code = (
	item_type	=> sub { return substr($ldr, 6, 1); },
	item_form	=> sub {
		# the offset of the form code depends on the record type
		my $record_type = substr($ldr, 6, 1);
		return substr($oo8, 29, 1) if ($record_type =~ _type_re( qw/MAP VIS/ ));
		return substr($oo8, 23, 1) if ($record_type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
		return ' ';
	},
	bib_level	=> sub { return substr($ldr, 7, 1); },
	control_type	=> sub { return substr($ldr, 8, 1); },
	char_encoding	=> sub { return substr($ldr, 9, 1); },
	enc_level	=> sub { return substr($ldr, 17, 1); },
	cat_form	=> sub { return substr($ldr, 18, 1); },
	pub_status	=> sub { return substr($ldr, 5, 1); },
	item_lang	=> sub { return substr($oo8, 35, 3); },
	# position 33 of $oo8 is reported as lit_form for BKS types and as
	# type_mat for VIS types; otherwise these fields are undef
	lit_form	=> sub {
		return undef unless (substr($ldr, 6, 1) =~ _type_re('BKS'));
		return substr($oo8, 33, 1);
	},
	type_mat	=> sub {
		return undef unless (substr($ldr, 6, 1) =~ _type_re('VIS'));
		return substr($oo8, 33, 1);
	},
	audience	=> sub { return substr($oo8, 22, 1); },
);
1833
# Build a metabib record_descriptor object from a parsed MARCXML document.
#
# The leader and the 008 and 007 control fields are read from $xml into
# the package variables $ldr, $oo8 and $oo7, localized for the duration of
# this call so the extractors in %biblio_descriptor_code can see them.
# Every extractor is then run and its result stored through the
# descriptor accessor of the same name.
#
# Returns the populated Fieldmapper::metabib::record_descriptor.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
	local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

	my $descriptor = Fieldmapper::metabib::record_descriptor->new;

	for my $field ( keys %biblio_descriptor_code ) {
		my $extract = $biblio_descriptor_code{$field};
		$descriptor->$field( $extract->() );
	}

	return $descriptor;
}
1848
# API wrapper around _extract_biblio_descriptors.
#
# Accepts MARCXML as a string (parsed here) or as an already parsed
# document and returns the record descriptor object built from it.
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.xml",
	method		=> "extract_biblio_desc_xml",
	api_level	=> 1,
	argc		=> 1,
);
1864
# Record-id flavour of open-ils.worm.biblio_leader.xml.
#
# Retrieves biblio record $rec from storage, builds the record descriptor
# of its MARC through open-ils.worm.biblio_leader.xml, logs it as JSON at
# debug level and returns it.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $describer = $self->method_lookup("open-ils.worm.biblio_leader.xml");

	# only the first value returned by the lookup is of interest
	my ($d) = $describer->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".OpenSRF::Utils::JSON->perl2JSON($d), DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.record",
	method		=> "extract_biblio_desc_record",
	api_level	=> 1,
	argc		=> 1,
);
1883
1884 # --------------------------------------------------------------------------------
1885 # Flat MARC
1886
1887 package OpenILS::Application::Ingest::FlatMARC;
1888 use base qw/OpenILS::Application::Ingest/;
1889 use Unicode::Normalize;
1890
1891
# Normalize one MARC value for storage in a full_rec row: decompose to
# NFD, then remove combining marks, control/unassigned characters and any
# trailing run of non-word characters.  Case is left untouched.
sub _normalize_marc_value {
	my ($raw) = @_;

	my $val = NFD($raw);
	$val =~ s/\pM+//sgo;
	$val =~ s/\pC+//sgo;
	$val =~ s/\W+$//sgo;

	return $val;
}

# Flatten a parsed MARCXML document into a list of full_rec row objects.
#
# Arguments (positional):
#   $marcxml - parsed XML document containing a "record" element
#   $xmltype - Fieldmapper namespace for the rows; defaults to 'metabib',
#              giving Fieldmapper::metabib::full_rec objects
#
# Rows are produced from the first "record" element found, in this order:
#   * one row per leader, tagged 'LDR'
#   * one row per controlfield, tagged with its tag attribute
#   * one row per subfield of every datafield, carrying the datafield's
#     tag and indicators plus the subfield code
# Every value goes through _normalize_marc_value; subfield values are
# additionally lowercased.
#
# Returns the list of row objects.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _normalize_marc_value( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _normalize_marc_value( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );

			# subfield data is the only value that is case-folded
			$ns->value( lc( _normalize_marc_value( $data->textContent ) ) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
	return @ns_list;
}
1965
# Stream the flattened full_rec rows of a MARCXML document.
#
# Accepts MARCXML as a string (parsed here) or as an already parsed
# document.  Rows are built as 'authority' rows when the method was
# invoked under an API name containing "authority", and as 'metabib' rows
# otherwise.  Always returns undef; rows are delivered through
# $client->respond.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row ( _marcxml_to_full_rows($xml, $type) ) {
		$client->respond($row);
	}
	return undef;
}
# One handler serves both record kinds; it tells them apart by API name.
for my $api_name (qw/
	open-ils.worm.flat_marc.authority.xml
	open-ils.worm.flat_marc.biblio.xml
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_xml",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1995
# Record-id flavour of the flat_marc.*.xml methods.
#
# The record kind is 'authority' when the method was invoked under an API
# name containing "authority" and 'biblio' otherwise.  The matching
# record_entry is retrieved from storage, its MARC is flattened through
# open-ils.worm.flat_marc.<kind>.xml, and every resulting row is streamed
# to the client.  Always returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();
	my $entry = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );

	my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
	for my $row ( $flattener->run($entry->marc) ) {
		$client->respond($row);
	}
	return undef;
}
# One handler serves both record kinds; it tells them apart by API name.
for my $api_name (qw/
	open-ils.worm.flat_marc.biblio.record_entry
	open-ils.worm.flat_marc.authority.record_entry
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_record",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
2024
2025
2026 # --------------------------------------------------------------------------------
2027 # Fingerprinting
2028
2029 package OpenILS::Application::Ingest::Biblio::Fingerprint;
2030 use base qw/OpenILS::Application::Ingest/;
2031 use Unicode::Normalize;
2032 use OpenSRF::EX qw/:try/;
2033
2034 my @fp_mods_xpath = (
2035         '//mods:mods/mods:typeOfResource[text()="text"]' => [
2036                         title   => {
2037                                         xpath   => [
2038                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2039                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2040                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2041                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2042                                         ],
2043                                         fixup   => sub {
2044                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2045                                                         $text = NFD($text);
2046                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2047                                                         $text =~ s/\pM+//gso;
2048                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049                                                         $text = lc($text);
2050                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051                                                         $text =~ s/\s+/ /sgo;
2052                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2054                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2056                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2057                                                         $text =~ s/\[.[^\]]+\]//sgo;
2058                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2059                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2060                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2061                                                 },
2062                         },
2063                         author  => {
2064                                         xpath   => [
2065                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2066                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2067                                         ],
2068                                         fixup   => sub {
2069                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2070                                                         $text = NFD($text);
2071                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2072                                                         $text =~ s/\pM+//gso;
2073                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2074                                                         $text = lc($text);
2075                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076                                                         $text =~ s/\s+/ /sgo;
2077                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2078                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2079                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2080                                                         $text =~ s/,?\s+.*$//sgo;
2081                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2082                                                 },
2083                         },
2084         ],
2085
2086         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
2087                         title   => {
2088                                         xpath   => [
2089                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2090                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2091                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2092                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2093                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2094                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2095                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2096                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2097                                         ],
2098                                         fixup   => sub {
2099                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2100                                                         $text = NFD($text);
2101                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2102                                                         $text =~ s/\pM+//gso;
2103                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2104                                                         $text = lc($text);
2105                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2106                                                         $text =~ s/\s+/ /sgo;
2107                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2108                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2109                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2110                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2111                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2112                                                         $text =~ s/\[.[^\]]+\]//sgo;
2113                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2114                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2115                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2116                                                 },
2117                         },
2118                         author  => {
2119                                         xpath   => [
2120                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2121                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2122                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2123                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2124                                         ],
2125                                         fixup   => sub {
2126                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2127                                                         $text = NFD($text);
2128                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2129                                                         $text =~ s/\pM+//gso;
2130                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2131                                                         $text = lc($text);
2132                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2133                                                         $text =~ s/\s+/ /sgo;
2134                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2135                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2136                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2137                                                         $text =~ s/,?\s+.*$//sgo;
2138                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2139                                                 },
2140                         },
2141         ],
2142
2143 );
2144
# Append a fallback (match expression, block) pair: any record that has a
# titleInfo element is fingerprinted with the block already stored at index 1.
push( @fp_mods_xpath, '//mods:mods/mods:titleInfo', $fp_mods_xpath[1] );
2146
# Build a fingerprint string for a MODS document element.
#
# @fp_mods_xpath is read as a flat list of (match xpath, block) pairs, and each
# block is itself a flat list of (part name, part definition) pairs.  For every
# pair whose match xpath selects at least one node, each part contributes the
# first non-empty value found among its xpath expressions, after the part's
# fixup callback has rewritten it.  As soon as a pair has produced a non-empty
# string, non-word characters are stripped from it and it is returned.
# Returns undef when no pair produces any text.
#
# The fixup callbacks take no arguments: the text is handed to them through
# the localized package variable $text.
sub _fp_mods {
        my $mods = shift;
        $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

        my $fp_string = '';

        # $pair_pos points at a match expression; its block sits right after it.
        my $pair_pos = 0;
        while ( my $match_xpath = $fp_mods_xpath[$pair_pos] ) {

                my @nodes = $mods->findnodes( $match_xpath );
                if (@nodes) {
                        my $block = $fp_mods_xpath[ $pair_pos + 1 ];

                        # Same layout inside the block: name at $slot, definition at $slot + 1.
                        my $slot = 0;
                        while ( my $part = $block->[ $slot + 1 ] ) {
                                my $part_name = $block->[$slot];

                                local $text;
                                for my $xpath ( @{ $part->{xpath} } ) {
                                        $text = $mods->findvalue( $xpath );
                                        last if ($text);
                                }

                                $log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

                                if ($text) {
                                        $part->{fixup}->();
                                        $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                                        $fp_string .= $text;
                                }

                                $slot += 2;
                        }
                }

                if ($fp_string) {
                        $fp_string =~ s/\W+//gso;
                        $log->debug("Fingerprint is [$fp_string]", INFO);
                        return $fp_string;
                }

                $pair_pos += 2;
        }

        return undef;
}
2191
# Recompute the fingerprint and quality of bibliographic record(s) and, unless
# the method was invoked through an API name containing "nomap", move each
# changed record onto the metarecord that matches its new fingerprint.
#
# Arguments:
#   $self   - method object (used for method_lookup and api_name)
#   $client - receives the id of every record processed via respond()
#   $rec    - value handed to the record_entry id search
#
# Runs inside the current transaction when one is open; otherwise it begins
# one itself and commits it on success or rolls it back on failure.  Throws
# OpenSRF::EX::PANIC when the transaction cannot be started.  Errors raised
# while processing are logged, not propagated.  Always returns undef.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
                for my $b (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );

                        # Only touch storage when the fingerprint or quality actually changed.
                        if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {

                                $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $b->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $b->id
                                        );

                                        # Remove every existing map for this record, remembering
                                        # the metarecord of the last one removed.
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared separately from the conditional assignment:
                                        # "my ... if COND" has undefined behaviour in Perl and can
                                        # carry the previous loop iteration's value into this one.
                                        my $old_sm;
                                        $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                        ) if ($old_mrid);

                                        # The old metarecord has no sources left: delete it.
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        # No metarecord carries the new fingerprint yet: create
                                        # one with this record as its master.
                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $b->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $b->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($b->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        # Only finish a transaction that this call started.
        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
# Publish refingerprint_bibrec under two API names.  The method inspects its
# own api_name: the ".nomap" variant skips the metarecord re-mapping step.
__PACKAGE__->register_method(
        api_name  => "open-ils.worm.fingerprint.record.update",
        method    => "refingerprint_bibrec",
        api_level => 1,
        argc      => 1,
        stream    => 1,
);

__PACKAGE__->register_method(
        api_name  => "open-ils.worm.fingerprint.record.update.nomap",
        method    => "refingerprint_bibrec",
        api_level => 1,
        argc      => 1,
);
2294
2295 =comment
2296
2297 sub fingerprint_bibrec {
2298         my $self = shift;
2299         my $client = shift;
2300         my $rec = shift;
2301
2302         OpenILS::Application::Ingest->post_init();
2303         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2304
2305         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2306         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2307         return $fp;
2308
2309 }
2310 __PACKAGE__->register_method(  
2311         api_name        => "open-ils.worm.fingerprint.record",
2312         method          => "fingerprint_bibrec",
2313         api_level       => 0,
2314         argc            => 1,
2315 );                      
2316
2317
2318 sub fingerprint_mods {
2319         my $self = shift;
2320         my $client = shift;
2321         my $xml = shift;
2322
2323         OpenILS::Application::Ingest->post_init();
2324         my $mods = $parser->parse_string($xml)->documentElement;
2325
2326         return _fp_mods( $mods );
2327 }
2328 __PACKAGE__->register_method(  
2329         api_name        => "open-ils.worm.fingerprint.mods",
2330         method          => "fingerprint_mods",
2331         api_level       => 1,
2332         argc            => 1,
2333 );                      
2334
2335 sub fingerprint_marc {
2336         my $self = shift;
2337         my $client = shift;
2338         my $xml = shift;
2339
2340         $xml = $parser->parse_string($xml) unless (ref $xml);
2341
2342         OpenILS::Application::Ingest->post_init();
2343         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2344         $log->debug("Returning [$fp] as fingerprint", INFO);
2345         return $fp;
2346 }
2347 __PACKAGE__->register_method(  
2348         api_name        => "open-ils.worm.fingerprint.marc",
2349         method          => "fingerprint_marc",
2350         api_level       => 1,
2351         argc            => 1,
2352 );                      
2353
2354
2355 =cut
2356
# Fingerprint a stored bibliographic record.
#
# Arguments:
#   $self   - method object (used for method_lookup)
#   $client - unused here
#   $rec    - value handed to the record_entry retrieve call
#
# Retrieves the record, passes its MARC to 'open-ils.worm.fingerprint.marc'
# and returns that method's first result unchanged.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $marc = OpenILS::Application::Ingest
                        ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
                        ->marc;

        my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);

        # refingerprint_bibrec reads ->{fingerprint} from this same result, so it
        # is a hash; log that value instead of the stringified reference.  Any
        # non-hash result (e.g. the 0 returned on script failure) is logged as is.
        my $fp_text = (ref($fp) eq 'HASH') ? $fp->{fingerprint} : $fp;
        $log->debug("Returning [$fp_text] as fingerprint for record $rec", INFO);
        return $fp;
}
# Publish biblio_fingerprint_record; it takes one argument.
__PACKAGE__->register_method(
        api_name  => "open-ils.worm.fingerprint.record",
        method    => "biblio_fingerprint_record",
        api_level => 1,
        argc      => 1,
);
2378
# ScriptRunner for the fingerprint script; created on the first call to
# biblio_fingerprint and reused afterwards.
our $fp_script;

# Compute the fingerprint of a MARC record by running the configured
# biblio_fingerprint script.
#
# Arguments:
#   $self   - method object (unused here)
#   $client - unused here
#   $marc   - MARC XML, either as a string or as an already parsed document
#
# The script environment receives two entityized XML strings: the MARC
# itself ('marc') and the output of the MODS stylesheet applied to it ('mods').
#
# Returns whatever the script run returns, or 0 when that result is false
# (the failure is logged together with $@).
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString(), 'D' );

        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file        => $script_file,
                        paths       => $script_libs,
                        reset_count => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};
        #my $res = {fingerprint => '', quality => '0'};

        $fp_script->insert('environment' => $env);
        #$fp_script->insert('result' => $res);

        $log->debug("Running script for biblio fingerprinting...");

        # Bail out explicitly on a false result; the early return must not
        # depend on what the logger call happens to return.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
# Publish biblio_fingerprint; it takes one argument (the MARC record).
__PACKAGE__->register_method(
        api_name  => "open-ils.worm.fingerprint.marc",
        method    => "biblio_fingerprint",
        api_level => 1,
        argc      => 1,
);
2441
2442 # --------------------------------------------------------------------------------
2443
2444 1;
2445
2446 __END__
# Cached storage method handles used by wormize.  Each starts out undefined
# and is filled in by wormize through method_lookup the first time it runs.
# NOTE(review): these lines follow the __END__ marker, so perl does not compile them.
2447 my $in_xact;
2448 my $begin;
2449 my $commit;
2450 my $rollback;
2451 my $lookup;
2452 my $update_entry;
2453 my $mr_lookup;
2454 my $mr_update;
2455 my $mr_create;
2456 my $create_source_map;
2457 my $sm_lookup;
2458 my $rm_old_rd;
2459 my $rm_old_sm;
2460 my $rm_old_fr;
2461 my $rm_old_tr;
2462 my $rm_old_ar;
2463 my $rm_old_sr;
2464 my $rm_old_kr;
2465 my $rm_old_ser;
2466
2467 my $fr_create;
2468 my $rd_create;
# Batch-create handles for the field-entry classes, keyed by class name
# (title, author, subject, keyword, series).
2469 my $create = {};
2470
# Maps a record_descriptor field name to a Perl expression held as a string.
# wormize and authority_wormize string-eval each expression with $ldr (text of
# the MARC leader) and $oo8 (value of the 008 control field) in scope and
# store the result in the descriptor field of the same name.
2471 my %descriptor_code = (
2472         item_type => 'substr($ldr,6,1)',
2473         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2474         bib_level => 'substr($ldr,7,1)',
2475         control_type => 'substr($ldr,8,1)',
2476         char_encoding => 'substr($ldr,9,1)',
2477         enc_level => 'substr($ldr,17,1)',
2478         cat_form => 'substr($ldr,18,1)',
2479         pub_status => 'substr($ldr,5,1)',
2480         item_lang => 'substr($oo8,35,3)',
2481         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2482         audience => 'substr($oo8,22,1)',
2483 );
2484
# wormize: (re)ingest the bibliographic records whose ids are passed in.
#
# For each retrieved entry it computes a fingerprint from the MODS transform
# of the MARC, optionally attaches the record to a metarecord, and builds a
# record descriptor, the full_rec rows and the per-class field entries.  It
# then deletes the previously derived rows for the requested ids and
# batch-creates the new ones.
#
# Arguments: $self (method object), $client, then the list of record ids.
# Returns the number of entries processed.  Throws OpenSRF::EX::PANIC when the
# transaction cannot be started or committed, or when a batch storage call
# returns an undefined result.
#
# NOTE(review): this sub sits after the __END__ marker, so perl never compiles it.
2485 sub wormize {
2486
2487         my $self = shift;
2488         my $client = shift;
2489         my @docids = @_;
2490
        # API names containing "no_map" skip all metarecord mapping work.
2491         my $no_map = 0;
2492         if ($self->api_name =~ /no_map/o) {
2493                 $no_map = 1;
2494         }
2495
        # Look up each storage method once and keep the handle in the
        # file-level lexicals for later calls.
2496         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2497                 unless ($in_xact);
2498         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2499                 unless ($begin);
2500         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2501                 unless ($commit);
2502         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2503                 unless ($rollback);
2504         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2505                 unless ($sm_lookup);
2506         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2507                 unless ($mr_lookup);
2508         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2509                 unless ($mr_update);
2510         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2511                 unless ($lookup);
2512         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2513                 unless ($update_entry);
2514         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2515                 unless ($rm_old_sm);
2516         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2517                 unless ($rm_old_rd);
2518         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2519                 unless ($rm_old_fr);
2520         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2521                 unless ($rm_old_tr);
2522         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2523                 unless ($rm_old_ar);
2524         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2525                 unless ($rm_old_sr);
2526         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2527                 unless ($rm_old_kr);
2528         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2529                 unless ($rm_old_ser);
2530         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2531                 unless ($mr_create);
2532         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2533                 unless ($create_source_map);
2534         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2535                 unless ($rd_create);
2536         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2537                 unless ($fr_create);
2538         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2539                 unless ($$create{title});
2540         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2541                 unless ($$create{author});
2542         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2543                 unless ($$create{subject});
2544         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2545                 unless ($$create{keyword});
2546         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2547                 unless ($$create{series});
2548
2549
        # Open a transaction here (and commit it at the end) only when the
        # current-transaction lookup reports none.
2550         my ($outer_xact) = $in_xact->run;
2551         try {
2552                 unless ($outer_xact) {
2553                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2554                         my ($r) = $begin->run($client);
2555                         unless (defined $r and $r) {
2556                                 $rollback->run;
2557                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2558                         }
2559                 }
2560         } catch Error with {
2561                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2562         };
2563
        # Work lists filled while walking the entries and flushed in batches below.
2564         my @source_maps;
2565         my @entry_list;
2566         my @mr_list;
2567         my @rd_list;
2568         my @ns_list;
2569         my @mods_data;
2570         my $ret = 0;
2571         for my $entry ( $lookup->run(@docids) ) {
2572                 # step -1: grab the doc from storage
2573                 next unless ($entry);
2574
                # Parse the MARC-to-MODS stylesheet once and reuse it.
2575                 if(!$mods_sheet) {
2576                         my $xslt_doc = $parser->parse_file(
2577                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2578                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2579                 }
2580
2581                 my $xml = $entry->marc;
2582                 my $docid = $entry->id;
2583                 my $marcdoc = $parser->parse_string($xml);
2584                 my $modsdoc = $mods_sheet->transform($marcdoc);
2585
2586                 my $mods = $modsdoc->documentElement;
2587                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2588
                # NOTE(review): fingerprint_mods is called here as a plain function with
                # the MODS element as its only argument; the fingerprint_mods kept in the
                # file's =comment block takes ($self, $client, $xml) — confirm which
                # definition this call was written against.
2589                 $entry->fingerprint( fingerprint_mods( $mods ) );
2590                 push @entry_list, $entry;
2591
2592                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2593
2594                 unless ($no_map) {
                        # NOTE(review): $mr is dereferenced as an array in the test below but
                        # used as an object in the else branch — confirm what the lookup returns.
2595                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2596                         if (!$mr || !@$mr) {
2597                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2598                                 $mr = new Fieldmapper::metabib::metarecord;
2599                                 $mr->fingerprint( $entry->fingerprint );
2600                                 $mr->master_record( $entry->id );
2601                                 my ($new_mr) = $mr_create->run($mr);
2602                                 $mr->id($new_mr);
                                # NOTE(review): $mr was assigned a new object just above, so this
                                # test cannot fail; the create result is in $new_mr — confirm
                                # which one was meant to be checked.
2603                                 unless (defined $mr) {
2604                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2605                                 }
2606                         } else {
2607                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2608                                 $mr->mods('');
2609                                 push @mr_list, $mr;
2610                         }
2611
2612                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2613                         $sm->metarecord( $mr->id );
2614                         $sm->source( $entry->id );
2615                         push @source_maps, $sm;
2616                 }
2617
2618                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2619                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2620
                # Each %descriptor_code value is a Perl expression, evaluated as a
                # string with $ldr and $oo8 in scope.
2621                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2622                 for my $rd_field ( keys %descriptor_code ) {
2623                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2624                 }
2625                 $rd_obj->record( $docid );
2626                 push @rd_list, $rd_obj;
2627
2628                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2629
2630                 # step 2: build the KOHA rows
2631                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2632                 $_->record( $docid ) for (@tmp_list);
2633                 push @ns_list, @tmp_list;
2634
2635                 $ret++;
2636
                # API names that do not end in "batch" stop after the first processed entry.
2637                 last unless ($self->api_name =~ /batch$/o);
2638         }
2639
        # Drop the previously derived rows for every requested id before
        # re-creating them.
2640         $rm_old_rd->run( { record => \@docids } );
2641         $rm_old_fr->run( { record => \@docids } );
2642         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2643         $rm_old_tr->run( { source => \@docids } );
2644         $rm_old_ar->run( { source => \@docids } );
2645         $rm_old_sr->run( { source => \@docids } );
2646         $rm_old_kr->run( { source => \@docids } );
2647         $rm_old_ser->run( { source => \@docids } );
2648
2649         unless ($no_map) {
2650                 my ($sm) = $create_source_map->run(@source_maps);
2651                 unless (defined $sm) {
2652                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2653                 }
2654                 my ($mr) = $mr_update->run(@mr_list);
2655                 unless (defined $mr) {
2656                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2657                 }
2658         }
2659
2660         my ($re) = $update_entry->run(@entry_list);
2661         unless (defined $re) {
2662                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2663         }
2664
2665         my ($rd) = $rd_create->run(@rd_list);
2666         unless (defined $rd) {
2667                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2668         }
2669
2670         my ($fr) = $fr_create->run(@ns_list);
2671         unless (defined $fr) {
2672                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2673         }
2674
2675         # step 5: insert the new metadata
2676         for my $class ( qw/title author subject keyword series/ ) {
2677                 my @md_list = ();
2678                 for my $doc ( @mods_data ) {
                        # Every @mods_data element is a single-pair hash: record id => extracted values.
2679                         my ($did) = keys %$doc;
2680                         my ($data) = values %$doc;
2681
2682                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2683                         for my $row ( keys %{ $$data{$class} } ) {
2684                                 next unless (exists $$data{$class}{$row});
2685                                 next unless ($$data{$class}{$row}{value});
2686                                 my $fm_obj = $fm_constructor->new;
2687                                 $fm_obj->value( $$data{$class}{$row}{value} );
2688                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2689                                 $fm_obj->source( $did );
2690                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2691
2692                                 push @md_list, $fm_obj;
2693                         }
2694                 }
2695                         
2696                 my ($cr) = $$create{$class}->run(@md_list);
2697                 unless (defined $cr) {
2698                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2699                 }
2700         }
2701
        # Commit only the transaction that was opened at the top of this sub.
2702         unless ($outer_xact) {
2703                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2704                 my ($c) = $commit->run;
2705                 unless (defined $c and $c) {
2706                         $rollback->run;
2707                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2708                 }
2709         }
2710
2711         return $ret;
2712 }
# Publish wormize under four API names.  wormize inspects its own api_name:
# a name containing "no_map" skips metarecord mapping, and a name that does
# not end in "batch" makes it stop after the first processed entry.
2713 __PACKAGE__->register_method( 
2714         api_name        => "open-ils.worm.wormize",
2715         method          => "wormize",
2716         api_level       => 1,
2717         argc            => 1,
2718 );
2719 __PACKAGE__->register_method( 
2720         api_name        => "open-ils.worm.wormize.no_map",
2721         method          => "wormize",
2722         api_level       => 1,
2723         argc            => 1,
2724 );
2725 __PACKAGE__->register_method( 
2726         api_name        => "open-ils.worm.wormize.batch",
2727         method          => "wormize",
2728         api_level       => 1,
2729         argc            => 1,
2730 );
2731 __PACKAGE__->register_method( 
2732         api_name        => "open-ils.worm.wormize.no_map.batch",
2733         method          => "wormize",
2734         api_level       => 1,
2735         argc            => 1,
2736 );
2737
2738
# "a"-prefixed counterparts of the wormize handle cache, declared for
# authority_wormize.  The visible start of authority_wormize fills $alookup,
# $aupdate_entry, $arm_old_rd, $arm_old_fr, $ard_create and $afr_create via
# method_lookup, while its transaction handling uses the unprefixed $in_xact,
# $begin, $commit and $rollback.
# NOTE(review): the remaining names are not assigned in the visible part of
# authority_wormize — confirm whether they are used at all.
2739 my $ain_xact;
2740 my $abegin;
2741 my $acommit;
2742 my $arollback;
2743 my $alookup;
2744 my $aupdate_entry;
2745 my $amr_lookup;
2746 my $amr_update;
2747 my $amr_create;
2748 my $acreate_source_map;
2749 my $asm_lookup;
2750 my $arm_old_rd;
2751 my $arm_old_sm;
2752 my $arm_old_fr;
2753 my $arm_old_tr;
2754 my $arm_old_ar;
2755 my $arm_old_sr;
2756 my $arm_old_kr;
2757 my $arm_old_ser;
2758
2759 my $afr_create;
2760 my $ard_create;
2761 my $acreate = {};
2762
2763 sub authority_wormize {
2764
2765         my $self = shift;
2766         my $client = shift;
2767         my @docids = @_;
2768
2769         my $no_map = 0;
2770         if ($self->api_name =~ /no_map/o) {
2771                 $no_map = 1;
2772         }
2773
2774         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2775                 unless ($in_xact);
2776         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2777                 unless ($begin);
2778         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2779                 unless ($commit);
2780         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2781                 unless ($rollback);
2782         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2783                 unless ($alookup);
2784         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2785                 unless ($aupdate_entry);
2786         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2787                 unless ($arm_old_rd);
2788         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2789                 unless ($arm_old_fr);
2790         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2791                 unless ($ard_create);
2792         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2793                 unless ($afr_create);
2794
2795
2796         my ($outer_xact) = $in_xact->run;
2797         try {
2798                 unless ($outer_xact) {
2799                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2800                         my ($r) = $begin->run($client);
2801                         unless (defined $r and $r) {
2802                                 $rollback->run;
2803                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2804                         }
2805                 }
2806         } catch Error with {
2807                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2808         };
2809
2810         my @source_maps;
2811         my @entry_list;
2812         my @mr_list;
2813         my @rd_list;
2814         my @ns_list;
2815         my @mads_data;
2816         my $ret = 0;
2817         for my $entry ( $lookup->run(@docids) ) {
2818                 # step -1: grab the doc from storage
2819                 next unless ($entry);
2820
2821                 #if(!$mads_sheet) {
2822                 #       my $xslt_doc = $parser->parse_file(
2823                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2824                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2825                 #}
2826
2827                 my $xml = $entry->marc;
2828                 my $docid = $entry->id;
2829                 my $marcdoc = $parser->parse_string($xml);
2830                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2831
2832                 #my $mads = $madsdoc->documentElement;
2833                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2834
2835                 push @entry_list, $entry;
2836
2837                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2838                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2839
2840                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2841                 for my $rd_field ( keys %descriptor_code ) {
2842                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2843                 }
2844                 $rd_obj->record( $docid );
2845                 push @rd_list, $rd_obj;
2846
2847                 # step 2: build the KOHA rows
2848                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2849                 $_->record( $docid ) for (@tmp_list);
2850                 push @ns_list, @tmp_list;
2851
2852                 $ret++;
2853
2854                 last unless ($self->api_name =~ /batch$/o);
2855         }
2856
2857         $arm_old_rd->run( { record => \@docids } );
2858         $arm_old_fr->run( { record => \@docids } );
2859
2860         my ($rd) = $ard_create->run(@rd_list);
2861         unless (defined $rd) {
2862                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2863         }
2864
2865         my ($fr) = $fr_create->run(@ns_list);
2866         unless (defined $fr) {
2867                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2868         }
2869
2870         unless ($outer_xact) {
2871                 $log->debug("Commiting transaction started by Ingest.", INFO);
2872                 my ($c) = $commit->run;
2873                 unless (defined $c and $c) {
2874                         $rollback->run;
2875                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2876                 }
2877         }
2878
2879         return $ret;
2880 }
# Publish the "wormize" method under its authority API names.
#
# "open-ils.worm.authortiy.wormize" is a long-standing misspelling of
# "authority".  It stays registered so that any caller already using the
# misspelled name keeps working; the correctly spelled single-record name is
# registered alongside it so it matches the ".batch" variant.
for my $authority_api_name (
        "open-ils.worm.authortiy.wormize",        # legacy misspelling, kept as an alias
        "open-ils.worm.authority.wormize",
        "open-ils.worm.authority.wormize.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $authority_api_name,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2893
2894
2895 # --------------------------------------------------------------------------------
2896
2897
# Flatten a parsed MARCXML document into a list of full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document object; only ->documentElement is used
#   $type    - class name used to construct every row
#              (defaults to 'Fieldmapper::metabib::full_rec')
#
# Returns a list of row objects: one per <leader> (tag 'LDR'), one per
# <controlfield>, then one per element child of each <datafield>.
# Every value is NFD-normalized with combining marks stripped; datafield
# values are additionally lower-cased.  The caller sets ->record on each row.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        # Decompose the text and drop combining marks so accented and
        # unaccented spellings yield the same stored value.
        my $strip_marks = sub {
                my $text = NFD(shift);
                $text =~ s/\pM+//gs;
                return $text;
        };

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build the row from the requested class; this used to be
                # hard-wired to the metabib class regardless of $type.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;

                        # Whitespace/text/comment children have no attributes
                        # (and no getAttribute method), so they cannot be
                        # subfields; skip them instead of dying.
                        next unless $data->can('getAttribute');

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc( $strip_marks->( $data->textContent ) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2958
# Collect the text found under every node matching $xpath into one string.
#
# Arguments:
#   $root  - node object; only ->findnodes($xpath) is called on it
#   $xpath - XPath expression handed to findnodes unchanged
#
# For each matching node, the text of each child is appended followed by a
# single space, unless that exact text already occurs somewhere in the
# buffer.  A matching node with no children contributes its own text.
# The result is NFD-normalized, stripped of combining marks and lower-cased.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # grab the set of matching nodes
        for my $value ( $root->findnodes( $xpath ) ) {

                # grab all children of the node
                my @children = $value->childNodes();
                for my $child (@children) {

                        my $content = $child->textContent;

                        # uniquify the values.  This is a literal substring
                        # test; a regex built from the text would, for empty
                        # text, make Perl silently reuse the last successful
                        # pattern instead of matching the empty string.
                        next if index($string, $content) >= 0;

                        # add the child's content to the growing buffer
                        $string .= $content . " ";
                }
                if( ! @children ) {
                        $string .= $value->textContent . " ";
                }
        }
        $string = NFD($string);
        $string =~ s/\pM//gs;
        return lc($string);
}
2986
2987
# Build a class => type => { field_id => ... } skeleton from the file-level
# $xpathset table.
#
# Arguments:
#   $self - invocant (unused)
#   $mods - accepted for interface compatibility; not consulted here
#
# Returns a hash reference with one entry per class in $xpathset, each
# holding one entry per type whose only key, field_id, is copied from that
# type's "id" in $xpathset.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %values_for;
        for my $class (keys %$xpathset) {
                $values_for{$class} = {
                        map {
                                ( $_ => { field_id => $xpathset->{$class}->{$_}->{id} } )
                        } keys %{ $xpathset->{$class} }
                };
        }

        return \%values_for;
}
3000
3001
3002 1;
3003
3004