]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
more in(de)gestion
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
# Metadata formats an index definition may name.  Each entry carries the XML
# namespace URI for that format; post_init() later adds a compiled stylesheet
# under the {xslt} key of the mods and mods3 entries.
our %supported_formats = (
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => '' },
    oai_dc  => { ns => '' },
    rdf_dc  => { ns => '' },
);

# Logger class name; logging methods are called on the class itself.
our $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT processor, shared by every sub in this file.
our $parser = XML::LibXML->new();
our $xslt   = XML::LibXSLT->new();

# NOTE(review): neither sheet variable is assigned in the code visible here.
our $mods_sheet;
our $mads_sheet;

# field_class => field name => { xpath, id, format }; filled by post_init().
our $xpathset = {};

# These two hooks are intentionally empty; setup happens lazily in post_init().
sub initialize { }
sub child_init { }
40
# Lazy one-time setup: compile the MODS stylesheets and cache every
# config.metabib_field definition in $xpathset.  Does nothing once $xpathset
# has at least one class in it.
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # format name, log message, stylesheet file (relative to the xsl dir)
    my @stylesheets = (
        [ mods  => "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ mods3 => "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $entry (@stylesheets) {
        my ($format, $message, $file) = @$entry;
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my $slot = $xpathset->{ $f->field_class }->{ $f->name } ||= {};
        $slot->{xpath}  = $f->xpath;
        $slot->{id}     = $f->id;
        $slot->{format} = $f->format;
        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }
}
76
# Normalise a string and replace every non-ASCII character that is legal in
# XML with a hexadecimal numeric character reference.
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalisation; anything else (or nothing) is NFC
#
# Returns the converted string.
sub entityize {
    my ($stuff, $form) = @_;

    $stuff = (defined $form && $form eq 'D') ? NFD($stuff) : NFC($stuff);

    # U+0080..U+FFFD plus the supplementary planes.  U+FFFE and U+FFFF are
    # skipped because they are not legal XML characters.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/ge;

    return $stuff;
}
90
# Run the whole read-only ingest over a bib object that is already in hand.
#
# $bib must provide marc() (the MARCXML string) and id().  Returns a hashref
# with the keys full_rec, field_entries, fingerprint and descriptor; every
# row, and the descriptor when there is one, is stamped with the bib's id.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    # The "all classes" extractor builds its class list from $xpathset, so
    # the index definitions have to be loaded before it is called.
    OpenILS::Application::Ingest->post_init();

    my $xml      = $bib->marc;
    my $document = $parser->parse_string($xml);

    # The flat-MARC and field-entry methods get the parsed document; the
    # two script-driven methods get the raw XML string.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my $bib_id = $bib->id;
    $_->source($bib_id) for (@mXfe);
    $_->record($bib_id) for (@mfr);

    # The descriptor method returns undef when its script fails.
    $rd->record($bib_id) if ($rd);

    return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object.readonly",
    method    => "ro_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
116
# Run the whole read-only ingest over a MARCXML string.
#
# Returns a hashref with the keys full_rec, field_entries, fingerprint and
# descriptor.  Nothing is stamped with a record id here; callers that know
# the id do that themselves.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $xml) = @_;

    # The "all classes" extractor builds its class list from $xpathset, so
    # the index definitions have to be loaded before it is called.
    OpenILS::Application::Ingest->post_init();

    my $document = $parser->parse_string($xml);

    my %lookup = map { $_ => $self->method_lookup("open-ils.ingest.$_") }
        qw/flat_marc.biblio.xml extract.field_entry.all.xml fingerprint.xml descriptor.xml/;

    my @mfr  = $lookup{'flat_marc.biblio.xml'}->run($document);
    my @mXfe = $lookup{'extract.field_entry.all.xml'}->run($document);
    my ($fp) = $lookup{'fingerprint.xml'}->run($xml);
    my ($rd) = $lookup{'descriptor.xml'}->run($xml);

    return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
    method    => "ro_biblio_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
137
# Fetch one bib record by id from cstore and run the read-only ingest on it.
#
# Returns undef when the record cannot be retrieved; otherwise the hashref
# produced by open-ils.ingest.full.biblio.xml.readonly, with every row (and
# the descriptor, when present) stamped with $rec.
sub ro_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor slot is undef when the descriptor script failed, so
    # only stamp it when there is something to stamp.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record.readonly",
    method    => "ro_biblio_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
165
# Streaming form of the by-id read-only ingest.
#
# Reads record ids from the client one message at a time (five second
# timeout each), ingests each one and responds with the result hashref.
# The loop stops when recv() returns nothing or a message has undefined
# content.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # No cstore session is opened here: the per-record method opens its own
    # and this sub never talks to cstore directly.
    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record_stream.readonly",
    method    => "ro_biblio_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
196
# Streaming form of the MARCXML read-only ingest.
#
# Reads MARCXML strings from the client one message at a time (five second
# timeout each) and responds with the ingest result for each.  The loop
# stops when recv() returns nothing or a message has undefined content.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # No cstore session is opened here; nothing in this sub uses one.
    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method    => "ro_biblio_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
224
# Streaming ingest over bib objects sent by the client.
#
# Each incoming message must carry an object with marc() and id().  The
# MARC is run through the read-only ingest, the resulting rows are stamped
# with the bib's id, and the result hashref is sent back.
#
# NOTE(review): despite the "rw"/"import" naming, the visible code stores
# nothing; it only computes and responds.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # No cstore session is opened here; nothing in this sub uses one.
    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        $_->source($bib->id) for (@{$res->{field_entries}});
        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.bib_stream.import",
    method    => "rw_biblio_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
255
256
257 # --------------------------------------------------------------------------------
258 # MARC index extraction
259
260 package OpenILS::Application::Ingest::XPATH;
261 use base qw/OpenILS::Application::Ingest/;
262 use Unicode::Normalize;
263
# Collect the text found by an XPath expression into one space-separated,
# NFD-normalised string.
#
#   $xml       - an XML element (something offering findnodes/setNamespace)
#   $xpath     - the XPath expression to evaluate against it
#   $ns_uri    - namespace URI to register on $xml (optional)
#   $ns_prefix - prefix for that namespace (optional)
#   $unique    - when true, skip any child text that already occurs
#                somewhere in the string built so far
#
# Every piece of text is followed by a single space, so a non-empty result
# ends with one.
sub xpath_to_string {
    my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    for my $node ( $xml->findnodes( $xpath ) ) {

        my @children = $node->childNodes();

        # A node without children contributes its own text.
        if (!@children) {
            $string .= $node->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $text = $child->textContent;

            # A literal substring test.  A regex is avoided on purpose: an
            # empty pattern makes Perl reuse the last successful pattern,
            # which made the outcome for empty text unpredictable.
            next if ($unique && index($string, $text) >= 0);

            $string .= $text . " ";
        }
    }

    return NFD($string);
}
295
# Stream one field-entry object per index definition in the given classes.
#
#   $xml     - MARCXML, as a string or an already-parsed document
#   @classes - field classes whose definitions in $xpathset are applied
#
# For each definition the document (transformed through the format's
# stylesheet when it has one) is searched with the definition's XPath.
# A non-empty result has combining marks and control characters removed,
# a period following a word character dropped, and is lower-cased before
# being sent to the client.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    # Transformed documents, keyed by format, so each stylesheet is applied
    # at most once per call.
    my %transform_cache;

    for my $class (@classes) {
        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $supported_formats{$format};

            my $document = $xml;
            if ($sf->{xslt}) {
                $document = ( $transform_cache{$format} ||= $sf->{xslt}->transform($xml) );
            }

            # The format name doubles as the namespace prefix.
            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            next unless $value;

            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

            $value =~ s/(\w)\./$1/sgo;
            $value = lc($value);

            my $fm = $class_constructor->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
351
# Stream field-entry objects for a stored bib record.
#
#   $rec     - id of the biblio.record_entry to fetch from cstore
#   @classes - field classes to extract (one or more)
#
# Each entry produced by open-ils.ingest.field_entry.class.xml gets its
# source set to $rec before being sent on.  Nothing is sent when the record
# cannot be retrieved.
sub class_index_string_record {
    my ($self, $client, $rec, @classes) = @_;

    # All remaining arguments are classes, as in the xml variant; taking
    # only one argument here silently dropped every class after the first.

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.record",
    method    => "class_index_string_record",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
379
# Stream field-entry objects for every known field class.
#
#   $xml - MARCXML, as a string or an already-parsed document
#
# Delegates to open-ils.ingest.field_entry.class.xml with every class
# currently held in $xpathset.
sub all_index_string_xml {
    my ($self, $client, $xml) = @_;

    # The class list is read from $xpathset, so the definitions must be
    # loaded first; otherwise the first call in a fresh process would pass
    # an empty list and extract nothing.
    OpenILS::Application::Ingest->post_init();

    my @classes = keys %$xpathset;

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, @classes)) {
        $client->respond($fm);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.xml",
    method    => "all_index_string_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
397
# Stream field-entry objects, for every known field class, for a stored
# bib record.
#
#   $rec - id of the biblio.record_entry to fetch from cstore
#
# Each entry has its source set to $rec.  Nothing is sent when the record
# cannot be retrieved.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $session = OpenSRF::AppSession->create('open-ils.cstore');
    my $r = $session
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    my @entries   = $extractor->run($r->marc, keys(%$xpathset));

    for my $fm (@entries) {
        $fm->source($rec);
        $client->respond($fm);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
424
425 # --------------------------------------------------------------------------------
426 # Flat MARC
427
428 package OpenILS::Application::Ingest::FlatMARC;
429 use base qw/OpenILS::Application::Ingest/;
430 use Unicode::Normalize;
431
432
# Decompose text to NFD and remove every combining mark from it.
sub _strip_combining_marks {
    my $val = NFD(shift);
    $val =~ s/\pM+//gs;
    return $val;
}

# Flatten a parsed MARCXML document into a list of full_rec objects.
#
#   $marcxml - parsed XML document (or node) containing a "record" element
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
#
# One row is built for the leader (tag 'LDR'), one per controlfield and one
# per datafield subfield.  Values have combining marks stripped; subfield
# values are lower-cased as well.  Returns the list of rows, which is empty
# when the document holds no "record" element.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    # Matching on local-name() finds the record whatever its prefix is.
    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    # Without a record there is nothing to flatten; going on would only
    # die on a method call against undef.
    unless ($root) {
        $log->debug("No record element found in $xmltype xml");
        return @ns_list;
    }

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;
        $ns->tag( 'LDR' );
        $ns->value( _strip_combining_marks($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;
        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _strip_combining_marks($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag  = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
            next unless $data;

            my $ns = $type->new;
            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );

            # Only subfield values are case-folded.
            $ns->value( lc( _strip_combining_marks($data->textContent) ) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
500
# Stream the flattened rows of a MARCXML document to the client.
#
#   $xml - MARCXML, as a string or an already-parsed document
#
# Registered under both an authority and a biblio API name; when the name
# in use contains "authority" the rows are built in the authority
# namespace, otherwise in metabib.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    $xml = $parser->parse_string($xml) unless (ref $xml);

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row ( _marcxml_to_full_rows($xml, $type) ) {
        $client->respond($row);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
532
# Stream the flattened rows of a stored record to the client.
#
#   $rec - id of the record_entry to fetch from cstore
#
# Registered under both a biblio and an authority API name; the name in use
# selects which record_entry table is read and which flat_marc xml method
# does the flattening.  Nothing is sent when the record is missing or has
# no MARC.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();

    my $session = OpenSRF::AppSession->create('open-ils.cstore');
    my $r = $session
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    return undef unless ($r and $r->marc);

    my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");

    for my $row ( $flattener->run($r->marc) ) {
        $client->respond($row);

        # NOTE(review): no import of DEBUG into this package is visible, so
        # it may be reaching the logger as a bareword string - confirm.
        $log->debug(JSON->perl2JSON($row), DEBUG);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
571
572 # --------------------------------------------------------------------------------
573 # Fingerprinting
574
575 package OpenILS::Application::Ingest::Biblio::Fingerprint;
576 use base qw/OpenILS::Application::Ingest/;
577 use Unicode::Normalize;
578 use OpenSRF::EX qw/:try/;
579
# Compute the fingerprint of a stored bib record.
#
#   $rec - id of the biblio.record_entry to fetch from cstore
#
# Returns whatever open-ils.ingest.fingerprint.xml yields for the record's
# MARC, or undef when the record is missing or has no MARC.
sub biblio_fingerprint_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $session = OpenSRF::AppSession->create('open-ils.cstore');
    my $r = $session
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and $r->marc);

    my $fingerprinter = $self->method_lookup('open-ils.ingest.fingerprint.xml');
    my ($fp) = $fingerprinter->run($r->marc);

    # NOTE(review): no import of INFO into this package is visible, so it
    # may be reaching the logger as a bareword string - confirm.
    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);

    return $fp;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.record",
    method    => "biblio_fingerprint_record",
    api_level => 1,
    argc      => 1,
);
604
# Script runner for fingerprinting; built on first use and then reused.
our $fp_script;

# Run the configured fingerprint script against a MARCXML string.
#
#   $xml - the MARCXML text, handed to the script as environment.marc
#
# The script file and its library path(s) are read from the
# apps/open-ils.ingest/app_settings part of the settings.  Returns the
# script's result, or undef (after logging an error) when the run yields
# a false value.
sub biblio_fingerprint {
    my ($self, $client, $xml) = @_;

    $log->internal("Got MARC [$xml]");

    if (!$fp_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

        # script_path may be a single value or a list; always pass a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 1000,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    # Handle failure explicitly, so that bailing out does not depend on
    # what the logger call happens to return.
    my $res = $fp_script->run;
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.xml",
    method    => "biblio_fingerprint",
    api_level => 1,
    argc      => 1,
);
642
# Script runner for descriptor extraction; built on first use, then reused.
our $rd_script;

# Run the configured record-descriptor script against a MARCXML string.
#
#   $xml - the MARCXML text, handed to the script as environment.marc
#
# The script file and its library path(s) are read from the
# apps/open-ils.ingest/app_settings part of the settings.  Returns the
# script's result, or undef (after logging an error) when the run yields
# a false value.
sub biblio_descriptor {
    my ($self, $client, $xml) = @_;

    $log->internal("Got MARC [$xml]");

    if (!$rd_script) {
        my @pfx  = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');

        # script_path may be a single value or a list; always pass a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 1000,
        );
    }

    $rd_script->insert('environment' => {marc => $xml} => 1);

    # Handle failure explicitly, so that bailing out does not depend on
    # what the logger call happens to return.
    my $res = $rd_script->run;
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio descriptor extraction completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.descriptor.xml",
    method    => "biblio_descriptor",
    api_level => 1,
    argc      => 1,
);
680
681
682 1;
683
684 __END__
685
# Report the current storage transaction, as returned by
# open-ils.storage.transaction.current.
# This sub sits after __END__, so perl never compiles it.
sub in_transaction {
    OpenILS::Application::Ingest->post_init();

    my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
    return $current;
}
690
# Start a storage transaction unless one is already open, then return the
# current transaction.  A failure to begin is rolled back and raised as a
# PANIC inside the try block, where the otherwise clause logs it.
# This sub sits after __END__, so perl never compiles it.
sub begin_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
            #__PACKAGE__->st_sess->connect;
            my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
            if (!(defined $r and $r)) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                #__PACKAGE__->st_sess->disconnect;
                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
            }
        }
    } otherwise {
        $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
    };

    return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
715
# Roll back the open storage transaction, if there is one.  Returns 1; an
# error during the rollback is re-raised as a PANIC.
# This sub sits after __END__, so perl never compiles it.
sub rollback_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
    };

    return 1;
}
735
# Commit the open storage transaction, if there is one.  A commit that
# reports failure is rolled back and raised as a PANIC.  Returns 1.
# This sub sits after __END__, so perl never compiles it.
sub commit_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        #if (__PACKAGE__->st_sess->connected && $outer_xact) {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
            if (!(defined $r and $r)) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
            }
            #__PACKAGE__->st_sess->disconnect;
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
    };

    return 1;
}
761
# Look up a method by name, run it with the remaining arguments and return
# only the first value it produced.
# This sub sits after __END__, so perl never compiles it.
sub storage_req {
    my ($self, $method, @args) = @_;

    my ($first) = __PACKAGE__->method_lookup( $method )->run( @args );
    return $first;
}
768
# Delete the derived rows (full_rec and record_descriptor) of an authority
# record, inside a savepoint.
#
#   $rec - the authority record whose derived rows are removed
#
# Opens a transaction when none is active and, in that case, also commits
# it on success or rolls it back on failure.  Returns 1 on success and 0
# when any of the storage calls threw.
# This sub sits after __END__, so perl never compiles it.
sub scrub_authority_record {
    my ($self, $client, $rec) = @_;

    # True only when this call opened the transaction and so has to end it.
    my $commit = 0;
    unless (OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

        my @deleters = (
            'open-ils.storage.direct.authority.full_rec.mass_delete',
            'open-ils.storage.direct.authority.record_descriptor.mass_delete',
        );
        for my $deleter (@deleters) {
            OpenILS::Application::Ingest->storage_req( $deleter, { record => $rec } );
        }

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
    } otherwise {
        $log->debug('Scrubbing failed : '.shift(), ERROR);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
        $success = 0;
    };

    if ($commit) {
        if ($success) {
            OpenILS::Application::Ingest->commit_transaction;
        } else {
            OpenILS::Application::Ingest->rollback_transaction;
        }
    }

    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.scrub.authority",
    method    => "scrub_authority_record",
    api_level => 1,
    argc      => 1,
);
804
805
# Strip the derived metabib rows that belong to a bib record so the record can
# be ingested again from a clean slate.
#
# Arguments (after the usual $self / $client pair):
#   $rec - a biblio.record_entry id, or a hash ref of search criteria.  A hash
#          ref is first swapped for whatever
#          open-ils.storage.id_list.biblio.record_entry.search_where returns.
#
# Rows are mass-deleted from full_rec, metarecord_source_map,
# record_descriptor and the five *_field_entry tables.  Every metarecord whose
# master record is $rec is then either re-pointed at the source of the first
# source map still attached to it (with its mods field set to undef), or
# deleted when no source map is left.
#
# A transaction is opened only when none is active, and only a transaction
# opened here is committed or rolled back here.
#
# Returns 1 on success, 0 when any step threw.
sub scrub_metabib_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	my $ingest = 'OpenILS::Application::Ingest';

	if ( ref($rec) && ref($rec) =~ /HASH/o ) {
		$rec = $ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	my $commit = 0;
	if (!$ingest->in_transaction) {
		$ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		$ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

		# Derived tables, in deletion order, each paired with the column
		# that points back at the bib record.
		my @derived = (
			[ full_rec              => 'record' ],
			[ metarecord_source_map => 'source' ],
			[ record_descriptor     => 'record' ],
			[ title_field_entry     => 'source' ],
			[ author_field_entry    => 'source' ],
			[ subject_field_entry   => 'source' ],
			[ keyword_field_entry   => 'source' ],
			[ series_field_entry    => 'source' ],
		);
		for my $entry (@derived) {
			my ($table, $column) = @$entry;
			$ingest->storage_req( "open-ils.storage.direct.metabib.$table.mass_delete", { $column => $rec } );
		}

		$log->debug( "Looking for metarecords whose master is $rec", DEBUG);
		my $masters = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

		for my $mr (@$masters) {
			$log->debug( "Found metarecord whose master is $rec", DEBUG);
			my $others = $ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );

			if (@$others) {
				# Another record still maps to this metarecord: promote it.
				$log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
				$mr->master_record($others->[0]->source);
				$ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $others->[0]->source, mods => undef }
				);
			} else {
				# Nothing maps to this metarecord any more: drop it.
				$log->debug( "Removing metarecord whose master is $rec", DEBUG);
				$ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
				$log->debug( "Metarecord removed", DEBUG);
			}
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

	} otherwise {
		$log->debug('Scrubbing failed : '.shift(), ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
		$success = 0;
	};

	$ingest->commit_transaction if ($commit && $success);
	$ingest->rollback_transaction if ($commit && !$success);
	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.scrub.biblio",
	method		=> "scrub_metabib_record",
	api_level	=> 1,
	argc		=> 1,
);
879
# Re-ingest every bib record mapped to a metarecord, streaming one status hash
# per record back to the client.
#
# Arguments (after the usual $self / $client pair):
#   $mrec - value handed to metarecord_source_map.search.metarecord to find
#           the source maps of the metarecord.
#
# For each source map, wormize_biblio_record is called directly with this
# method's own $self, so the "nomap" / "noscrub" parts of the api_name this
# method was invoked under are what wormize_biblio_record sees.
#
# Each response is a hash with record, metarecord and success keys, plus an
# error key when the attempt threw.  Nothing is returned after the stream.
sub wormize_biblio_metarecord {
	my $self = shift;
	my $client = shift;
	my $mrec = shift;

	my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

	for my $r (@$recs) {
		my $success = 0;
		try {
			$success = wormize_biblio_record($self => $client => $r->source);
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				}
			);
		} catch Error with {
			my $e = shift;
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				  error   => $e,
				}
			);
		};
	}
	return undef;
}

# One handler, four published names: the suffixes are inspected at run time
# through $self->api_name by wormize_biblio_record.
for my $api_name (qw/
	open-ils.worm.wormize.metarecord
	open-ils.worm.wormize.metarecord.nomap
	open-ils.worm.wormize.metarecord.noscrub
	open-ils.worm.wormize.metarecord.nomap.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_biblio_metarecord",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
939
940
# (Re)build the derived metabib data for a bib record.
#
# Arguments (after the usual $self / $client pair):
#   $rec - a biblio.record_entry id, or a hash ref of search criteria.  A hash
#          ref is first swapped for whatever
#          open-ils.storage.id_list.biblio.record_entry.search_where returns.
#
# Behaviour switches, read from $self->api_name:
#   "noscrub" - skip the initial open-ils.worm.scrub.biblio pass
#   "nomap"   - do not look up / create metarecords or source maps
#
# Phase 1 extracts, per record and inside a per-record savepoint, the
# fingerprint, flat MARC rows, record descriptor and field entries.  Data for
# a record is buffered locally and merged into the batch only after that
# record's savepoint has been released, so a record whose extraction failed
# (and was rolled back) contributes nothing to the batch.
#
# Phase 2 batch-creates everything collected, and re-elects the master record
# of each touched metarecord as the mapped bib with the highest quality.
#
# A transaction is opened only when none is active, and only a transaction
# opened here is committed or rolled back here.
#
# Returns 1 on success; 0 when no record produced a descriptor or when any
# step outside the per-record savepoints threw.
sub wormize_biblio_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	my $ingest = 'OpenILS::Application::Ingest';

	if ( ref($rec) && ref($rec) =~ /HASH/o ) {
		$rec = $ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	my $commit = 0;
	if (!$ingest->in_transaction) {
		$ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my @index_classes = qw/title author subject keyword series/;

	my $success = 1;
	try {
		# clean up the cruft
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $bibs = $ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		my @rec_descriptor = ();
		my %field_entry = map { $_ => [] } @index_classes;
		my %metarecord = ();
		my @source_map = ();

		for my $r (@$bibs) {
			try {
				$ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

				# Per-record buffers; merged into the batch only on success.
				my @r_full_rec = ();
				my %r_field_entry = map { $_ => [] } @index_classes;

				my $xml = $parser->parse_string($r->marc);

				# update the fingerprint, but only when it or the quality changed
				my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
				$ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $r->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => int($fp->{quality}) }
				) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);

				# the full_rec stuff
				for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
					$fr->record( $r->id );
					push @r_full_rec, $fr;
				}

				# the rec_descriptor stuff
				my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
				$rd->record( $r->id );

				# the indexing field entry stuff
				for my $class ( @index_classes ) {
					for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
						$fe->source( $r->id );
						push @{$r_field_entry{$class}}, $fe;
					}
				}

				# the metarecord mapping, keyed on the fingerprint
				my ($mr, $mr_map);
				unless ($self->api_name =~ /nomap/o) {
					$mr = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];

					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $r->id );
						$mr->id( $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					$mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $r->id );
				}

				$ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );

				# Everything for this record worked: hand it to the batch.
				push @full_rec, @r_full_rec;
				push @rec_descriptor, $rd;
				for my $class ( @index_classes ) {
					push @{$field_entry{$class}}, @{$r_field_entry{$class}};
				}
				if ($mr_map) {
					push @source_map, $mr_map;
					$metarecord{$mr->id} = $mr;
				}
			} otherwise {
				$log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
				$ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
			};
		}

		if (@rec_descriptor) {
			$ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );

			$ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
				@source_map
			) if (@source_map);

			# Re-elect each touched metarecord's master: highest quality wins.
			for my $mr ( values %metarecord ) {
				my $sources = $ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
					$mr->id
				);

				my $candidates = $ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
					[ map { $_->source } @$sources ]
				);

				my $master = ( sort { $b->quality <=> $a->quality } @$candidates )[0];

				$ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $master->id, mods => undef }
				);
			}

			$ingest->storage_req(
				'open-ils.storage.direct.metabib.record_descriptor.batch.create',
				@rec_descriptor
			);

			$ingest->storage_req(
				'open-ils.storage.direct.metabib.full_rec.batch.create',
				@full_rec
			) if (@full_rec);

			for my $class ( @index_classes ) {
				next unless @{ $field_entry{$class} };
				$ingest->storage_req(
					"open-ils.storage.direct.metabib.${class}_field_entry.batch.create",
					@{ $field_entry{$class} }
				);
			}

			$ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
		} else {
			# Not a single record could be extracted.
			$success = 0;
		}

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
		$success = 0;
	};

	$ingest->commit_transaction if ($commit && $success);
	$ingest->rollback_transaction if ($commit && !$success);
	return $success;
}

# One handler, four published names: the "nomap" / "noscrub" suffixes are
# inspected at run time through $self->api_name.
for my $api_name (qw/
	open-ils.worm.wormize.biblio
	open-ils.worm.wormize.biblio.nomap
	open-ils.worm.wormize.biblio.noscrub
	open-ils.worm.wormize.biblio.nomap.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_biblio_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1141
# (Re)build the flattened MARC rows for an authority record.
#
# Arguments (after the usual $self / $client pair):
#   $rec - passed to authority.record_entry.search.id to fetch the records.
#
# Unless the method was invoked under a "noscrub" api_name, the record is
# first scrubbed through open-ils.worm.scrub.authority.  Only a transaction
# opened here is committed or rolled back here.
#
# Returns 1 on success, 0 when any step threw.
sub wormize_authority_record {
	my ($self, $client, $rec) = @_;

	my $ingest = 'OpenILS::Application::Ingest';

	my $commit = 0;
	unless ($ingest->in_transaction) {
		$ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		# clean up the cruft
		if ($self->api_name !~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec )
				or OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $authorities = $ingest->storage_req(
			'open-ils.storage.direct.authority.record_entry.search.id.atomic',
			$rec
		);

		my @full_rec;
		for my $authority (@$authorities) {
			my $xml = $parser->parse_string($authority->marc);

			# the full_rec stuff
			my $flattener = $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' );
			for my $row ( $flattener->run( $xml ) ) {
				$row->record( $authority->id );
				push @full_rec, $row;
			}

			# the rec_descriptor stuff -- XXX What does this mean for authority records?
			# (no descriptor is extracted for authority records at present)
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );

		if (@full_rec) {
			$ingest->storage_req(
				'open-ils.storage.direct.authority.full_rec.batch.create',
				@full_rec
			);
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			$ingest->commit_transaction;
		} else {
			$ingest->rollback_transaction;
		}
	}
	return $success;
}

# Published twice; the "noscrub" suffix is inspected through $self->api_name.
for my $api_name (qw/
	open-ils.worm.wormize.authority
	open-ils.worm.wormize.authority.noscrub
/) {
	__PACKAGE__->register_method(
		method    => 'wormize_authority_record',
		api_name  => $api_name,
		argc      => 1,
		api_level => 1,
	);
}
1210
1211
1212 # --------------------------------------------------------------------------------
1213 # MARC index extraction
1214
1215 package OpenILS::Application::Ingest::XPATH;
1216 use base qw/OpenILS::Application::Ingest/;
1217 use Unicode::Normalize;
1218
# Flatten the text selected by an XPath expression into a single string.
#
# Arguments (positional):
#   $xml       - node to search (e.g. a MODS documentElement); it must answer
#                setNamespace() and findnodes()
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - optional namespace URI ...
#   $ns_prefix - ... and prefix; when both are true they are applied to $xml
#                with setNamespace( $ns_uri, $ns_prefix, 1 ) before searching
#   $unique    - when true, a child whose text already occurs anywhere in the
#                string built so far (plain substring test) is skipped
#
# For every matched node the text of each child node is appended, followed by
# one space; a matched node without children contributes its own text.
#
# Returns the accumulated string in Unicode NFD form.
sub _xpath_to_string {
	my $xml = shift;
	my $xpath = shift;
	my $ns_uri = shift;
	my $ns_prefix = shift;
	my $unique = shift;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# walk the set of matching nodes
	for my $value ( $xml->findnodes( $xpath ) ) {

		# grab all children of the node
		my @children = $value->childNodes();
		for my $child (@children) {

			my $content = $child->textContent;

			# Uniquify with a literal substring search.  A regex built from
			# the text would turn into the empty pattern for an empty child,
			# which Perl treats as "reuse the last successful pattern".
			next if ($unique && index($string, $content) >= 0);

			# add the child's content to the growing buffer
			$string .= $content . " ";
		}
		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	return NFD($string);
}
1250
# Stream one field-entry object per configured index type of a search class.
#
# Arguments (after the usual $self / $client pair):
#   $xml   - MARCXML, either as a string (parsed here) or an already parsed
#            document object
#   $class - index class name; selects the $xpathset->{$class} definitions
#            and the Fieldmapper::metabib::${class}_field_entry constructor
#
# The record is transformed through $mods_sheet at most once, on the first
# index type, and the resulting document is reused for the remaining types.
# For each type the XPath result is stripped of combining marks (\pM) and
# "other" characters (\pC), has a period following a word character removed,
# and is lower-cased.  Types yielding a false value are skipped.
#
# Each object gets value() and field() set and is sent with
# $client->respond; nothing is returned after the stream.
sub class_all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;
	my $class = shift;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	# The transform does not depend on the index type, so run it lazily and
	# only once instead of once per type.
	my $mods_doc;

	for my $type ( keys %{ $xpathset->{$class} } ) {
		$mods_doc ||= $mods_sheet->transform($xml);

		my $value =  _xpath_to_string(
				$mods_doc->documentElement,
				$xpathset->{$class}->{$type}->{xpath},
				"http://www.loc.gov/mods/",
				"mods",
				1
		);

		next unless $value;

		$value =~ s/\pM+//sgo;
		$value =~ s/\pC+//sgo;
		#$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

		$value =~ s/(\w)\./$1/sgo;
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $xpathset->{$class}->{$type}->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.xml",
	method		=> "class_all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1293
# Stream the field entries of one index class for a stored bib record.
#
# Arguments (after the usual $self / $client pair):
#   $rec   - id handed to biblio.record_entry.retrieve
#   $class - index class name, forwarded to open-ils.worm.field_entry.class.xml
#
# Every entry produced from the record's MARC gets its source() set to $rec
# before being sent with $client->respond; nothing is returned after the stream.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);

	my $extractor = $self->method_lookup('open-ils.worm.field_entry.class.xml');
	for my $entry ( $extractor->run($bib->marc, $class) ) {
		$entry->source($rec);
		$client->respond($entry);
	}
	return undef;
}
__PACKAGE__->register_method(
	method    => 'class_all_index_string_record',
	api_name  => 'open-ils.worm.field_entry.class.record',
	stream    => 1,
	argc      => 1,
	api_level => 1,
);
1316
1317
# Return the index string of a single class/type pair for a MARCXML record.
#
# Arguments (after the usual $self / $client pair):
#   $xml   - MARCXML as a string (parsed here) or an already parsed document
#   $class - index class name  \  together select
#   $type  - index type name   /  $xpathset->{$class}->{$type}->{xpath}
#
# The record is transformed through $mods_sheet and the XPath is evaluated
# against the result with the "mods" prefix bound and uniquifying switched on.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	my $mods_root = $mods_sheet->transform($xml)->documentElement;
	my $xpath     = $xpathset->{$class}->{$type}->{xpath};

	return _xpath_to_string( $mods_root, $xpath, 'http://www.loc.gov/mods/', 'mods', 1 );
}
__PACKAGE__->register_method(
	method    => 'class_index_string_xml',
	api_name  => 'open-ils.worm.class.type.xml',
	argc      => 1,
	api_level => 1,
);
1335
# Return the index string of a single class/type pair for a stored bib record.
#
# Arguments (after the usual $self / $client pair):
#   $rec   - id handed to biblio.record_entry.retrieve
#   $class - index class name  \  forwarded to
#   $type  - index type name   /  open-ils.worm.class.type.xml
#
# The result is logged at debug level and returned.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);

	my $extractor = $self->method_lookup('open-ils.worm.class.type.xml');
	my ($d) = $extractor->run($bib->marc, $class, $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	method    => 'class_index_string_record',
	api_name  => 'open-ils.worm.class.type.record',
	argc      => 1,
	api_level => 1,
);
1356
# Evaluate an arbitrary XPath expression against an XML document and return
# the flattened text.
#
# Arguments (after the usual $self / $client pair):
#   $xml    - XML as a string (parsed here) or an already parsed document
#   $xpath  - XPath expression
#   $uri    - optional namespace URI     \  passed straight through
#   $prefix - optional namespace prefix  |  to _xpath_to_string
#   $unique - optional uniquify flag     /
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	my $root = $xml->documentElement;
	return _xpath_to_string( $root, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
	method    => 'xml_xpath',
	api_name  => 'open-ils.worm.xpath.xml',
	argc      => 1,
	api_level => 1,
);
1376
# Evaluate an arbitrary XPath expression against the MARC of a stored bib
# record and return the flattened text.
#
# Arguments (after the usual $self / $client pair):
#   $rec    - id handed to biblio.record_entry.retrieve
#   $xpath, $uri, $prefix, $unique - forwarded unchanged to
#             open-ils.worm.xpath.xml
#
# The result is logged at debug level and returned.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);

	my $evaluator = $self->method_lookup('open-ils.worm.xpath.xml');
	my ($d) = $evaluator->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	method    => 'record_xpath',
	api_name  => 'open-ils.worm.xpath.record',
	argc      => 1,
	api_level => 1,
);
1399
1400
1401 # --------------------------------------------------------------------------------
1402 # MARC Descriptor
1403
1404 package OpenILS::Application::Ingest::Biblio::Leader;
1405 use base qw/OpenILS::Application::Ingest/;
1406 use Unicode::Normalize;
1407
# Named groups of single-character record type codes.  Each value is a regex
# fragment (a character class) matching the codes that belong to the group.
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# Build a compiled regex matching a type code from any of the named groups.
#
# Arguments: one or more keys of %marc_type_groups; unknown keys are ignored.
# Returns:   a qr// object anchored at both ends of the string.  When none of
#            the names is known the regex can never match.
sub _type_re {
	# Hash *slice*: pick the fragment of every requested group.
	my @fragments = grep { defined } @marc_type_groups{@_};

	return qr/(?!)/ unless @fragments;

	# Group the alternation so that both anchors apply to every branch.
	my $re = '\A(?:' . join('|', @fragments) . ')\z';
	return qr/$re/;
}
1423
# Dispatch table: record_descriptor field name => extractor.
#
# The extractors take no arguments.  They read the package variables $ldr
# (leader text) and $oo8 (008 control field text), which
# _extract_biblio_descriptors localizes before calling them.
our %biblio_descriptor_code = (
	item_type => sub {
		return substr($ldr, 6, 1);
	},
	item_form => sub {
		# Where the form code lives in the 008 depends on the record type.
		my $type = substr($ldr, 6, 1);
		return substr($oo8, 29, 1) if $type =~ _type_re( qw/MAP VIS/ );
		return substr($oo8, 23, 1) if $type =~ _type_re( qw/BKS SER MIX SCO REC/ );
		return ' ';
	},
	bib_level => sub {
		return substr($ldr, 7, 1);
	},
	control_type => sub {
		return substr($ldr, 8, 1);
	},
	char_encoding => sub {
		return substr($ldr, 9, 1);
	},
	enc_level => sub {
		return substr($ldr, 17, 1);
	},
	cat_form => sub {
		return substr($ldr, 18, 1);
	},
	pub_status => sub {
		return substr($ldr, 5, 1);
	},
	item_lang => sub {
		return substr($oo8, 35, 3);
	},
	lit_form => sub {
		# An explicit undef (not an empty list) is handed back on purpose:
		# the result is used as the argument of a setter call.
		return undef unless substr($ldr, 6, 1) =~ _type_re('BKS');
		return substr($oo8, 33, 1);
	},
	type_mat => sub {
		# Same reasoning as lit_form for the explicit undef.
		return undef unless substr($ldr, 6, 1) =~ _type_re('VIS');
		return substr($oo8, 33, 1);
	},
	audience => sub {
		return substr($oo8, 22, 1);
	},
);
1446
# Build a metabib record_descriptor object from a parsed MARCXML document.
#
# Arguments: $xml - object answering findvalue() with an XPath expression.
# Returns:   a Fieldmapper::metabib::record_descriptor with every field named
#            in %biblio_descriptor_code filled in.
#
# The leader, 008 and 007 texts are placed in localized package variables
# ($ldr, $oo8, $oo7) for the duration of the call; the extractors in
# %biblio_descriptor_code read them from there.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	my $leader_path = '//*[local-name()="leader"]';
	my $f008_path   = '//*[local-name()="controlfield" and @tag="008"]';
	my $f007_path   = '//*[local-name()="controlfield" and @tag="007"]';

	local $ldr = $xml->findvalue($leader_path);
	local $oo8 = $xml->findvalue($f008_path);
	local $oo7 = $xml->findvalue($f007_path);

	my $descriptor = Fieldmapper::metabib::record_descriptor->new;
	for my $field ( keys %biblio_descriptor_code ) {
		my $extract = $biblio_descriptor_code{$field};
		$descriptor->$field( $extract->() );
	}

	return $descriptor;
}
1461
# Return the record descriptor for a MARCXML record.
#
# Arguments (after the usual $self / $client pair):
#   $xml - MARCXML as a string (parsed here) or an already parsed document
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
	method    => 'extract_biblio_desc_xml',
	api_name  => 'open-ils.worm.biblio_leader.xml',
	argc      => 1,
	api_level => 1,
);
1477
# Return the record descriptor for a stored bib record.
#
# Arguments (after the usual $self / $client pair):
#   $rec - id handed to biblio.record_entry.retrieve
#
# The record's MARC is run through open-ils.worm.biblio_leader.xml; the
# descriptor is logged (as JSON) at debug level and returned.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);

	my $extractor = $self->method_lookup('open-ils.worm.biblio_leader.xml');
	my ($d) = $extractor->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	method    => 'extract_biblio_desc_record',
	api_name  => 'open-ils.worm.biblio_leader.record',
	argc      => 1,
	api_level => 1,
);
1496
1497 # --------------------------------------------------------------------------------
1498 # Flat MARC
1499
1500 package OpenILS::Application::Ingest::FlatMARC;
1501 use base qw/OpenILS::Application::Ingest/;
1502 use Unicode::Normalize;
1503
1504
# Remove diacritics from a string: decompose it (NFD) and delete every
# combining mark exposed by the decomposition.
sub _strip_combining_marks {
        my $val = NFD(shift);
        $val =~ s/\pM+//gs;
        return $val;
}

# Flatten a MARCXML document into a list of full_rec Fieldmapper objects.
#
# One object is produced for each leader (tag 'LDR'), one for each
# controlfield, and one for each subfield of each datafield (carrying the
# parent's tag and indicators plus the subfield code).  All values have
# their combining marks stripped; subfield values are additionally
# lower-cased, leader and controlfield values are not.
#
#   $marcxml - object answering findnodes(); the first element whose local
#              name is "record" is used
#   $xmltype - Fieldmapper namespace segment for the row class
#              ("Fieldmapper::${xmltype}::full_rec"); defaults to 'metabib'
#
# Returns the list of row objects in document order within each group
# (leader rows, then controlfields, then subfields).
# Dies when the document contains no record element.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $xmltype = shift || 'metabib';

        my $type = "Fieldmapper::${xmltype}::full_rec";

        my @ns_list;

        my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

        # Without this check the first method call on $root fails with an
        # opaque "Can't call method ... on an undefined value".
        die "No MARC record element found in $xmltype xml\n" unless ($root);

        for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $leader;

                my $raw = $leader->textContent;

                my $ns = $type->new;
                $ns->tag( 'LDR' );
                $ns->value( _strip_combining_marks($raw) );

                push @ns_list, $ns;
        }

        for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $control;

                my $raw = $control->textContent;

                my $ns = $type->new;
                $ns->tag( $control->getAttribute( "tag" ) );
                $ns->value( _strip_combining_marks($raw) );

                push @ns_list, $ns;
        }

        for my $field ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $field;

                my $tag = $field->getAttribute( "tag" );
                my $ind1 = $field->getAttribute( "ind1" );
                my $ind2 = $field->getAttribute( "ind2" );

                for my $data ( @{$field->getChildrenByTagName('subfield')} ) {
                        next unless $data;

                        my $raw = $data->textContent;

                        my $ns = $type->new;
                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );

                        # Only subfield values are case-folded.
                        $ns->value( lc( _strip_combining_marks($raw) ) );

                        push @ns_list, $ns;
                }
        }

        $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
        return @ns_list;
}
1572
# API method: stream the flattened rows of a MARCXML record.
#
# The row type is 'authority' when the invoked api_name contains
# "authority", otherwise 'metabib'.  Each row returned by
# _marcxml_to_full_rows() is sent to the client with respond().
#
#   $self   - method object; its api_name selects the row type
#   $client - receives one respond() call per row
#   $xml    - MARCXML string (parsed with the shared $parser) or a reference
#
# Always returns undef; the rows travel through $client.
sub flat_marc_xml {
        my ($self, $client, $xml) = @_;

        $xml = $parser->parse_string($xml) if (!ref $xml);

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

        OpenILS::Application::Ingest->post_init();

        foreach my $row ( _marcxml_to_full_rows($xml, $type) ) {
                $client->respond($row);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.authority.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.biblio.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1602
# API method: stream the flattened rows of a stored record.
#
# The record class is 'authority' when the invoked api_name contains
# "authority", otherwise 'biblio'.  The matching record_entry is fetched
# through storage_req() and its marc value is passed to the corresponding
# "open-ils.worm.flat_marc.<class>.xml" method; every value that method
# returns is relayed to the client.
#
#   $self   - method object; used for api_name and method_lookup()
#   $client - receives one respond() call per row
#   $rec    - identifier passed to the record_entry retrieve call
#
# Always returns undef; the rows travel through $client.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();
        my $entry = OpenILS::Application::Ingest->storage_req(
                "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
        );

        my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
        foreach my $row ( $flattener->run( $entry->marc ) ) {
                $client->respond($row);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.authority.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1631
1632
1633 # --------------------------------------------------------------------------------
1634 # Fingerprinting
1635
1636 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1637 use base qw/OpenILS::Application::Ingest/;
1638 use Unicode::Normalize;
1639 use OpenSRF::EX qw/:try/;
1640
# Fingerprint fixups.
#
# Every fixup edits the dynamically scoped package variable $text in place
# (the caller sets it with "local $text") and takes no arguments.  A fixup
# is a sequence of single-edit steps; _fp_run_fixup() logs $text before the
# first step and again after every step.

# Steps shared by the title and author fixups.
my @fp_common_steps = (
        sub { $text = NFD($text) },                     # decompose
        sub { $text =~ s/\pM+//gso },                   # drop combining marks
        sub { $text = lc($text) },
        sub { $text =~ s/\s+/ /sgo },                   # collapse whitespace runs
        # Strips leading whitespace.  The greedy (.+) also swallows any
        # trailing whitespace, so that is left in place by this step.
        sub { $text =~ s/^\s*(.+)\s*$/$1/sgo },
);

# Title-only steps: drop the words the/a/an, drop bracketed text, then
# drop trailing whitespace and ; / . characters.
my @fp_title_steps = (
        @fp_common_steps,
        sub { $text =~ s/\b(?:the|an?)\b//sgo },
        sub { $text =~ s/\[.[^\]]+\]//sgo },
        sub { $text =~ s/\s*[;\/\.]*$//sgo },
);

# Author-only step: keep only what precedes the first whitespace (and an
# optional comma directly before it).
my @fp_author_steps = (
        @fp_common_steps,
        sub { $text =~ s/,?\s+.*$//sgo },
);

# Run the given steps in order against $text, logging before and after each.
sub _fp_run_fixup {
        $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
        for my $step (@_) {
                $step->();
                $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
        }
}

sub _fp_fixup_title  { _fp_run_fixup(@fp_title_steps); }
sub _fp_fixup_author { _fp_run_fixup(@fp_author_steps); }

# XPath candidates, tried in the order listed; the first one yielding a true
# value is used by _fp_mods().
my @fp_title_xpaths = (
        '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
        '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
        '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
        '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);

my @fp_related_title_xpaths = (
        '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
        '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
        '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
        '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
);

my @fp_author_xpaths = (
        '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
        '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

my @fp_related_author_xpaths = (
        '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
        '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

# Fingerprint rules, stored as a flat list of pairs:
#
#   match xpath => [ part name => { xpath => [...], fixup => coderef }, ... ]
#
# _fp_mods() walks the pairs in order; the part list of a pair is applied
# when its match xpath selects at least one node.
my @fp_mods_xpath = (
        '//mods:mods/mods:typeOfResource[text()="text"]' => [
                        title   => {
                                        xpath   => [ @fp_title_xpaths ],
                                        fixup   => \&_fp_fixup_title,
                        },
                        author  => {
                                        xpath   => [ @fp_author_xpaths ],
                                        fixup   => \&_fp_fixup_author,
                        },
        ],

        # Related-item values are preferred, falling back to the record's own.
        '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
                        title   => {
                                        xpath   => [ @fp_related_title_xpaths, @fp_title_xpaths ],
                                        fixup   => \&_fp_fixup_title,
                        },
                        author  => {
                                        xpath   => [ @fp_related_author_xpaths, @fp_author_xpaths ],
                                        fixup   => \&_fp_fixup_author,
                        },
        ],

);

# Final rule: any record with a titleInfo reuses the part list of the first rule.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1753
# Compute a fingerprint string from a MODS element.
#
# @fp_mods_xpath is a flat list of (match xpath, part list) pairs and each
# part list is itself a flat list of (part name, part spec) pairs.  The
# pairs are visited in order.  When a match xpath selects at least one node,
# every part of that rule contributes text: the first of its xpaths that
# yields a true value supplies the text, the part's fixup edits it in place
# through the dynamically scoped $text, and the result is appended to the
# fingerprint.  As soon as a rule leaves the fingerprint non-empty, all
# non-word characters are removed and the string is returned.
#
#   $mods - element answering setNamespace(), findnodes() and findvalue()
#
# Returns the fingerprint string, or undef when no rule produced any text.
sub _fp_mods {
        my ($mods) = @_;
        $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

        my $fingerprint = '';

        # Even slots hold the match xpath, the following odd slot its parts.
        for (my $rule = 0; ; $rule += 2) {
                my $match_xpath = $fp_mods_xpath[$rule];
                last unless ($match_xpath);

                my @nodes = $mods->findnodes( $match_xpath );
                if (@nodes) {
                        my $parts = $fp_mods_xpath[$rule + 1];

                        # Same layout one level down: name slot, then spec slot.
                        for (my $slot = 0; ; $slot += 2) {
                                my $part = $$parts[$slot + 1];
                                last unless ($part);

                                my $part_name = $$parts[$slot];

                                # local: the fixup closures read and write $text by name.
                                local $text;
                                foreach my $candidate ( @{ $part->{xpath} } ) {
                                        $text = $mods->findvalue( $candidate );
                                        last if ($text);
                                }

                                $log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

                                next unless ($text);

                                $$part{fixup}->();
                                $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                                $fingerprint .= $text;
                        }
                }

                if ($fingerprint) {
                        $fingerprint =~ s/\W+//gso;
                        $log->debug("Fingerprint is [$fingerprint]", INFO);;
                        return $fingerprint;
                }
        }

        return undef;
}
1798
# API method: recompute fingerprint and quality for bib record(s) and, unless
# the invoked api_name contains "nomap", move the record to the metarecord
# that matches its new fingerprint.
#
# For every record_entry returned by the id search on $rec:
#   * the "open-ils.worm.fingerprint.marc" method is run on its marc value;
#   * if fingerprint or quality differ from the stored values the entry is
#     updated;
#   * in mapping mode the record's existing metarecord_source_map rows are
#     deleted, the previously mapped metarecord is deleted when no source
#     map refers to it any more, a metarecord for the new fingerprint is
#     looked up (or created with this record as master), and a new source
#     map row is created;
#   * the record id is sent to the client with respond().
#
# A storage transaction is opened here only when none is in progress; in
# that case it is committed on success and rolled back on any failure.
# Failures inside the processing loop are logged, not rethrown.
#
#   $self   - method object; used for api_name and method_lookup()
#   $client - passed to begin_transaction() and used for respond()
#   $rec    - value passed to the record_entry id search
#
# Returns undef.  Throws OpenSRF::EX::PANIC when a transaction cannot be begun.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

                # $bib, not $b: $b is the special variable used by sort.
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        # The fingerprint method returns 0 when its script fails.
                        # Dereferencing that would silently yield an empty
                        # fingerprint, so treat it as a failure of this run.
                        die "No fingerprint produced for bib rec ".$bib->id."\n" unless (ref $fp);

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # Remove the record's current mappings, remembering
                                        # the metarecord of the last one seen.
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared unconditionally: "my $x = ... if COND" is
                                        # undefined behaviour and can carry a value over from
                                        # a previous loop iteration.
                                        my $old_sm;
                                        if ($old_mrid) {
                                                $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );
                                        }

                                        # Nothing maps to the old metarecord any more: drop it.
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);

# NOTE(review): this variant is registered without "stream => 1" although the
# method still calls $client->respond() -- confirm whether that is intended.
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
);
1901
1902 =comment
1903
1904 sub fingerprint_bibrec {
1905         my $self = shift;
1906         my $client = shift;
1907         my $rec = shift;
1908
1909         OpenILS::Application::Ingest->post_init();
1910         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1911
1912         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1913         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1914         return $fp;
1915
1916 }
1917 __PACKAGE__->register_method(  
1918         api_name        => "open-ils.worm.fingerprint.record",
1919         method          => "fingerprint_bibrec",
1920         api_level       => 0,
1921         argc            => 1,
1922 );                      
1923
1924
1925 sub fingerprint_mods {
1926         my $self = shift;
1927         my $client = shift;
1928         my $xml = shift;
1929
1930         OpenILS::Application::Ingest->post_init();
1931         my $mods = $parser->parse_string($xml)->documentElement;
1932
1933         return _fp_mods( $mods );
1934 }
1935 __PACKAGE__->register_method(  
1936         api_name        => "open-ils.worm.fingerprint.mods",
1937         method          => "fingerprint_mods",
1938         api_level       => 1,
1939         argc            => 1,
1940 );                      
1941
1942 sub fingerprint_marc {
1943         my $self = shift;
1944         my $client = shift;
1945         my $xml = shift;
1946
1947         $xml = $parser->parse_string($xml) unless (ref $xml);
1948
1949         OpenILS::Application::Ingest->post_init();
1950         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1951         $log->debug("Returning [$fp] as fingerprint", INFO);
1952         return $fp;
1953 }
1954 __PACKAGE__->register_method(  
1955         api_name        => "open-ils.worm.fingerprint.marc",
1956         method          => "fingerprint_marc",
1957         api_level       => 1,
1958         argc            => 1,
1959 );                      
1960
1961
1962 =cut
1963
# API method: fingerprint a stored bib record.
#
# The record_entry identified by $rec is fetched through storage_req() and
# its marc value is run through the "open-ils.worm.fingerprint.marc" method.
#
#   $self   - method object; used for method_lookup()
#   $client - dispatch argument (not used in the body)
#   $rec    - identifier passed to the record_entry retrieve call
#
# Returns the first value produced by the fingerprint method.
sub biblio_fingerprint_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec
        );
        my $marc = $entry->marc;

        my $fp_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
        my ($fp) = $fp_method->run($marc);

        # NOTE(review): $fp is interpolated as-is; if the fingerprint method
        # hands back a reference this line logs its stringified form.
        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
1985
# ScriptRunner used for fingerprinting; built on first use by
# biblio_fingerprint() and reused by later calls.
our $fp_script;

# API method: fingerprint a MARCXML record by running the configured
# biblio_fingerprint script.
#
# The record is transformed with $mods_sheet; both the MODS result and the
# MARC document element are serialised and passed through entityize() with
# the 'D' argument.  The two strings are inserted into the script as
# 'environment' => { marc => ..., mods => ... } and the script is run.
#
#   $self, $client - method-dispatch arguments (not used in the body)
#   $marc          - MARCXML string (parsed with the shared $parser) or an
#                    already-parsed document
#
# Returns the script's result, or 0 when the run yields a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if (!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};

        $fp_script->insert('environment' => $env);

        $log->debug("Running script for biblio fingerprinting...");

        my $res = $fp_script->run;
        unless ($res) {
                # Always bail out on a false result, whatever the logger returns.
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
2048
2049 # --------------------------------------------------------------------------------
2050
2051 1;
2052
2053 __END__
# Everything from here on follows the __END__ marker and is therefore not
# compiled as part of the module.

# Slots for storage method handles; wormize() fills each one through
# method_lookup() the first time it is found unset.
my (
        $in_xact, $begin, $commit, $rollback,
        $lookup, $update_entry,
        $mr_lookup, $mr_update, $mr_create,
        $create_source_map, $sm_lookup,
        $rm_old_rd, $rm_old_sm, $rm_old_fr, $rm_old_tr,
        $rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser
);

my ($fr_create, $rd_create);
my $create = {};

# Descriptor field name => Perl expression text over $ldr (leader) and $oo8
# (008 field).  Presumably evaluated by the code that consumes this table --
# confirm against wormize().
my %descriptor_code = (
        item_type       => 'substr($ldr,6,1)',
        item_form       => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        bib_level       => 'substr($ldr,7,1)',
        control_type    => 'substr($ldr,8,1)',
        char_encoding   => 'substr($ldr,9,1)',
        enc_level       => 'substr($ldr,17,1)',
        cat_form        => 'substr($ldr,18,1)',
        pub_status      => 'substr($ldr,5,1)',
        item_lang       => 'substr($oo8,35,3)',
        #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
        audience        => 'substr($oo8,22,1)',
);
2091
# wormize( $self, $client, @docids )
#
# Method body behind the open-ils.worm.wormize* API names.  For every
# bibliographic record retrieved for @docids it:
#   * transforms the MARCXML to MODS and stores a fresh fingerprint on the entry,
#   * unless the api_name contains "no_map", finds (or creates) the metarecord
#     for that fingerprint and queues a metarecord_source_map row,
#   * builds a record_descriptor from the leader / 008 via %descriptor_code,
#   * builds the full_rec rows and collects the per-class field entry data.
# The old derived rows of the ingested records are then mass-deleted and the
# new rows batch-created.
#
# Only the first retrieved record is processed unless the api_name ends in
# "batch".  When no storage transaction is current, one is begun here and
# committed before returning.
#
# Returns the number of records ingested.  Throws OpenSRF::EX::PANIC when the
# transaction cannot be begun or committed, or when a create/update storage
# call returns undef.
sub wormize {

    my $self = shift;
    my $client = shift;
    my @docids = @_;

    my $no_map   = ($self->api_name =~ /no_map/) ? 1 : 0;
    my $is_batch = ($self->api_name =~ /batch$/) ? 1 : 0;

    # Storage method handles are resolved once and cached in file-scoped
    # lexicals, so only the first call pays for the lookups.
    $in_xact  ||= $self->method_lookup('open-ils.storage.transaction.current');
    $begin    ||= $self->method_lookup('open-ils.storage.transaction.begin');
    $commit   ||= $self->method_lookup('open-ils.storage.transaction.commit');
    $rollback ||= $self->method_lookup('open-ils.storage.transaction.rollback');

    $sm_lookup    ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
    $mr_lookup    ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
    $mr_update    ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
    $lookup       ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
    $update_entry ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');

    $rm_old_sm  ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
    $rm_old_rd  ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.mass_delete');
    $rm_old_fr  ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.mass_delete');
    $rm_old_tr  ||= $self->method_lookup('open-ils.storage.direct.metabib.title_field_entry.mass_delete');
    $rm_old_ar  ||= $self->method_lookup('open-ils.storage.direct.metabib.author_field_entry.mass_delete');
    $rm_old_sr  ||= $self->method_lookup('open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
    $rm_old_kr  ||= $self->method_lookup('open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
    $rm_old_ser ||= $self->method_lookup('open-ils.storage.direct.metabib.series_field_entry.mass_delete');

    $mr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
    $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
    $rd_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.batch.create');
    $fr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.batch.create');

    for my $class ( qw/title author subject keyword series/ ) {
        $$create{$class} ||= $self->method_lookup("open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
    }

    # Join the caller's transaction if there is one; otherwise open our own.
    my ($outer_xact) = $in_xact->run;
    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
            my ($r) = $begin->run($client);
            unless (defined $r and $r) {
                $rollback->run;
                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
            }
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't BEGIN transaction!");
    };

    my @source_maps;
    my @entry_list;
    my @mr_list;
    my @rd_list;
    my @ns_list;
    my @mods_data;
    my @ingested_ids;   # ids of the records actually re-ingested below
    my $ret = 0;
    for my $entry ( $lookup->run(@docids) ) {
        # step -1: grab the doc from storage
        next unless ($entry);

        # The MODS stylesheet is parsed once and cached for later records/calls.
        $mods_sheet ||= $xslt->parse_stylesheet(
            $parser->parse_file(
                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl"
            )
        );

        my $docid = $entry->id;
        my $marcdoc = $parser->parse_string( $entry->marc );
        my $modsdoc = $mods_sheet->transform($marcdoc);

        my $mods = $modsdoc->documentElement;
        $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

        $entry->fingerprint( fingerprint_mods( $mods ) );
        push @entry_list, $entry;
        push @ingested_ids, $docid;

        $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

        unless ($no_map) {
            my ($mr) = $mr_lookup->run( $entry->fingerprint );
            if (!$mr || !@$mr) {
                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                $mr = Fieldmapper::metabib::metarecord->new;
                $mr->fingerprint( $entry->fingerprint );
                $mr->master_record( $entry->id );
                my ($new_mr) = $mr_create->run($mr);
                # Check the value returned by create: $mr itself is always
                # defined at this point, so testing it could never fail.
                unless (defined $new_mr) {
                    OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.metarecord.create!");
                }
                $mr->id($new_mr);
            } else {
                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                $mr->mods('');
                push @mr_list, $mr;
            }

            my $sm = Fieldmapper::metabib::metarecord_source_map->new;
            $sm->metarecord( $mr->id );
            $sm->source( $entry->id );
            push @source_maps, $sm;
        }

        # NOTE: the %descriptor_code expressions are string-eval'ed below and
        # refer to these two lexicals by name -- do not rename $ldr or $oo8.
        my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
        my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

        my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
        for my $rd_field ( keys %descriptor_code ) {
            $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
        }
        $rd_obj->record( $docid );
        push @rd_list, $rd_obj;

        push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

        # step 2: build the KOHA rows
        my @tmp_list = _marcxml_to_full_rows( $marcdoc );
        $_->record( $docid ) for (@tmp_list);
        push @ns_list, @tmp_list;

        $ret++;

        last unless ($is_batch);
    }

    # Remove the old derived rows, but only for the records that were actually
    # re-ingested above.  Deleting for every requested id would, in non-batch
    # mode, strip the rows of records 2..n without ever recreating them.
    if (@ingested_ids) {
        $rm_old_rd->run( { record => \@ingested_ids } );
        $rm_old_fr->run( { record => \@ingested_ids } );
        $rm_old_sm->run( { source => \@ingested_ids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@ingested_ids } );
        $rm_old_ar->run( { source => \@ingested_ids } );
        $rm_old_sr->run( { source => \@ingested_ids } );
        $rm_old_kr->run( { source => \@ingested_ids } );
        $rm_old_ser->run( { source => \@ingested_ids } );
    }

    unless ($no_map) {
        my ($sm) = $create_source_map->run(@source_maps);
        unless (defined $sm) {
            OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!");
        }
        my ($mr) = $mr_update->run(@mr_list);
        unless (defined $mr) {
            OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!");
        }
    }

    my ($re) = $update_entry->run(@entry_list);
    unless (defined $re) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!");
    }

    my ($rd) = $rd_create->run(@rd_list);
    unless (defined $rd) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!");
    }

    my ($fr) = $fr_create->run(@ns_list);
    unless (defined $fr) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!");
    }

    # step 5: insert the new metadata
    for my $class ( qw/title author subject keyword series/ ) {
        my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
        my @md_list = ();
        for my $doc ( @mods_data ) {
            # each element is a single-pair hash: { docid => values }
            my ($did) = keys %$doc;
            my ($data) = values %$doc;

            for my $row ( keys %{ $$data{$class} } ) {
                next unless (exists $$data{$class}{$row});
                next unless ($$data{$class}{$row}{value});
                my $fm_obj = $fm_constructor->new;
                $fm_obj->value( $$data{$class}{$row}{value} );
                $fm_obj->field( $$data{$class}{$row}{field_id} );
                $fm_obj->source( $did );
                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                push @md_list, $fm_obj;
            }
        }

        my ($cr) = $$create{$class}->run(@md_list);
        unless (defined $cr) {
            OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!");
        }
    }

    # Only commit a transaction that this call opened itself.
    unless ($outer_xact) {
        $log->debug("Commiting transaction started by the Ingest.", INFO);
        my ($c) = $commit->run;
        unless (defined $c and $c) {
            $rollback->run;
            OpenSRF::EX::PANIC->throw("Couldn't COMMIT changes!");
        }
    }

    return $ret;
}
# Publish wormize() under its four API names.  The sub inspects its own
# api_name: "no_map" switches off metarecord mapping, and a trailing "batch"
# lets it process every requested record instead of only the first.
for my $wormize_api (
    "open-ils.worm.wormize",
    "open-ils.worm.wormize.no_map",
    "open-ils.worm.wormize.batch",
    "open-ils.worm.wormize.no_map.batch",
) {
    __PACKAGE__->register_method(
        api_name  => $wormize_api,
        method    => "wormize",
        api_level => 1,
        argc      => 1,
    );
}
2344
2345
# File-scoped slots for cached storage method handles on the authority side.
# authority_wormize() fills $alookup, $aupdate_entry, $arm_old_rd, $arm_old_fr,
# $ard_create and $afr_create on first use; the remaining names are declared
# here but are not assigned anywhere in this part of the file.
my ($ain_xact, $abegin, $acommit, $arollback);
my ($alookup, $aupdate_entry);
my ($amr_lookup, $amr_update, $amr_create);
my ($acreate_source_map, $asm_lookup);
my ($arm_old_rd, $arm_old_sm, $arm_old_fr);
my ($arm_old_tr, $arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser);

my ($afr_create, $ard_create);
my $acreate = {};
2369
# authority_wormize( $self, $client, @docids )
#
# Authority-record counterpart of wormize().  For every authority record
# retrieved for @docids it builds a record_descriptor from the leader / 008
# (via %descriptor_code) and the full_rec rows, then mass-deletes the old
# derived rows of the ingested records and batch-creates the new ones.
#
# Only the first retrieved record is processed unless the api_name ends in
# "batch".  When no storage transaction is current, one is begun here and
# committed before returning.
#
# Returns the number of records ingested.  Throws OpenSRF::EX::PANIC when the
# transaction cannot be begun or committed, or when a batch.create call
# returns undef.
sub authority_wormize {

    my $self = shift;
    my $client = shift;
    my @docids = @_;

    my $is_batch = ($self->api_name =~ /batch$/) ? 1 : 0;

    # Storage method handles are resolved once and cached in file-scoped
    # lexicals.  The transaction handles are shared with wormize().
    $in_xact  ||= $self->method_lookup('open-ils.storage.transaction.current');
    $begin    ||= $self->method_lookup('open-ils.storage.transaction.begin');
    $commit   ||= $self->method_lookup('open-ils.storage.transaction.commit');
    $rollback ||= $self->method_lookup('open-ils.storage.transaction.rollback');

    $alookup       ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
    # Looked up as before, although nothing below calls it yet.
    $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
    $arm_old_rd    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.mass_delete');
    $arm_old_fr    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.mass_delete');
    $ard_create    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.batch.create');
    $afr_create    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.batch.create');

    # Join the caller's transaction if there is one; otherwise open our own.
    my ($outer_xact) = $in_xact->run;
    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
            my ($r) = $begin->run($client);
            unless (defined $r and $r) {
                $rollback->run;
                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
            }
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't BEGIN transaction!");
    };

    my @rd_list;
    my @ns_list;
    my @ingested_ids;   # ids of the records actually re-ingested below
    my $ret = 0;

    # Use the *authority* retrieve handle; the bibliographic $lookup would
    # fetch biblio records (or be undefined if wormize() has never run).
    for my $entry ( $alookup->run(@docids) ) {
        # step -1: grab the doc from storage
        next unless ($entry);

        # MADS transformation is not wired up yet; kept for reference:
        #if(!$mads_sheet) {
        #       my $xslt_doc = $parser->parse_file(
        #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
        #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
        #}

        my $docid = $entry->id;
        my $marcdoc = $parser->parse_string( $entry->marc );
        #my $madsdoc = $mads_sheet->transform($marcdoc);

        #my $mads = $madsdoc->documentElement;
        #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

        push @ingested_ids, $docid;

        # NOTE: the %descriptor_code expressions are string-eval'ed below and
        # refer to these two lexicals by name -- do not rename $ldr or $oo8.
        my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
        my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

        my $rd_obj = Fieldmapper::authority::record_descriptor->new;
        for my $rd_field ( keys %descriptor_code ) {
            $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
        }
        $rd_obj->record( $docid );
        push @rd_list, $rd_obj;

        # step 2: build the KOHA rows
        my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
        $_->record( $docid ) for (@tmp_list);
        push @ns_list, @tmp_list;

        $ret++;

        last unless ($is_batch);
    }

    # Remove the old derived rows, but only for the records that were actually
    # re-ingested above; in non-batch mode the other requested ids are left
    # untouched rather than being stripped without replacement.
    if (@ingested_ids) {
        $arm_old_rd->run( { record => \@ingested_ids } );
        $arm_old_fr->run( { record => \@ingested_ids } );
    }

    my ($rd) = $ard_create->run(@rd_list);
    unless (defined $rd) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!");
    }

    # Use the *authority* full_rec creator, matching the error text below.
    my ($fr) = $afr_create->run(@ns_list);
    unless (defined $fr) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!");
    }

    # Only commit a transaction that this call opened itself.
    unless ($outer_xact) {
        $log->debug("Commiting transaction started by Ingest.", INFO);
        my ($c) = $commit->run;
        unless (defined $c and $c) {
            $rollback->run;
            OpenSRF::EX::PANIC->throw("Couldn't COMMIT changes!");
        }
    }

    return $ret;
}
# Publish authority_wormize().  These registrations previously pointed at the
# bibliographic "wormize" sub, so authority calls ran the wrong ingest.
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.authority.wormize",
    method      => "authority_wormize",
    api_level   => 1,
    argc        => 1,
);
# Legacy alias: the name was originally registered with this misspelling
# ("authortiy").  Kept so that existing callers of that name keep resolving.
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.authortiy.wormize",
    method      => "authority_wormize",
    api_level   => 1,
    argc        => 1,
);
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.authority.wormize.batch",
    method      => "authority_wormize",
    api_level   => 1,
    argc        => 1,
);
2500
2501
2502 # --------------------------------------------------------------------------------
2503
2504
# _marcxml_to_full_rows( $marcxml [, $type ] )
#
# Flattens a parsed MARCXML document into a list of "full_rec" row objects.
#   $marcxml - document object; only ->documentElement is called on it
#   $type    - class name used to construct every row
#              (default 'Fieldmapper::metabib::full_rec')
#
# One row is produced per leader (tag 'LDR'), one per controlfield, and one per
# child element of each datafield (carrying tag, ind1, ind2 and subfield code).
# Every value is NFD-normalised with combining marks removed; datafield values
# are additionally lower-cased.  The rows' record id is left for the caller to
# set.
#
# Returns the list of row objects.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $type = shift || 'Fieldmapper::metabib::full_rec';

    # decompose, then drop the combining marks (i.e. strip diacritics)
    my $strip_marks = sub {
        my $val = NFD( $_[0] );
        $val =~ s/(\pM+)//gs;
        return $val;
    };

    my @ns_list;

    my $root = $marcxml->documentElement;

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        # build the row from $type so authority callers get authority rows
        my $ns = $type->new;

        $ns->tag( 'LDR' );
        $ns->value( $strip_marks->( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( $strip_marks->( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( $tagline->childNodes ) {
            next unless $data;
            # childNodes can also yield nodes that carry no attributes (e.g.
            # whitespace text between subfields); calling getAttribute on
            # those would die, so they are skipped.
            next unless $data->can('getAttribute');

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( lc( $strip_marks->( $data->textContent ) ) );

            push @ns_list, $ns;
        }
    }
    return @ns_list;
}
2565
# _get_field_value( $root, $xpath )
#
# Collects the text found under every node matching $xpath (evaluated with
# $root->findnodes) into one space-separated string.
#
# For a matched node with children, each child's text is appended unless that
# exact text already occurs somewhere in the string built so far (a substring
# test, which is how values are "uniquified").  A matched node without
# children contributes its own text.  Every appended piece is followed by a
# single space.
#
# Returns the string NFD-normalised, with combining marks removed, lower-cased.
sub _get_field_value {

    my( $root, $xpath ) = @_;

    my $string = "";

    # grab the set of matching nodes
    for my $value ( $root->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            # Add the child's content to the growing buffer unless it is
            # already present.  index() is used rather than a regex match:
            # an empty pattern would make Perl silently re-use the last
            # successfully matched regex instead of matching "nothing".
            my $content = $child->textContent;
            next if (index($string, $content) >= 0);  # uniquify the values
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    $string = NFD($string);
    $string =~ s/(\pM)//gs;
    return lc($string);
}
2593
2594
# modsdoc_to_values( $self, $mods )
#
# Builds the per-class / per-type skeleton used by wormize():
#     { class => { type => { field_id => <id from $xpathset> } } }
# with one entry for every class and type present in $xpathset.
#
# NOTE(review): $mods is accepted but never read, and no "value" key is
# filled in, so callers that skip rows without a value will skip every row.
# This looks like unfinished extraction logic -- confirm before relying on it.
sub modsdoc_to_values {
    my( $self, $mods ) = @_;

    my %values_for;
    foreach my $class_name (keys %$xpathset) {
        my %entry_for = map {
            ( $_ => { field_id => $xpathset->{$class_name}->{$_}->{id} } )
        } keys %{ $xpathset->{$class_name} };

        $values_for{$class_name} = \%entry_for;
    }

    return \%values_for;
}
2607
2608
2609 1;
2610
2611