git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
refreshing the JS context to prevent memory leaks
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
# Metadata formats known to the ingest code, keyed by format name.
# Each entry carries the XML namespace URI of the format under "ns";
# post_init() additionally stores a compiled stylesheet under "xslt"
# for the mods and mods3 entries.
our %supported_formats = (
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },

    # The three Dublin Core flavours are registered with an empty namespace.
    ( map { ( $_ => { ns => '' } ) } qw/srw_dc oai_dc rdf_dc/ ),
);
28
29
# Name of the logger class; logging in this file is done through class
# method calls on this string.
our $log = 'OpenSRF::Utils::Logger';

# Shared XML parser and XSLT processor, created once when the file loads.
our $parser = XML::LibXML->new;
our $xslt   = XML::LibXSLT->new;

# Stylesheet slots; nothing in the visible part of this file assigns them.
our ($mods_sheet, $mads_sheet);

# field_class => field name => { xpath, id, format }; filled by post_init().
our $xpathset = {};

# Empty no-op hooks (presumably called by the OpenSRF application
# framework -- confirm against OpenSRF::Application).
sub initialize { }
sub child_init { }
40
# Lazily prepare the shared ingest state.
#
# Does nothing once $xpathset has at least one key.  Otherwise it compiles
# the MODS and MODS v3 stylesheets found in the configured xsl directory
# (skipping any already attached to %supported_formats) and loads every
# config.metabib_field row from open-ils.cstore into $xpathset as
#   $xpathset->{field_class}{name} = { xpath => ..., id => ..., format => ... }
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, log message, stylesheet path below $xsldir ], in load order
    my @stylesheets = (
        [ 'mods',  "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ 'mods3', "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $message, $path) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $path );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # Fetch every field definition (all rows with a non-null id).
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $field (@$fields) {
        my $class = $field->field_class;
        my $name  = $field->name;

        my $entry = ( $xpathset->{$class}->{$name} ||= {} );
        $entry->{xpath}  = $field->xpath;
        $entry->{id}     = $field->id;
        $entry->{format} = $field->format;

        $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$field->xpath, DEBUG);
    }

    return;
}
76
# Normalize a string and replace every non-ASCII character with a
# hexadecimal XML character reference (e.g. U+00E9 becomes "&#xE9;").
#
# Arguments:
#   $stuff - the text to convert
#   $form  - optional; 'D' selects NFD normalization, anything else
#            (including no argument at all) selects NFC
#
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # Every caller in this file omits $form, so do not compare undef.
    if (defined $form and $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD as before, plus the supplementary planes, so the
    # result contains no raw non-ASCII characters (U+FFFE/U+FFFF are
    # still left alone).
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/ge;
    return $stuff;
}
90
# Read-only ingest of one bib object.
#
# Reads the MARCXML from $bib->marc, runs the flat-MARC, field-entry,
# fingerprint and descriptor methods on it, and stamps $bib->id onto the
# resulting objects.  Returns a hashref with the keys full_rec,
# field_entries, fingerprint and descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two methods get the parsed document, the last two the string.
    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@field_entries) {
        $entry->source($bib->id);
    }
    for my $row (@full_rows) {
        $row->record($bib->id);
    }
    if ($descriptor) {
        $descriptor->record($bib->id);
    }

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object.readonly",
    method    => "ro_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
116
# Read-only ingest of one MARCXML string.
#
# Entityizes and parses the XML, then runs the flat-MARC, field-entry,
# fingerprint and descriptor methods on it.  Returns a hashref with the
# keys full_rec, field_entries, fingerprint and descriptor; no record id
# is set on any of the returned objects.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw_xml) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw_xml);
    my $document = $parser->parse_string($xml);

    # The first two methods get the parsed document, the last two the string.
    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
    method    => "ro_biblio_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
137
# Read-only ingest of one stored bib record, looked up by id.
#
# Retrieves biblio.record_entry $rec from open-ils.cstore, runs the
# xml.readonly ingest on its MARC, and stamps $rec onto the resulting
# field entries, full_rec rows and descriptor.
#
# Returns the ingest hashref, or undef when the record cannot be retrieved.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor method returns undef when its script fails, so guard
    # the call the same way ro_biblio_ingest_single_object does.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
165
# Streaming variant of the record.readonly ingest.
#
# Reads record ids from the client one message at a time (5 second
# timeout per read), ingests each through
# open-ils.ingest.full.biblio.record.readonly and responds with the
# result.  The loop ends when recv() yields nothing or a message has no
# defined content.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The original also created an open-ils.cstore session here that was
    # never used; it has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        $_->source($rec) for (@{$res->{field_entries}});
        $_->record($rec) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
    method          => "ro_biblio_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
196
# Streaming variant of the xml.readonly ingest.
#
# Reads MARCXML strings from the client one message at a time (5 second
# timeout per read), ingests each through
# open-ils.ingest.full.biblio.xml.readonly and responds with the result.
# The loop ends when recv() yields nothing or a message has no defined
# content.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The original also created an open-ils.cstore session here that was
    # never used; it has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method          => "ro_biblio_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
224
# Streaming ingest of bib objects for import.
#
# Reads bib objects from the client one message at a time (5 second
# timeout per read), ingests each object's MARC through
# open-ils.ingest.full.biblio.xml.readonly, stamps the object's id onto
# the field entries and full_rec rows, and responds with the result.
# The loop ends when recv() yields nothing or a message has no defined
# content.  Always returns undef.
sub rw_biblio_ingest_stream_import {
    my $self = shift;
    my $client = shift;

    OpenILS::Application::Ingest->post_init();

    # The original also created an open-ils.cstore session here that was
    # never used; it has been dropped.

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        $_->source($bib->id) for (@{$res->{field_entries}});
        $_->record($bib->id) for (@{$res->{full_rec}});

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
    method          => "rw_biblio_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
255
256
257 # --------------------------------------------------------------------------------
258 # MARC index extraction
259
260 package OpenILS::Application::Ingest::XPATH;
261 use base qw/OpenILS::Application::Ingest/;
262 use Unicode::Normalize;
263
# Give this an XML documentElement and an XPath expression; it returns
# the text of the matching nodes as one NFD-normalized string.
#
# Arguments:
#   $xml       - element to search; must support setNamespace/findnodes
#   $xpath     - XPath expression to evaluate against $xml
#   $ns_uri    - optional namespace URI   } registered on $xml only when
#   $ns_prefix - optional namespace prefix } both are true
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string collected so far (substring test)
#
# For every matched node the text of each child node is appended,
# followed by a single space; a node without children contributes its own
# text instead.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        my @children = $value->childNodes();

        if( ! @children ) {
            $string .= $value->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $content = $child->textContent;

            # Uniquify with a literal substring search.  A regex built
            # from the text would turn an empty text node into the empty
            # pattern, which Perl treats as "reuse the last successful
            # pattern" rather than "match anything".
            next if ($unique && index($string, $content) >= 0);

            $string .= $content . " ";
        }
    }
    return NFD($string);
}
295
# Build field-entry objects for the requested index classes from one
# MARCXML document.
#
# Arguments (after $self and $client):
#   $xml     - MARCXML string, or an already parsed document (any ref)
#   @classes - index class names; each selects the definitions stored
#              under $xpathset->{class}
#
# For every definition of every class the document is transformed with
# the format's stylesheet when one is loaded (one transform per format per
# call), the definition's XPath is evaluated with xpath_to_string, and a
# Fieldmapper::metabib::<class>_field_entry holding the cleaned, lower-cased
# value and the definition id is sent to the client.  Definitions that
# yield no value are skipped.  Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    unless (ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document, so each stylesheet runs once
    my %transformed;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $supported_formats{$format};

            # Formats without a stylesheet are searched in the raw document.
            my $document = $xml;
            if ($sf->{xslt}) {
                $transformed{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transformed{$format};
            }

            # namespace URI comes from the format, the prefix is the format name
            my $value = xpath_to_string(
                $document->documentElement, $def->{xpath},
                $sf->{ns},                  $format,
                1
            );

            next unless $value;

            # drop mark (\pM) and "other" (\pC) characters
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

            # remove a period that directly follows a word character
            $value =~ s/(\w)\./$1/sgo;
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
351
# Build field-entry objects for the requested index classes from a stored
# bib record.
#
# Arguments (after $self and $client):
#   $rec     - id of the biblio.record_entry to retrieve from open-ils.cstore
#   @classes - one or more index class names
#
# Each entry produced by open-ils.ingest.field_entry.class.xml gets $rec
# as its source and is sent to the client.  Returns undef, also when the
# record cannot be retrieved.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument; "= shift" kept only the first class.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
379
# Build field-entry objects for every known index class from one MARCXML
# document (string or parsed document) and send each to the client.
# Always returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # $xpathset must be loaded before its keys are read below; otherwise
    # the first direct call into this method passes an empty class list.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
397
# Build field-entry objects for every known index class from a stored bib
# record.
#
# Retrieves biblio.record_entry $rec from open-ils.cstore, runs
# open-ils.ingest.field_entry.class.xml over its MARC for all classes in
# $xpathset, sets $rec as the source of each entry and sends it to the
# client.  Returns undef, also when the record cannot be retrieved.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef if (!$r or !@$r);

    my @entries = $self->method_lookup("open-ils.ingest.field_entry.class.xml")
                       ->run($r->marc, keys(%$xpathset));

    for my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
424
425 # --------------------------------------------------------------------------------
426 # Flat MARC
427
428 package OpenILS::Application::Ingest::FlatMARC;
429 use base qw/OpenILS::Application::Ingest/;
430 use Unicode::Normalize;
431
432
# Flatten a parsed MARCXML document into a list of full_rec objects.
#
# Arguments:
#   $marcxml - parsed document; the first element whose local name is
#              "record" is used as the root
#   $xmltype - Fieldmapper namespace for the rows; defaults to 'metabib'
#
# Produces one row per leader (tag 'LDR'), one per controlfield, and one
# per subfield of every datafield.  Every value is NFD-normalized with
# mark characters removed; only subfield values are also lower-cased.
# Returns the rows as a list.
sub _marcxml_to_full_rows {

    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $row_class = "Fieldmapper::${xmltype}::full_rec";

    # Decompose the text, then strip the mark characters.
    my $strip_marks = sub {
        my $text = NFD($_[0]);
        $text =~ s/(\pM+)//gso;
        return $text;
    };

    my @rows;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $row_class->new;
        $row->tag( 'LDR' );
        $row->value( $strip_marks->($leader->textContent) );

        push @rows, $row;
    }

    for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $row_class->new;
        $row->tag( $control->getAttribute( "tag" ) );
        $row->value( $strip_marks->($control->textContent) );

        push @rows, $row;
    }

    for my $datafield ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $datafield;

        # tag and indicators are shared by all subfields of this field
        my $tag  = $datafield->getAttribute( "tag" );
        my $ind1 = $datafield->getAttribute( "ind1" );
        my $ind2 = $datafield->getAttribute( "ind2" );

        for my $subfield ( @{$datafield->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $row_class->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );
            $row->value( lc($strip_marks->($subfield->textContent)) );

            push @rows, $row;
        }
    }

    $log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml");
    return @rows;
}
500
# Stream the flattened full_rec rows of one MARCXML document.
#
# $xml may be a MARCXML string or an already parsed document (any ref).
# Rows are built as 'authority' rows when the invoked API name contains
# "authority", otherwise as 'metabib' rows.  Always returns undef.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    unless (ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row (_marcxml_to_full_rows($xml, $type)) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
532
# Stream the flattened full_rec rows of one stored record.
#
# The record type is 'authority' when the invoked API name contains
# "authority", otherwise 'biblio'.  The record_entry $rec of that type is
# retrieved from open-ils.cstore and its MARC is passed to the matching
# open-ils.ingest.flat_marc.<type>.xml method; every row is sent to the
# client and logged as JSON.  Returns undef, also when the record or its
# MARC is missing.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    return undef if (!$r or !$r->marc);

    for my $row ($self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc)) {
        $client->respond($row);
        $log->debug(JSON->perl2JSON($row), DEBUG);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
571
572 # --------------------------------------------------------------------------------
573 # Fingerprinting
574
575 package OpenILS::Application::Ingest::Biblio::Fingerprint;
576 use base qw/OpenILS::Application::Ingest/;
577 use Unicode::Normalize;
578 use OpenSRF::EX qw/:try/;
579
# Fingerprint a stored bib record.
#
# Retrieves biblio.record_entry $rec from open-ils.cstore and returns
# whatever open-ils.ingest.fingerprint.xml yields for its MARC.  Returns
# undef when the record or its MARC is missing.
sub biblio_fingerprint_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef if (!$r or !$r->marc);

    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    return $fp;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.record",
    method    => "biblio_fingerprint_record",
    api_level => 1,
    argc      => 1,
);
604
# Cached script runner for fingerprinting; built on first use.
our $fp_script;

# Compute the fingerprint of a MARCXML string by running the configured
# biblio_fingerprint script.
#
# The runner is created once from the open-ils.ingest app_settings
# (script_path and scripts/biblio_fingerprint) and reused afterwards.  The
# entityized XML is inserted as the "marc" member of the script's
# "environment" before each run.
#
# Returns the script result, or undef (after logging an error) when the
# run yields a false value.
sub biblio_fingerprint {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if(!$fp_script) {
        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
        # script_path may be configured as a single value or as a list
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    my $res = $fp_script->run;

    # Bail out explicitly.  The former "log && return" form only returned
    # when the logger call happened to return a true value.
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.fingerprint.xml",
    method          => "biblio_fingerprint",
    api_level       => 1,
    argc            => 1,
);
642
# Cached script runner for descriptor extraction; built on first use.
our $rd_script;

# Extract the record descriptor of a MARCXML string by running the
# configured biblio_descriptor script.
#
# The runner is created once from the open-ils.ingest app_settings
# (script_path and scripts/biblio_descriptor) and reused afterwards.  The
# entityized XML is inserted as 'environment.marc' before each run.
#
# Returns the script result, or undef (after logging an error) when the
# run yields a false value.
sub biblio_descriptor {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if(!$rd_script) {
        my @pfx = ( "apps", "open-ils.ingest","app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
        # script_path may be configured as a single value or as a list
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $log->debug("Setting up environment for descriptor extraction script...");
    $rd_script->insert('environment.marc' => $xml => 1);
    $log->debug("Environment building complete...");

    my $res = $rd_script->run;

    # Bail out explicitly.  The former "log && return" form only returned
    # when the logger call happened to return a true value.
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio descriptor extraction completed successfully");

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.descriptor.xml",
    method          => "biblio_descriptor",
    api_level       => 1,
    argc            => 1,
);
682
683
684 1;
685
686 __END__
687
# Return what open-ils.storage.transaction.current reports, after making
# sure post_init() has run.  (This sub sits after __END__.)
sub in_transaction {
    OpenILS::Application::Ingest->post_init();

    my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
    return $current;
}
692
# Start a storage transaction unless one is already current.
#
# When no transaction is current, open-ils.storage.transaction.begin is
# called with $client; if that yields a false value a rollback is issued
# and a PANIC is thrown.  Any exception raised inside the try block is
# caught here and only logged.  Returns whatever
# open-ils.storage.transaction.current reports afterwards.
sub begin_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $already_open = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        unless ($already_open) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
            #__PACKAGE__->st_sess->connect;
            my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
            if (!$started) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                #__PACKAGE__->st_sess->disconnect;
                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
            }
        }
    } otherwise {
        $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
    };

    return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
717
# Roll back the current storage transaction, if there is one.
#
# Logs a message instead when no transaction is current.  An Error raised
# during the rollback is rethrown as a PANIC.  Returns 1.
sub rollback_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $open_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        if (!$open_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
    };

    return 1;
}
737
# Commit the current storage transaction, if there is one.
#
# Logs a message instead when no transaction is current.  When the commit
# call yields a false value a rollback is issued and a PANIC is thrown; an
# Error raised inside the try block is rethrown as a PANIC.  Returns 1.
sub commit_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $open_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        #if (__PACKAGE__->st_sess->connected && $outer_xact) {
        if (!$open_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
            if (!$committed) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
            }
            #__PACKAGE__->st_sess->disconnect;
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
    };

    return 1;
}
763
# Look up $method on this package, run it with the remaining arguments,
# and return only the first value it produced.
sub storage_req {
    my $self   = shift;
    my $method = shift;

    my ($first) = __PACKAGE__->method_lookup( $method )->run( @_ );
    return $first;
}
770
# Delete the derived authority data (full_rec rows and record descriptors)
# for authority record $rec.
#
# If no transaction is current, one is begun here and later committed on
# success or rolled back on failure; an already current transaction is
# left open.  The deletes run inside the savepoint
# 'scrub_authority_record', which is rolled back if anything throws.
# Returns 1 on success, 0 on failure.
sub scrub_authority_record {
    my ($self, $client, $rec) = @_;

    # Remember whether this call owns the transaction.
    my $commit = 0;
    unless (OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

        for my $delete_method (
            'open-ils.storage.direct.authority.full_rec.mass_delete',
            'open-ils.storage.direct.authority.record_descriptor.mass_delete',
        ) {
            OpenILS::Application::Ingest->storage_req( $delete_method, { record => $rec } );
        }

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
    } otherwise {
        # shift() takes the first argument passed to this handler block
        $log->debug('Scrubbing failed : '.shift(), ERROR);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
        $success = 0;
    };

    if ($commit) {
        if ($success) {
            OpenILS::Application::Ingest->commit_transaction;
        } else {
            OpenILS::Application::Ingest->rollback_transaction;
        }
    }
    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.scrub.authority",
    method    => "scrub_authority_record",
    api_level => 1,
    argc      => 1,
);
806
807
# scrub_metabib_record( $self, $client, $rec )
#
# Deletes the derived metabib rows (full_rec, metarecord source maps, record
# descriptors and the five *_field_entry classes) that belong to bib record
# $rec, then repairs every metarecord that named $rec as its master: the
# master is handed to the first remaining mapped source, or the metarecord
# is deleted when no sources remain.
#
# $rec may be a hash, in which case it is first resolved through the
# biblio.record_entry search_where id_list call.  If no transaction is open
# one is begun here and committed (success) or rolled back (failure) before
# returning.  Returns 1 on success and 0 if anything inside the try threw.
sub scrub_metabib_record {
	my ($self, $client, $rec) = @_;

	my $ingest = 'OpenILS::Application::Ingest';

	# a hash is a search specification, not a record id
	if ( ref($rec) =~ /HASH/o ) {
		$rec = $ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	# only manage the transaction if nobody above us already did
	my $own_txn = 0;
	unless ($ingest->in_transaction) {
		$ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$own_txn = 1;
	}

	# [ storage method, column that holds the bib record id ]
	my @purges = (
		[ 'open-ils.storage.direct.metabib.full_rec.mass_delete'             => 'record' ],
		[ 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete' => 'source' ],
		[ 'open-ils.storage.direct.metabib.record_descriptor.mass_delete'    => 'record' ],
		[ 'open-ils.storage.direct.metabib.title_field_entry.mass_delete'    => 'source' ],
		[ 'open-ils.storage.direct.metabib.author_field_entry.mass_delete'   => 'source' ],
		[ 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete'  => 'source' ],
		[ 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete'  => 'source' ],
		[ 'open-ils.storage.direct.metabib.series_field_entry.mass_delete'   => 'source' ],
	);

	my $ok = 1;
	try {
		$ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

		for my $purge (@purges) {
			my ($method, $column) = @$purge;
			$ingest->storage_req( $method, { $column => $rec } );
		}

		$log->debug( "Looking for metarecords whose master is $rec", DEBUG);
		my $orphaned = $ingest->storage_req(
			'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec
		);

		for my $mr (@$orphaned) {
			$log->debug( "Found metarecord whose master is $rec", DEBUG);
			my $remaining = $ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
				$mr->id
			);

			unless (@$remaining) {
				# nothing left to lead this metarecord
				warn "Removing metarecord whose master is $rec";
				$log->debug( "Removing metarecord whose master is $rec", DEBUG);
				$ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
				warn "Metarecord removed";
				$log->debug( "Metarecord removed", DEBUG);
				next;
			}

			# promote the first surviving source and clear the cached mods
			my $heir = $remaining->[0];
			$log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$heir->source, DEBUG);
			$mr->master_record($heir->source);
			$ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord.remote_update',
				{ id => $mr->id },
				{ master_record => $heir->source, mods => undef }
			);
		}

		$ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		$ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
		$ok = 0;
	};

	if ($own_txn) {
		if ($ok) {
			$ingest->commit_transaction;
		} else {
			$ingest->rollback_transaction;
		}
	}
	return $ok;
}
__PACKAGE__->register_method(
	method		=> 'scrub_metabib_record',
	api_name	=> 'open-ils.worm.scrub.biblio',
	argc		=> 1,
	api_level	=> 1,
);
881
# wormize_biblio_metarecord( $self, $client, $mrec )
#
# Re-ingests every bib record mapped to metarecord $mrec.  The source map
# rows for $mrec are fetched from storage and each source is passed to
# wormize_biblio_record (called with this method's own $self, so the
# .nomap / .noscrub variants of the API name are seen by it).
#
# Streams one hash per source back to the client:
#   { record => <source id>, metarecord => <metarecord id>, success => 0|1 }
# with an additional "error" key when the attempt threw.  Always returns
# undef once the stream is finished.
sub wormize_biblio_metarecord {
	my $self = shift;
	my $client = shift;
	my $mrec = shift;

	my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

	for my $r (@$recs) {
		my $success = 0;
		try {
			$success = wormize_biblio_record($self => $client => $r->source);
			$client->respond(
				{ record  => $r->source,
				  # the map row being processed is $r; the old code read an
				  # undefined $rec here and died on every response
				  metarecord => $r->metarecord,
				  success => $success,
				}
			);
		} catch Error with {
			my $e = shift;
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				  error   => $e,
				}
			);
		};
	}
	return undef;
}

# All four API names share the implementation; wormize_biblio_record checks
# the name for "nomap" / "noscrub".
for my $api_name (qw/
	open-ils.worm.wormize.metarecord
	open-ils.worm.wormize.metarecord.nomap
	open-ils.worm.wormize.metarecord.noscrub
	open-ils.worm.wormize.metarecord.nomap.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_biblio_metarecord",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
941
942
# wormize_biblio_record( $self, $client, $rec )
#
# Rebuilds the derived metabib data for the bib record(s) identified by $rec.
#
#   * $rec may be a hash, in which case it is resolved through the
#     biblio.record_entry search_where id_list call first.
#   * Unless the API name contains "noscrub", the existing derived rows are
#     removed via open-ils.worm.scrub.biblio.
#   * For every bib found, the fingerprint/quality are refreshed and the
#     full_rec rows, record descriptor and per-class field entries are
#     extracted.  Unless the API name contains "nomap", the record is also
#     mapped to the metarecord sharing its fingerprint (created on demand).
#   * All extracted rows are batch-created, and each touched metarecord gets
#     the highest-quality mapped bib as its master.
#
# Extraction for each bib runs inside its own savepoint.  Rows are staged
# per record and only merged into the batches after that savepoint has been
# released, so a record whose extraction fails contributes nothing.
#
# If no transaction is open one is begun here and committed or rolled back
# before returning.  Returns 1 on success; 0 if anything threw or if no
# record produced a descriptor.
sub wormize_biblio_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	if ( ref($rec) && ref($rec) =~ /HASH/o ) {
		$rec = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my @classes = qw/title author subject keyword series/;

	my $success = 1;

	# true only while the 'wormize_record' savepoint exists, so the error
	# handler never rolls back to a savepoint that was not established
	my $savepoint_set = 0;

	try {
		# clean up the cruft
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec )
				or OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		my @rec_descriptor = ();
		my %field_entry = map { ($_ => []) } @classes;
		my %metarecord = ();
		my @source_map = ();

		for my $r (@$bibs) {

			# per-record staging area; merged below only on success
			my @r_full_rec = ();
			my %r_field_entry = map { ($_ => []) } @classes;
			my %r_metarecord = ();
			my @r_source_map = ();

			try {
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

				my $xml = $parser->parse_string($r->marc);

				# update the fingerprint
				my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $r->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => int($fp->{quality}) }
				) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);

				# the full_rec stuff
				for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
					$fr->record( $r->id );
					push @r_full_rec, $fr;
				}

				# the rec_descriptor stuff
				my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
				$rd->record( $r->id );

				# the indexing field entry stuff
				for my $class ( @classes ) {
					for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
						$fe->source( $r->id );
						push @{$r_field_entry{$class}}, $fe;
					}
				}

				unless ($self->api_name =~ /nomap/o) {
					my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];

					# no metarecord for this fingerprint yet: this bib founds one
					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $r->id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $r->id );
					push @r_source_map, $mr_map;

					$r_metarecord{$mr->id} = $mr;
				}

				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );

				# extraction fully succeeded: hand the staged rows to the batches
				push @full_rec, @r_full_rec;
				push @rec_descriptor, $rd;
				push @{ $field_entry{$_} }, @{ $r_field_entry{$_} } for @classes;
				push @source_map, @r_source_map;
				$metarecord{$_} = $r_metarecord{$_} for keys %r_metarecord;

			} otherwise {
				$log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
			};
		}

		if (@rec_descriptor) {
			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
			$savepoint_set = 1;

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
				@source_map
			) if (@source_map);

			# pick the best-quality mapped bib as master of each touched metarecord
			for my $mr ( values %metarecord ) {
				my $sources = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
					$mr->id
				);

				my $candidates = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
					[ map { $_->source } @$sources ]
				);

				my $master = ( sort { $b->quality <=> $a->quality } @$candidates )[0];

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $master->id, mods => undef }
				);
			}

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.record_descriptor.batch.create',
				@rec_descriptor
			);

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.full_rec.batch.create',
				@full_rec
			) if (@full_rec);

			for my $class ( @classes ) {
				OpenILS::Application::Ingest->storage_req(
					"open-ils.storage.direct.metabib.${class}_field_entry.batch.create",
					@{ $field_entry{$class} }
				) if (@{ $field_entry{$class} });
			}

			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
			$savepoint_set = 0;
		} else {
			# nothing could be extracted from any record
			$success = 0;
		}

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' )
			if ($savepoint_set);
		$success = 0;
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return $success;
}

# The .nomap / .noscrub suffixes are inspected through $self->api_name above.
for my $api_name (qw/
	open-ils.worm.wormize.biblio
	open-ils.worm.wormize.biblio.nomap
	open-ils.worm.wormize.biblio.noscrub
	open-ils.worm.wormize.biblio.nomap.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_biblio_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1143
# wormize_authority_record( $self, $client, $rec )
#
# Rebuilds the authority full_rec rows for authority record(s) $rec.
# Unless the API name contains "noscrub" the existing rows are first removed
# via open-ils.worm.scrub.authority; the MARC of every matching record is
# then flattened with open-ils.worm.flat_marc.authority.xml and the rows are
# batch-created inside the 'wormize_authority_record' savepoint.
#
# If no transaction is open one is begun here and committed or rolled back
# before returning.  Returns 1 on success, 0 if anything threw.
sub wormize_authority_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;

	# true only while the savepoint exists; scrubbing and MARC parsing happen
	# before it is set, and a failure there must not trigger a rollback to a
	# savepoint that was never established
	my $savepoint_set = 0;

	try {
		# clean up the cruft
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec )
				or OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $auths = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		#my @rec_descriptor = ();
		for my $r (@$auths) {
			my $xml = $parser->parse_string($r->marc);

			# the full_rec stuff
			for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
				$fr->record( $r->id );
				push @full_rec, $fr;
			}

			# the rec_descriptor stuff -- XXX What does this mean for authority records?
			#my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
			#$rd->record( $r->id );
			#push @rec_descriptor, $rd;

		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
		$savepoint_set = 1;

		#OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
		$savepoint_set = 0;

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' )
			if ($savepoint_set);
		$success = 0;
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return $success;
}

# The .noscrub suffix is inspected through $self->api_name above.
for my $api_name (qw/
	open-ils.worm.wormize.authority
	open-ils.worm.wormize.authority.noscrub
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize_authority_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1212
1213
1214 # --------------------------------------------------------------------------------
1215 # MARC index extraction
1216
1217 package OpenILS::Application::Ingest::XPATH;
1218 use base qw/OpenILS::Application::Ingest/;
1219 use Unicode::Normalize;
1220
# _xpath_to_string( $xml, $xpath, $ns_uri, $ns_prefix, $unique )
#
# Give this a documentElement (e.g. of a MODS document) and an XPath
# expression; it returns the text found there as one NFD-normalized string.
#
#   $xml        node offering setNamespace / findnodes
#   $xpath      expression evaluated against $xml
#   $ns_uri,
#   $ns_prefix  when both are true, the namespace is declared and activated
#               on $xml before the expression is evaluated
#   $unique     when true, a child value that is exactly equal to one already
#               added is skipped
#
# For every matching node the text of each child node is appended, followed
# by a single space; a matching node with no children contributes its own
# text instead.  Returns the empty string when nothing matches.
sub _xpath_to_string {
	my $xml = shift;
	my $xpath = shift;
	my $ns_uri = shift;
	my $ns_prefix = shift;
	my $unique = shift;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# Values already added.  Exact comparison is deliberate: matching the
	# value as a regex against the buffer would treat "art" as a duplicate
	# of "heart", and an empty value would become an empty pattern, which
	# Perl replaces with the last successfully matched pattern.
	my %seen;

	# walk the set of matching nodes
	for my $value ($xml->findnodes( $xpath )) {

		# grab all children of the node
		my @children = $value->childNodes();
		for my $child (@children) {

			# add the child's content to the growing buffer
			my $content = $child->textContent;
			next if ($unique && $seen{$content}++);  # uniquify the values
			$string .= $content . " ";
		}

		# leaf match: use the node's own text
		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	return NFD($string);
}
1252
# class_all_index_string_xml( $self, $client, $xml, $class )
#
# Streams one Fieldmapper::metabib::<class>_field_entry object per index
# type configured in $xpathset->{$class}.  $xml may be a parsed document or
# a MARCXML string (parsed here).  The record is transformed through
# $mods_sheet and each type's XPath is evaluated against the result with the
# "mods" prefix bound to http://www.loc.gov/mods/ and duplicates removed.
#
# Each non-empty value has its mark (\pM) and "other" (\pC) characters
# removed, a period directly following a word character dropped, and is
# lowercased before being stored as the entry's value; the entry's field is
# the id of the XPath definition.  Types yielding an empty value are
# skipped.  Always returns undef once the stream is finished.
sub class_all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;
	my $class = shift;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	my @types = keys %{ $xpathset->{$class} };
	return undef unless @types;

	# The MODS transform does not depend on the index type, so run the
	# stylesheet once per record instead of once per type.
	my $mods_doc  = $mods_sheet->transform($xml);
	my $mods_root = $mods_doc->documentElement;

	for my $type ( @types ) {
		my $value =  _xpath_to_string(
				$mods_root,
				$xpathset->{$class}->{$type}->{xpath},
				"http://www.loc.gov/mods/",
				"mods",
				1
		);

		next unless $value;

		$value =~ s/\pM+//sgo;
		$value =~ s/\pC+//sgo;
		#$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

		$value =~ s/(\w)\./$1/sgo;
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $xpathset->{$class}->{$type}->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.xml",
	method		=> "class_all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1295
# class_all_index_string_record( $self, $client, $rec, $class )
#
# Record-id flavour of open-ils.worm.field_entry.class.xml: retrieves bib
# record $rec from storage, runs the XML method over its MARC for $class,
# stamps each resulting field entry with $rec as its source and streams it
# to the client.  Always returns undef.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $extractor = $self->method_lookup("open-ils.worm.field_entry.class.xml");
	my @entries = $extractor->run($bib->marc, $class);

	for my $entry (@entries) {
		$entry->source($rec);
		$client->respond($entry);
	}
	return undef;
}
__PACKAGE__->register_method(
	method		=> 'class_all_index_string_record',
	api_name	=> 'open-ils.worm.field_entry.class.record',
	stream		=> 1,
	argc		=> 1,
	api_level	=> 1,
);
1318
1319
# class_index_string_xml( $self, $client, $xml, $class, $type )
#
# Returns the index string for a single $class/$type pair.  $xml may be a
# parsed document or a MARCXML string (parsed here).  The record is run
# through $mods_sheet and the XPath registered in $xpathset for $class/$type
# is evaluated against the result, with the "mods" prefix bound to
# http://www.loc.gov/mods/ and duplicate values removed.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _xpath_to_string(
		$mods_sheet->transform($xml)->documentElement,
		$xpathset->{$class}->{$type}->{xpath},
		"http://www.loc.gov/mods/",
		"mods",
		1
	);
}
__PACKAGE__->register_method(
	method		=> 'class_index_string_xml',
	api_name	=> 'open-ils.worm.class.type.xml',
	argc		=> 1,
	api_level	=> 1,
);
1337
# class_index_string_record( $self, $client, $rec, $class, $type )
#
# Record-id flavour of open-ils.worm.class.type.xml: retrieves bib record
# $rec from storage, runs the XML method over its MARC for $class/$type,
# logs the result at debug level and returns it.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $extractor = $self->method_lookup("open-ils.worm.class.type.xml");
	my ($index_string) = $extractor->run($bib->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($index_string)", DEBUG);
	return $index_string;
}
__PACKAGE__->register_method(
	method		=> 'class_index_string_record',
	api_name	=> 'open-ils.worm.class.type.record',
	argc		=> 1,
	api_level	=> 1,
);
1358
# xml_xpath( $self, $client, $xml, $xpath, $uri, $prefix, $unique )
#
# Evaluates an arbitrary XPath expression against $xml (a parsed document,
# or an XML string that is parsed here) and returns the matched text as a
# single string.  $uri / $prefix / $unique are handed straight to
# _xpath_to_string together with the document element.
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _xpath_to_string(
		$xml->documentElement,
		$xpath,
		$uri,
		$prefix,
		$unique
	);
}
__PACKAGE__->register_method(
	method		=> 'xml_xpath',
	api_name	=> 'open-ils.worm.xpath.xml',
	argc		=> 1,
	api_level	=> 1,
);
1378
# record_xpath( $self, $client, $rec, $xpath, $uri, $prefix, $unique )
#
# Record-id flavour of open-ils.worm.xpath.xml: retrieves bib record $rec
# from storage, runs the XML method over its MARC with the given XPath and
# namespace arguments, logs the result at debug level and returns it.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $evaluator = $self->method_lookup("open-ils.worm.xpath.xml");
	my ($found) = $evaluator->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($found)", DEBUG);
	return $found;
}
__PACKAGE__->register_method(
	method		=> 'record_xpath',
	api_name	=> 'open-ils.worm.xpath.record',
	argc		=> 1,
	api_level	=> 1,
);
1401
1402
1403 # --------------------------------------------------------------------------------
1404 # MARC Descriptor
1405
1406 package OpenILS::Application::Ingest::Biblio::Leader;
1407 use base qw/OpenILS::Application::Ingest/;
1408 use Unicode::Normalize;
1409
# Regex fragments for the leader "type of record" character (the callers in
# this file test substr($ldr,6,1)), keyed by material-type group code.
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# _type_re( @group_codes )
#
# Returns a compiled regex matching a string that consists of exactly one
# type character belonging to any of the named groups, e.g.
# _type_re(qw/MAP VIS/) matches "e", "f", "g", "k", "r" or "o".  Unknown
# group codes are ignored.
sub _type_re {
	# A hash slice (@h{...}) is required: $h{@_} would look up a single key
	# equal to the number of arguments.  The alternation is grouped so the
	# anchors apply to every alternative, not only the first and last.
	my $re = '^(?:' . join('|', grep { defined } @marc_type_groups{@_}) . ')$';
	return qr/$re/;
}
1425
# Extractors for the record descriptor, keyed by descriptor field name.
# Each one is called with no arguments and reads the package variables
# $ldr (leader) and $oo8 (008 control field), which
# _extract_biblio_descriptors localizes before invoking them.
our %biblio_descriptor_code = (

	# values read at fixed leader offsets
	pub_status    => sub { return substr($ldr,5,1); },
	item_type     => sub { return substr($ldr,6,1); },
	bib_level     => sub { return substr($ldr,7,1); },
	control_type  => sub { return substr($ldr,8,1); },
	char_encoding => sub { return substr($ldr,9,1); },
	enc_level     => sub { return substr($ldr,17,1); },
	cat_form      => sub { return substr($ldr,18,1); },

	# values read at fixed 008 offsets
	audience      => sub { return substr($oo8,22,1); },
	item_lang     => sub { return substr($oo8,35,3); },

	# the 008 offset of the item form depends on the leader type group;
	# a single space is returned when neither group list matches
	item_form     => sub {
		my $type = substr($ldr,6,1);
		return substr($oo8,29,1) if ($type =~ _type_re( qw/MAP VIS/ ));
		return substr($oo8,23,1) if ($type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
		return ' ';
	},

	# 008 offset 33, reported only for the BKS group (undef otherwise)
	lit_form      => sub {
		return undef unless (substr($ldr,6,1) =~ _type_re('BKS'));
		return substr($oo8,33,1);
	},

	# 008 offset 33, reported only for the VIS group (undef otherwise)
	type_mat      => sub {
		return undef unless (substr($ldr,6,1) =~ _type_re('VIS'));
		return substr($oo8,33,1);
	},
);
1448
# _extract_biblio_descriptors( $xml )
#
# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.  The leader and the 008 / 007 control fields are located by
# local-name and exposed through the localized package variables $ldr, $oo8
# and $oo7 for the duration of the call; every extractor in
# %biblio_descriptor_code is then run and its result stored through the
# accessor of the same name.  Returns the populated descriptor object.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	# dynamically scoped so the extractor closures can read them
	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
	local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

	my $descriptor = Fieldmapper::metabib::record_descriptor->new;

	for my $field_name ( keys %biblio_descriptor_code ) {
		my $extract = $biblio_descriptor_code{$field_name};
		$descriptor->$field_name( $extract->() );
	}

	return $descriptor;
}
1463
# extract_biblio_desc_xml( $self, $client, $xml )
#
# Returns the record descriptor built by _extract_biblio_descriptors for
# $xml, which may be a parsed document or a MARCXML string (parsed here).
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
	method		=> 'extract_biblio_desc_xml',
	api_name	=> 'open-ils.worm.biblio_leader.xml',
	argc		=> 1,
	api_level	=> 1,
);
1479
# extract_biblio_desc_record( $self, $client, $rec )
#
# Record-id flavour of open-ils.worm.biblio_leader.xml: retrieves bib record
# $rec from storage, runs the XML method over its MARC, logs the resulting
# descriptor as JSON at debug level and returns it.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $extractor = $self->method_lookup("open-ils.worm.biblio_leader.xml");
	my ($descriptor) = $extractor->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($descriptor), DEBUG);
	return $descriptor;
}
__PACKAGE__->register_method(
	method		=> 'extract_biblio_desc_record',
	api_name	=> 'open-ils.worm.biblio_leader.record',
	argc		=> 1,
	api_level	=> 1,
);
1498
1499 # --------------------------------------------------------------------------------
1500 # Flat MARC
1501
1502 package OpenILS::Application::Ingest::FlatMARC;
1503 use base qw/OpenILS::Application::Ingest/;
1504 use Unicode::Normalize;
1505
1506
# Flatten a MARC XML document into a list of full_rec Fieldmapper objects.
#
#   $marcxml - parsed document; the first element whose local name is
#              "record" is used
#   $xmltype - Fieldmapper namespace for the row class, default 'metabib'
#              (rows are Fieldmapper::<xmltype>::full_rec)
#
# One row is produced for the leader (tag 'LDR'), one per controlfield and
# one per datafield subfield.  Every value is NFD-decomposed and has its
# combining marks removed; subfield values are additionally lower-cased.
#
# Returns the list of rows, in document order within each of the three groups.
sub _marcxml_to_full_rows {
        my ($marcxml, $xmltype) = @_;
        $xmltype ||= 'metabib';

        my $row_class = "Fieldmapper::${xmltype}::full_rec";

        # Decompose to NFD, then drop every run of combining marks.
        my $strip_marks = sub {
                my $val = NFD($_[0]);
                $val =~ s/(\pM+)//gso;
                return $val;
        };

        my @rows;

        my ($record) = $marcxml->findnodes('//*[local-name()="record"]');

        # Leader: stored under the pseudo-tag 'LDR'.
        foreach my $leader ( @{ $record->getChildrenByTagName("leader") } ) {
                next unless $leader;

                my $row = $row_class->new;
                $row->tag( 'LDR' );

                my $raw = $leader->textContent;
                $row->value( $strip_marks->($raw) );

                push @rows, $row;
        }

        # Control fields: tag plus value, no indicators or subfield code.
        foreach my $control ( @{ $record->getChildrenByTagName("controlfield") } ) {
                next unless $control;

                my $row = $row_class->new;
                $row->tag( $control->getAttribute( "tag" ) );

                my $raw = $control->textContent;
                $row->value( $strip_marks->($raw) );

                push @rows, $row;
        }

        # Data fields: one row per subfield, carrying the parent's tag and
        # indicators.
        foreach my $datafield ( @{ $record->getChildrenByTagName("datafield") } ) {
                next unless $datafield;

                my $tag  = $datafield->getAttribute( "tag" );
                my $ind1 = $datafield->getAttribute( "ind1" );
                my $ind2 = $datafield->getAttribute( "ind2" );

                foreach my $subfield ( @{ $datafield->getChildrenByTagName('subfield') } ) {
                        next unless $subfield;

                        my $row = $row_class->new;
                        $row->tag( $tag );
                        $row->ind1( $ind1 );
                        $row->ind2( $ind2 );
                        $row->subfield( $subfield->getAttribute( "code" ) );

                        my $raw = $subfield->textContent;
                        $row->value( lc( $strip_marks->($raw) ) );

                        push @rows, $row;
                }
        }

        $log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml", DEBUG);
        return @rows;
}
1574
# API methods "open-ils.worm.flat_marc.authority.xml" and
# "open-ils.worm.flat_marc.biblio.xml".
#
# Accepts MARC XML as a string or a parsed document and streams one full_rec
# row per response.  Rows are authority::full_rec when the invoked API name
# contains "authority", metabib::full_rec otherwise.  Always returns undef;
# the payload travels through $client->respond.
sub flat_marc_xml {
        my ($self, $client, $xml) = @_;

        if (!ref $xml) {
                $xml = $parser->parse_string($xml);
        }

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

        OpenILS::Application::Ingest->post_init();

        foreach my $row ( _marcxml_to_full_rows($xml, $type) ) {
                $client->respond($row);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.authority.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.biblio.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1604
# API methods "open-ils.worm.flat_marc.biblio.record_entry" and
# "open-ils.worm.flat_marc.authority.record_entry".
#
# Retrieves the record entry $rec (authority when the invoked API name
# contains "authority", biblio otherwise), feeds its MARC to the matching
# "open-ils.worm.flat_marc.<type>.xml" method and relays every row that
# method produces.  Always returns undef.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest->storage_req(
                "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
        );

        my @rows = $self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($entry->marc);
        foreach my $row (@rows) {
                $client->respond($row);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.flat_marc.authority.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1633
1634
1635 # --------------------------------------------------------------------------------
1636 # Fingerprinting
1637
1638 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1639 use base qw/OpenILS::Application::Ingest/;
1640 use Unicode::Normalize;
1641 use OpenSRF::EX qw/:try/;
1642
# Fingerprint extraction rules, consumed pairwise by _fp_mods().
#
# @fp_mods_xpath is a flat list of (match xpath => block) pairs.  A block is
# an array ref holding (part name => spec) pairs, and each spec carries
#   xpath - candidate XPath expressions, tried in the order listed
#   fixup - closure that normalizes the package variable $text in place
#
# The fixup closures take no arguments: they read and rewrite $text and log
# its value before the first step and after every step.
#
# NOTE(review): INTERNAL is written as a bare word; the visible code of this
# package does not itself import the logger level constants -- confirm it
# resolves to the intended level.

# Emits the current value of $text at the INTERNAL level.
my $fp_trace = sub {
        $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
};

# Title normalization: decompose, drop combining marks, lower-case, collapse
# whitespace, trim, remove the articles "the"/"a"/"an", remove bracketed
# text, and strip trailing whitespace followed by ; / . characters.
my $fp_title_fixup = sub {
        $fp_trace->();
        $text = NFD($text);
        $fp_trace->();
        $text =~ s/\pM+//gso;
        $fp_trace->();
        $text = lc($text);
        $fp_trace->();
        $text =~ s/\s+/ /sgo;
        $fp_trace->();
        $text =~ s/^\s*(.+)\s*$/$1/sgo;
        $fp_trace->();
        $text =~ s/\b(?:the|an?)\b//sgo;
        $fp_trace->();
        $text =~ s/\[.[^\]]+\]//sgo;
        $fp_trace->();
        $text =~ s/\s*[;\/\.]*$//sgo;
        $fp_trace->();
};

# Author normalization: same first five steps as for titles, then everything
# from the first (optionally comma-prefixed) whitespace onward is cut off.
my $fp_author_fixup = sub {
        $fp_trace->();
        $text = NFD($text);
        $fp_trace->();
        $text =~ s/\pM+//gso;
        $fp_trace->();
        $text = lc($text);
        $fp_trace->();
        $text =~ s/\s+/ /sgo;
        $fp_trace->();
        $text =~ s/^\s*(.+)\s*$/$1/sgo;
        $fp_trace->();
        $text =~ s/,?\s+.*$//sgo;
        $fp_trace->();
};

# Candidate locations on the record itself, in priority order.
my @fp_title_xpath = (
        '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
        '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
        '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
        '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);
my @fp_author_xpath = (
        '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
        '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

my @fp_mods_xpath = (
        '//mods:mods/mods:typeOfResource[text()="text"]' => [
                title   => {
                        xpath   => [ @fp_title_xpath ],
                        fixup   => $fp_title_fixup,
                },
                author  => {
                        xpath   => [ @fp_author_xpath ],
                        fixup   => $fp_author_fixup,
                },
        ],

        # Related-item rules look inside the related item first and fall
        # back to the record's own title and author.
        '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
                title   => {
                        xpath   => [
                                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
                                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
                                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
                                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
                                @fp_title_xpath,
                        ],
                        fixup   => $fp_title_fixup,
                },
                author  => {
                        xpath   => [
                                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
                                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
                                @fp_author_xpath,
                        ],
                        fixup   => $fp_author_fixup,
                },
        ],

);

# Last resort: any record with a titleInfo reuses the first block above.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1755
# Compute a fingerprint string from a MODS element.
#
# Walks @fp_mods_xpath two entries at a time (match xpath, then its block).
# For the first rule whose match xpath selects at least one node and whose
# parts yield text, the fixed-up part texts are concatenated, every run of
# non-word characters is removed, and the result is returned.  Returns undef
# when no rule produces a non-empty fingerprint.
#
# The text being worked on lives in the package variable $text, localized
# per part, because the fixup closures modify it in place.
sub _fp_mods {
        my ($mods) = @_;
        $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

        my $fp_string = '';

        for (my $i = 0; my $match_xpath = $fp_mods_xpath[$i]; $i += 2) {

                # List assignment on purpose: we need the node count, not the
                # scalar-context return value of findnodes().
                my @hits = $mods->findnodes( $match_xpath );
                next unless @hits;

                my $block = $fp_mods_xpath[$i + 1];

                # Blocks alternate part name (even index) and spec (odd index).
                for (my $j = 0; my $part = $block->[$j + 1]; $j += 2) {
                        local $text;

                        # First candidate xpath that yields a true value wins.
                        for my $xpath ( @{ $part->{xpath} } ) {
                                $text = $mods->findvalue( $xpath );
                                last if ($text);
                        }

                        $log->debug("Found fingerprint text using $block->[$j] : [$text]", DEBUG);

                        if ($text) {
                                $part->{fixup}->();
                                $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                                $fp_string .= $text;
                        }
                }

                if ($fp_string) {
                        $fp_string =~ s/\W+//gso;
                        $log->debug("Fingerprint is [$fp_string]", INFO);
                        return $fp_string;
                }
        }

        return undef;
}
1800
# API methods "open-ils.worm.fingerprint.record.update" and
# "open-ils.worm.fingerprint.record.update.nomap".
#
# Recomputes the fingerprint and quality of the bib record(s) found by id
# $rec via "open-ils.worm.fingerprint.marc".  When either value differs from
# what is stored, the record entry is updated and -- unless the invoked API
# name contains "nomap" -- the record's metarecord source map is rebuilt:
# old map rows are deleted, the old metarecord is deleted if no map rows
# reference it any more, and the record is mapped to the metarecord carrying
# the new fingerprint (created on demand with this record as master).
#
# Each processed record id is sent through $client->respond; the sub itself
# always returns undef.  If no storage transaction is open one is started
# here, then committed on success or rolled back when anything inside the
# try block throws.
#
# NOTE(review): the ".nomap" registration does not set stream => 1 although
# this sub calls $client->respond -- confirm that is intentional.
# NOTE(review): $fp is dereferenced as a hash without checking it; confirm
# what the fingerprint method hands back when its script fails.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        # Only manage the transaction if we are the ones who opened it.
        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client)
                        || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # Drop every existing map row for this record,
                                        # remembering the metarecord of the last one.
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared separately from the conditional
                                        # assignment: "my $x = ... if COND" has undefined
                                        # behaviour and could leave a value from an earlier
                                        # loop iteration in place when COND is false.
                                        my $old_sm;
                                        if ($old_mrid) {
                                                $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );
                                        }

                                        # No map rows left for the old metarecord: delete it.
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                # The caught exception arrives as the handler's first argument.
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);

__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
);
1903
1904 =comment
1905
1906 sub fingerprint_bibrec {
1907         my $self = shift;
1908         my $client = shift;
1909         my $rec = shift;
1910
1911         OpenILS::Application::Ingest->post_init();
1912         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1913
1914         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1915         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1916         return $fp;
1917
1918 }
1919 __PACKAGE__->register_method(  
1920         api_name        => "open-ils.worm.fingerprint.record",
1921         method          => "fingerprint_bibrec",
1922         api_level       => 0,
1923         argc            => 1,
1924 );                      
1925
1926
1927 sub fingerprint_mods {
1928         my $self = shift;
1929         my $client = shift;
1930         my $xml = shift;
1931
1932         OpenILS::Application::Ingest->post_init();
1933         my $mods = $parser->parse_string($xml)->documentElement;
1934
1935         return _fp_mods( $mods );
1936 }
1937 __PACKAGE__->register_method(  
1938         api_name        => "open-ils.worm.fingerprint.mods",
1939         method          => "fingerprint_mods",
1940         api_level       => 1,
1941         argc            => 1,
1942 );                      
1943
1944 sub fingerprint_marc {
1945         my $self = shift;
1946         my $client = shift;
1947         my $xml = shift;
1948
1949         $xml = $parser->parse_string($xml) unless (ref $xml);
1950
1951         OpenILS::Application::Ingest->post_init();
1952         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1953         $log->debug("Returning [$fp] as fingerprint", INFO);
1954         return $fp;
1955 }
1956 __PACKAGE__->register_method(  
1957         api_name        => "open-ils.worm.fingerprint.marc",
1958         method          => "fingerprint_marc",
1959         api_level       => 1,
1960         argc            => 1,
1961 );                      
1962
1963
1964 =cut
1965
# API method "open-ils.worm.fingerprint.record".
#
# Retrieves the bib record entry $rec through the storage service, passes
# its MARC to "open-ils.worm.fingerprint.marc" and returns the first value
# that method yields.
sub biblio_fingerprint_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec
        );
        my $marc = $entry->marc;

        my $fp_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
        my ($fp) = $fp_method->run($marc);

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
1987
# ScriptRunner for the fingerprint script; built on first use by
# biblio_fingerprint() and reused by later calls.
our $fp_script;

# API method "open-ils.worm.fingerprint.marc".
#
# Takes MARC XML as a string or a parsed document, derives MODS from it with
# $mods_sheet, entityizes both serializations, and runs the configured
# "biblio_fingerprint" script with { marc => ..., mods => ... } inserted as
# its 'environment'.
#
# Returns whatever the script run returns, or 0 when the run yields a false
# value (the failure is logged as an error first).
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        # From here on $marc is the entityized string, not the document.
        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if (!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};
        #my $res = {fingerprint => '', quality => '0'};

        $fp_script->insert('environment' => $env);
        #$fp_script->insert('result' => $res);

        $log->debug("Running script for biblio fingerprinting...");

        # Bail out explicitly on failure so the success message below is
        # never logged for a failed run, whatever $log->error returns.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
2050
2051 # --------------------------------------------------------------------------------
2052
2053 1;
2054
2055 __END__
2056 my $in_xact;
2057 my $begin;
2058 my $commit;
2059 my $rollback;
2060 my $lookup;
2061 my $update_entry;
2062 my $mr_lookup;
2063 my $mr_update;
2064 my $mr_create;
2065 my $create_source_map;
2066 my $sm_lookup;
2067 my $rm_old_rd;
2068 my $rm_old_sm;
2069 my $rm_old_fr;
2070 my $rm_old_tr;
2071 my $rm_old_ar;
2072 my $rm_old_sr;
2073 my $rm_old_kr;
2074 my $rm_old_ser;
2075
2076 my $fr_create;
2077 my $rd_create;
2078 my $create = {};
2079
2080 my %descriptor_code = (
2081         item_type => 'substr($ldr,6,1)',
2082         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2083         bib_level => 'substr($ldr,7,1)',
2084         control_type => 'substr($ldr,8,1)',
2085         char_encoding => 'substr($ldr,9,1)',
2086         enc_level => 'substr($ldr,17,1)',
2087         cat_form => 'substr($ldr,18,1)',
2088         pub_status => 'substr($ldr,5,1)',
2089         item_lang => 'substr($oo8,35,3)',
2090         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2091         audience => 'substr($oo8,22,1)',
2092 );
2093
# wormize( $self, $client, @docids )
#
# Ingests bibliographic records: for each record entry retrieved for
# @docids it computes a fingerprint from the MODS transform of the MARC,
# builds a record descriptor, the full_rec rows and the per-class field
# entries, then replaces the previously stored rows with the new ones.
#
# Behaviour depends on the API name the method was invoked under:
#   * names containing "no_map" skip all metarecord / source-map handling;
#   * names not ending in "batch" stop after the first record that was found.
#
# If no storage transaction is active, one is begun here and committed at
# the end; otherwise the caller's transaction is used and left open.
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC when
# the transaction cannot be begun or committed, or when a create/update
# call returns nothing.
sub wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        my $no_map = ($self->api_name =~ /no_map/o) ? 1 : 0;

        # Resolve each storage method once; the handles live in file-level
        # lexicals so later calls reuse them.
        $in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
        $begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
        $commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
        $rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
        $sm_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
        $mr_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
        $mr_update ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
        $lookup ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
        $update_entry ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
        $rm_old_sm ||= $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
        $rm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete');
        $rm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete');
        $rm_old_tr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete');
        $rm_old_ar ||= $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete');
        $rm_old_sr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
        $rm_old_kr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
        $rm_old_ser ||= $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete');
        $mr_create ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
        $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
        $rd_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create');
        $fr_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create');
        $$create{title} ||= $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create');
        $$create{author} ||= $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create');
        $$create{subject} ||= $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create');
        $$create{keyword} ||= $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create');
        $$create{series} ||= $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create');


        # Only open (and later commit) a transaction when the caller has
        # not already started one.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @source_maps;
        my @entry_list;
        my @mr_list;
        my @rd_list;
        my @ns_list;
        my @mods_data;
        my $ret = 0;
        for my $entry ( $lookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # The MODS stylesheet is parsed on first use and cached.
                if(!$mods_sheet) {
                        my $xslt_doc = $parser->parse_file(
                                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                my $modsdoc = $mods_sheet->transform($marcdoc);

                my $mods = $modsdoc->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                $entry->fingerprint( fingerprint_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                unless ($no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if (!$mr || !@$mr) {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );
                                my ($new_mr) = $mr_create->run($mr);
                                # Check the value returned by the create call:
                                # $mr itself is always defined at this point, so
                                # testing it could never detect a failure.
                                unless (defined $new_mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                                $mr->id($new_mr);
                        } else {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        }

                        my $sm = Fieldmapper::metabib::metarecord_source_map->new;
                        $sm->metarecord( $mr->id );
                        $sm->source( $entry->id );
                        push @source_maps, $sm;
                }

                # $ldr and $oo8 are referenced by name from the expression
                # strings in %descriptor_code, which are eval'ed below.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                # Non-batch API names handle a single record only.
                last unless ($self->api_name =~ /batch$/o);
        }

        # Remove the rows produced by earlier ingests of these records.
        # NOTE(review): the deletes cover every id in @docids, including ids
        # that were not re-processed above (no entry found, or anything after
        # the first record in non-batch mode) -- confirm this is intended.
        $rm_old_rd->run( { record => \@docids } );
        $rm_old_fr->run( { record => \@docids } );
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@docids } );
        $rm_old_ar->run( { source => \@docids } );
        $rm_old_sr->run( { source => \@docids } );
        $rm_old_kr->run( { source => \@docids } );
        $rm_old_ser->run( { source => \@docids } );

        unless ($no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                unless (defined $sm) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
                }
                my ($mr) = $mr_update->run(@mr_list);
                unless (defined $mr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
                }
        }

        # Store the updated fingerprints.
        my ($re) = $update_entry->run(@entry_list);
        unless (defined $re) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
        }

        my ($rd) = $rd_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
        }

        my ($fr) = $fr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
        }

        # step 5: insert the new metadata
        for my $class ( qw/title author subject keyword series/ ) {
                my @md_list = ();
                for my $doc ( @mods_data ) {
                        # Each element is a single-pair hash: { docid => data }.
                        my ($did) = keys %$doc;
                        my ($data) = values %$doc;

                        my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
                        for my $row ( keys %{ $$data{$class} } ) {
                                next unless (exists $$data{$class}{$row});
                                next unless ($$data{$class}{$row}{value});
                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $$data{$class}{$row}{value} );
                                $fm_obj->field( $$data{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $$create{$class}->run(@md_list);
                unless (defined $cr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
                }
        }

        # Commit only a transaction that was started here.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# Publish wormize() under each of its API names.  The handler inspects the
# name it was called with to decide on no_map and batch behaviour.
for my $api_name (
        "open-ils.worm.wormize",
        "open-ils.worm.wormize.no_map",
        "open-ils.worm.wormize.batch",
        "open-ils.worm.wormize.no_map.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2346
2347
# Cached storage-method handles reserved for authority ingest, declared in
# parallel with the bibliographic set above.  authority_wormize() fills in
# the ones it looks up on its first call.

# Transaction control.
my ($ain_xact, $abegin, $acommit, $arollback);

# Authority record entries, metarecords and source maps.
my ($alookup, $aupdate_entry);
my ($amr_lookup, $amr_update, $amr_create);
my ($acreate_source_map, $asm_lookup);

# mass_delete handles.
my ($arm_old_rd, $arm_old_sm, $arm_old_fr);
my ($arm_old_tr, $arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser);

# batch.create handles.
my ($afr_create, $ard_create);
my $acreate = {};
2371
# authority_wormize( $self, $client, @docids )
#
# Ingests authority records: for each authority record entry retrieved for
# @docids it builds a record descriptor and the full_rec rows from the
# MARC, then replaces the previously stored descriptor and full_rec rows.
#
# When invoked under an API name that does not end in "batch", processing
# stops after the first record that was found.
#
# If no storage transaction is active, one is begun here and committed at
# the end; otherwise the caller's transaction is used and left open.
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC when
# the transaction cannot be begun or committed, or when a create call
# returns nothing.
sub authority_wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        # Resolve each storage method once; the handles live in file-level
        # lexicals so later calls reuse them.  The transaction handles are
        # the same ones wormize() uses.
        $in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
        $begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
        $commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
        $rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
        $alookup ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
        $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
        $arm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete');
        $arm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete');
        $ard_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create');
        $afr_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create');


        # Only open (and later commit) a transaction when the caller has
        # not already started one.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @rd_list;
        my @ns_list;
        my $ret = 0;

        # Retrieve through the authority handle; the biblio handle ($lookup)
        # is only set once wormize() has run and fetches a different class.
        for my $entry ( $alookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # TODO: no MADS transform is applied yet; only the raw MARC
                # is used below.

                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($entry->marc);

                # $ldr and $oo8 are referenced by name from the expression
                # strings in %descriptor_code, which are eval'ed below.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::authority::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                # Non-batch API names handle a single record only.
                last unless ($self->api_name =~ /batch$/o);
        }

        # Remove the rows produced by earlier ingests of these records.
        $arm_old_rd->run( { record => \@docids } );
        $arm_old_fr->run( { record => \@docids } );

        my ($rd) = $ard_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
        }

        # Use the authority full_rec handle, matching the error text below;
        # $fr_create is the metabib one.
        my ($fr) = $afr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
        }

        # Commit only a transaction that was started here.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# Publish authority_wormize() under its API names.  The handler must be
# authority_wormize: pointing these names at wormize would run the
# bibliographic ingest against authority record ids.
for my $api_name (
        "open-ils.worm.authority.wormize",
        # Misspelled name kept so callers of the old spelling keep working.
        "open-ils.worm.authortiy.wormize",
        "open-ils.worm.authority.wormize.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "authority_wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2502
2503
2504 # --------------------------------------------------------------------------------
2505
2506
# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Flattens a parsed MARCXML document into a list of full_rec row objects:
# one for each leader (tag 'LDR'), one for each controlfield, and one for
# each element child of each datafield.
#
#   $marcxml - parsed document; its documentElement is the record element
#   $type    - class used to construct every row; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Values are NFD-normalized with combining marks removed.  Datafield
# values are additionally lower-cased; leader and controlfield values keep
# their case.  The record id is not set here; callers set it.
#
# Returns the list of row objects, in document order within each group.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        # libxml2 node type code for element nodes (XML_ELEMENT_NODE).
        my $element_node = 1;

        # Decompose the text and drop the combining marks.
        my $strip_marks = sub {
                my $val = NFD(shift);
                $val =~ s/(\pM+)//gso;
                return $val;
        };

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build with $type so non-default row classes are honoured
                # for the leader as well.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;

                        # Skip whitespace text, comments and other non-element
                        # children: they carry no subfield code and do not
                        # support getAttribute.
                        next unless ($data->nodeType == $element_node);

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc($strip_marks->($data->textContent)) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2567
# _get_field_value( $root, $xpath )
#
# Collects the text found under every node that $xpath matches beneath
# $root and returns it as one normalized string.
#
# For each matched node, the text of each child node is appended followed
# by a single space, unless that text already occurs as a substring of the
# string built so far.  A matched node with no children contributes its
# own text instead (no duplicate check).
#
# The result is NFD-normalized, stripped of combining marks and
# lower-cased.  An empty string is returned when nothing matches.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # grab the set of matching nodes
        for my $value ( $root->findnodes( $xpath ) ) {

                # grab all children of the node
                my @children = $value->childNodes();
                for my $child (@children) {

                        # add the childs content to the growing buffer
                        my $content = $child->textContent;

                        # uniquify the values.  A literal substring test is
                        # used rather than a regex: with empty text the
                        # pattern would be empty, and Perl then applies the
                        # last successfully matched pattern instead.
                        next if (index($string, $content) >= 0);
                        $string .= $content . " ";
                }
                if( ! @children ) {
                        $string .= $value->textContent . " ";
                }
        }
        $string = NFD($string);
        $string =~ s/(\pM)//gso;
        return lc($string);
}
2595
2596
# modsdoc_to_values( $self, $mods )
#
# Builds a two-level hash mirroring $xpathset: for every class and every
# type within it, the entry is { field_id => <the id configured for that
# type> }.  Returns a reference to that hash.
#
# NOTE(review): $mods is accepted but not read, and no 'value' key is set
# here, although wormize() skips rows without one -- confirm whether value
# extraction is meant to happen in this sub.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %values;
        for my $class (keys %$xpathset) {
                $values{$class} = {
                        map { $_ => { field_id => $xpathset->{$class}->{$_}->{id} } }
                                keys %{$xpathset->{$class}}
                };
        }

        return \%values;
}
2609
2610
2611 1;
2612
2613