]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
7dabc28294ef464de4fbabeb0eb462ad3cbe04a5
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
# Record formats known to the ingest code, keyed by format name.  Each entry
# holds the XML namespace URI ('ns') used when evaluating XPath expressions
# for that format; post_init() additionally stores a compiled stylesheet
# under 'xslt' for the mods and mods3 entries.
our %supported_formats = (
	marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
	mods    => { ns => 'http://www.loc.gov/mods/' },
	mods3   => { ns => 'http://www.loc.gov/mods/v3' },
	oai_dc  => { ns => '' },
	rdf_dc  => { ns => '' },
	srw_dc  => { ns => '' },
);
28
29
# Logger class; all logging in this file is done through class-method calls
# on this name.
our $log = 'OpenSRF::Utils::Logger';

# One XSLT processor and one XML parser shared by every method in the file.
our $xslt   = XML::LibXSLT->new();
our $parser = XML::LibXML->new();

# Declared for stylesheet storage; not assigned anywhere in the code above
# __END__ in this file.
our ($mods_sheet, $mads_sheet);

# class => name => { xpath, id, format }, filled in by post_init().
our $xpathset = {};

# Application hooks; deliberately empty.
sub initialize {}
sub child_init {}
40
# Lazily load everything the indexing methods need: the MODS / MODS v3
# stylesheets (stored in %supported_formats) and the metabib field
# definitions fetched from open-ils.cstore (stored in $xpathset).
#
# $xpathset doubles as the "already done" marker: once it has at least one
# key the sub returns immediately.  Nothing useful is returned.
sub post_init {

	return if keys %$xpathset;

	$log->debug("Running post_init", DEBUG);

	my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

	# format name, log message, stylesheet path below the xsl directory
	my @stylesheets = (
		[ mods  => "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
		[ mods3 => "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
	);

	for my $sheet (@stylesheets) {
		my ($format, $message, $file) = @$sheet;

		# a stylesheet compiled on an earlier (incomplete) run is reused
		next if $supported_formats{$format}{xslt};

		$log->debug($message, DEBUG);
		my $xslt_doc = $parser->parse_file( $xsldir . $file );
		$supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
	}

	# every config.metabib_field row (id is never NULL)
	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $fields  = $session
		->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
		->gather(1);

	if (ref $fields and @$fields) {
		for my $field (@$fields) {
			my $class = $field->field_class;
			my $name  = $field->name;

			@{ $xpathset->{$class}->{$name} }{qw/xpath id format/}
				= ( $field->xpath, $field->id, $field->format );

			$log->debug("Loaded XPath from DB: ".$field->field_class." => ".$field->name." : ".$field->xpath, DEBUG);
		}
	}
}
76
# Normalise a string and replace every non-ASCII character with a
# hexadecimal XML character reference (&#xHHHH;).
#
# Arguments (positional):
#   $stuff - the string to convert
#   $form  - 'D' to normalise to NFD first; anything else (including
#            undef) normalises to NFC
#
# Returns the converted string.
sub entityize {
	my $stuff = shift;
	my $form = shift;

	# $form is optional, so guard the comparison against undef
	if (defined $form && $form eq 'D') {
		$stuff = NFD($stuff);
	} else {
		$stuff = NFC($stuff);
	}

	# U+0080..U+FFFD plus the supplementary planes; U+FFFE and U+FFFF stay
	# excluded because they are not legal XML characters.
	$stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/ge;
	return $stuff;
}
90
# Run the full read-only ingest over one MARCXML string.
#
# Arguments: $self, $client, then the MARCXML text.
#
# Returns a hash reference with three keys:
#   full_rec      - array ref of rows from open-ils.ingest.flat_marc.biblio.xml
#   field_entries - array ref of entries from
#                   open-ils.ingest.extract.field_entry.all.xml
#   fingerprint   - first result of open-ils.ingest.fingerprint.xml
sub ro_biblio_ingest_single_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;

	# The field-entry extraction enumerates keys(%$xpathset) up front, so the
	# field definitions must be loaded before it is looked up and run.
	OpenILS::Application::Ingest->post_init();

	my $document = $parser->parse_string($xml);

	# the parsed document is shared by the two extractors; the fingerprint
	# method is handed the original string
	my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
	my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
	my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);

	return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp };
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.full.biblio.xml.readonly",
	method          => "ro_biblio_ingest_single_xml",
	api_level       => 1,
	argc            => 1,
);
110
# Read-only ingest of a stored bibliographic record.
#
# Arguments: $self, $client, then the value passed to cstore's
# biblio.record_entry.retrieve call.
#
# Returns undef when nothing usable comes back from cstore; otherwise the
# hash produced by open-ils.ingest.full.biblio.xml.readonly, with $rec
# stamped onto every field entry (source) and every full_rec row (record).
sub ro_biblio_ingest_single_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
		->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
		->gather(1);

	# the result is dereferenced as an array here and used as an object below
	return undef unless ($r and @$r);

	my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

	for my $entry (@{$res->{field_entries}}) {
		$entry->source($rec);
	}
	for my $row (@{$res->{full_rec}}) {
		$row->record($rec);
	}

	return $res;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.full.biblio.record.readonly",
	method          => "ro_biblio_ingest_single_record",
	api_level       => 1,
	argc            => 1,
);
137
# Streaming variant of the record-based read-only ingest: keeps receiving
# messages from the client, ingests the record named by each message's
# content and responds with the result.  The loop stops when recv() yields
# nothing or a message has undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_record {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# created as in the original; not referenced again in this sub
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

		my $rec = $resp->content;
		last unless (defined $rec);

		$log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($rec);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($rec);
		}

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
	method          => "ro_biblio_ingest_stream_record",
	api_level       => 1,
	stream          => 1,
);
168
# Streaming variant of the XML-based read-only ingest: each message received
# from the client carries one MARCXML string as its content; the ingest
# result for it is sent straight back.  The loop stops when recv() yields
# nothing or a message has undefined content.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# created as in the original; not referenced again in this sub
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

		my $xml = $resp->content;
		last unless (defined $xml);

		$log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
	method          => "ro_biblio_ingest_stream_xml",
	api_level       => 1,
	stream          => 1,
);
196
# Streaming import entry point.  Each message received from the client
# carries a record object (it must answer ->marc and ->id); its MARC is run
# through the read-only ingest and the record id is stamped onto the
# resulting objects.
#
# What goes back to the client is a single number: the count of field
# entries plus the count of full_rec rows.  Nothing is stored by this sub.
# Always returns undef.
sub rw_biblio_ingest_stream_import {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# created as in the original; not referenced again in this sub
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

		my $bib = $resp->content;
		last unless (defined $bib);

		$log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($bib->id);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($bib->id);
		}

		# both arrays are in numeric (scalar) context: this is a sum of sizes
		$client->respond( @{$res->{field_entries}} + @{$res->{full_rec}} );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
	method          => "rw_biblio_ingest_stream_import",
	api_level       => 1,
	stream          => 1,
);
227
228
229 # --------------------------------------------------------------------------------
230 # MARC index extraction
231
232 package OpenILS::Application::Ingest::XPATH;
233 use base qw/OpenILS::Application::Ingest/;
234 use Unicode::Normalize;
235
# Collect the text selected by an XPath expression into one string.
#
# Arguments (positional):
#   $xml       - node to search; must support setNamespace() and findnodes()
#   $xpath     - XPath expression evaluated against $xml
#   $ns_uri    - namespace URI to declare on $xml (skipped unless both the
#                URI and the prefix are true)
#   $ns_prefix - prefix to bind to that URI
#   $unique    - when true, each distinct child text value is added only
#                once and empty child text is skipped
#
# For every matched node the text of each child node is appended, followed
# by a single space; a matched node without children contributes its own
# text instead.  Returns the accumulated string in NFD form.
sub xpath_to_string {
	my $xml = shift;
	my $xpath = shift;
	my $ns_uri = shift;
	my $ns_prefix = shift;
	my $unique = shift;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# Exact values already added.  A regex test against the growing buffer
	# would treat "Smith" as a duplicate of "Smithson", and an empty value
	# would turn into Perl's "reuse the last successful pattern" match.
	my %seen;

	# grab the set of matching nodes
	my @nodes = $xml->findnodes( $xpath );
	for my $value (@nodes) {

		# grab all children of the node
		my @children = $value->childNodes();
		for my $child (@children) {

			my $content = $child->textContent;

			if ($unique) {
				next if $content eq '';      # nothing to index
				next if $seen{$content}++;   # uniquify the values
			}

			# add the child's content to the growing buffer
			$string .= $content . " ";
		}
		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	return NFD($string);
}
267
# Build index field entries for the requested classes from one MARCXML
# document.
#
# Arguments: $self, $client, the XML (string or already-parsed document),
# then any number of class names.
#
# For each field definition of each class the document is converted to the
# definition's format when a stylesheet exists for it (one transform per
# format per call), the definition's XPath is evaluated, and the resulting
# text is stripped of combining marks and control characters, has a period
# following a word character removed, and is lower-cased.  A
# Fieldmapper::metabib::<class>_field_entry carrying that value and the
# definition id is streamed to the client.  Always returns undef.
sub class_index_string_xml {
	my ($self, $client, $xml, @classes) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	# format name => transformed document
	my %transform_cache;

	for my $class (@classes) {
		my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

		for my $type ( keys %{ $xpathset->{$class} } ) {

			my $def = $xpathset->{$class}->{$type};
			my $format = $def->{format};
			my $sf = $supported_formats{$format};

			# formats without a stylesheet are searched in the source document
			my $document = $xml;
			if ($sf->{xslt}) {
				$document = ( $transform_cache{$format} ||= $sf->{xslt}->transform($xml) );
			}

			# the format name is also used as the namespace prefix
			my $value = xpath_to_string(
				$document->documentElement,
				$def->{xpath},
				$sf->{ns},
				$format,
				1,
			);

			next unless $value;

			$value =~ s/\pM+//sgo;
			$value =~ s/\pC+//sgo;

			$value =~ s/(\w)\./$1/sgo;
			$value = lc($value);

			my $fm = $entry_class->new;
			$fm->value( $value );
			$fm->field( $def->{id} );
			$client->respond($fm);
		}
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.field_entry.class.xml",
	method          => "class_index_string_xml",
	api_level       => 1,
	argc            => 2,
	stream          => 1,
);
323
# Build index field entries for the requested classes from a stored
# bibliographic record.
#
# Arguments: $self, $client, the value passed to cstore's
# biblio.record_entry.retrieve call, then any number of class names.
#
# Each entry produced by open-ils.ingest.field_entry.class.xml gets its
# source set to $rec and is streamed to the client.  Returns undef, also
# when nothing usable comes back from cstore.
sub class_index_string_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	# take every remaining argument, as class_index_string_xml does; a bare
	# shift here would silently drop all but the first class
	my @classes = @_;

	OpenILS::Application::Ingest->post_init();
	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and @$r);

	for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
		$fm->source($rec);
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.field_entry.class.record",
	method          => "class_index_string_record",
	api_level       => 1,
	argc            => 2,
	stream          => 1,
);
351
# Build index field entries for every known class from one MARCXML
# document.
#
# Arguments: $self, $client, then the XML (passed through unchanged to
# open-ils.ingest.field_entry.class.xml).
#
# Every entry produced is streamed to the client.  Always returns undef.
sub all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;

	# The class list below is read from $xpathset before the class-level
	# method gets a chance to load it, so make sure it is populated first.
	OpenILS::Application::Ingest->post_init();

	for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.extract.field_entry.all.xml",
	method          => "all_index_string_xml",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
369
# Build index field entries for every known class from a stored
# bibliographic record.
#
# Arguments: $self, $client, then the value passed to cstore's
# biblio.record_entry.retrieve call.
#
# Each entry gets its source set to $rec and is streamed to the client.
# Returns undef, also when nothing usable comes back from cstore.
sub all_index_string_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
		->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
		->gather(1);

	return undef unless ($r and @$r);

	my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
	for my $fm ($extractor->run($r->marc, keys(%$xpathset))) {
		$fm->source($rec);
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.extract.field_entry.all.record",
	method          => "all_index_string_record",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
396
397 # --------------------------------------------------------------------------------
398 # Flat MARC
399
400 package OpenILS::Application::Ingest::FlatMARC;
401 use base qw/OpenILS::Application::Ingest/;
402 use Unicode::Normalize;
403
404
# Return the NFD form of a string with every combining mark removed.
sub _strip_combining_marks {
	my $val = NFD(shift);
	$val =~ s/\pM+//g;
	return $val;
}

# Flatten a parsed MARCXML document into one full_rec object per leader,
# control field and data-field subfield.
#
# Arguments (positional):
#   $marcxml - parsed document (or node) answering findnodes()
#   $xmltype - Fieldmapper namespace for the row class; defaults to
#              'metabib', giving Fieldmapper::metabib::full_rec
#
# Values are NFD-normalised with combining marks stripped; subfield values
# are additionally lower-cased.  Only the first "record" element found is
# processed.  Dies when the document contains no "record" element.
#
# Returns the list of row objects.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# fail with a message that names the problem rather than with
	# "Can't call method ... on an undefined value"
	die "No MARC record element found in $xmltype xml\n" unless $root;

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _strip_combining_marks($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _strip_combining_marks($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		# tag and indicators are shared by every subfield row of the field
		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc( _strip_combining_marks($data->textContent) ) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
	return @ns_list;
}
472
# Stream the flattened full_rec rows for one MARCXML document.
#
# Arguments: $self, $client, then the XML (string or already-parsed
# document).
#
# Rows are built as authority rows when the API name this method was
# invoked under contains "authority", and as metabib rows otherwise.
# Always returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.flat_marc.authority.xml",
	method          => "flat_marc_xml",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.flat_marc.biblio.xml",
	method          => "flat_marc_xml",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
504
# Stream the flattened full_rec rows for a stored record.
#
# Arguments: $self, $client, then the value passed to cstore's
# <type>.record_entry.retrieve call, where <type> is 'authority' when the
# API name this method was invoked under contains "authority" and 'biblio'
# otherwise.
#
# Each row from open-ils.ingest.flat_marc.<type>.xml is sent to the client
# and then logged as JSON.  Returns undef, also when the record is missing
# or has no MARC.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
		->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
		->gather(1);

	return undef unless ($r and $r->marc);

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
	my @rows = $flattener->run($r->marc);

	for my $row (@rows) {
		$client->respond($row);
		$log->debug(JSON->perl2JSON($row), DEBUG);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
	method          => "flat_marc_record",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
	method          => "flat_marc_record",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
543
544 # --------------------------------------------------------------------------------
545 # Fingerprinting
546
547 package OpenILS::Application::Ingest::Biblio::Fingerprint;
548 use base qw/OpenILS::Application::Ingest/;
549 use Unicode::Normalize;
550 use OpenSRF::EX qw/:try/;
551
# Fingerprint a stored bibliographic record.
#
# Arguments: $self, $client, then the value passed to cstore's
# biblio.record_entry.retrieve call.
#
# Returns the first result of open-ils.ingest.fingerprint.xml for the
# record's MARC, or undef when the record is missing or has no MARC.
sub biblio_fingerprint_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
		->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
		->gather(1);

	return undef unless ($r and $r->marc);

	my $fingerprinter = $self->method_lookup('open-ils.ingest.fingerprint.xml');
	my ($fp) = $fingerprinter->run($r->marc);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	return $fp;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.fingerprint.record",
	method          => "biblio_fingerprint_record",
	api_level       => 1,
	argc            => 1,
);
576
# Script runner for the fingerprint script; built on first use and then
# reused by later calls.
our $fp_script;

# Fingerprint one MARCXML string by running the configured fingerprint
# script over it.
#
# Arguments: $self, $client, then the MARCXML text.
#
# The script file and library path(s) are read from the
# apps / open-ils.ingest / app_settings section of the settings.  The XML is
# handed to the script as environment.marc.
#
# Returns the script's result, or undef (after logging an error) when the
# run yields a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = shift;

	$log->internal("Got MARC [$xml]");

	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

		# a single configured path comes back as a plain scalar
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file        => $script_file,
			paths       => $script_libs,
			reset_count => 1000,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Bail out explicitly: the early return must not depend on what the
	# logger call happens to return.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}

	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.ingest.fingerprint.xml",
	method          => "biblio_fingerprint",
	api_level       => 1,
	argc            => 1,
);
614
615
616 1;
617
618 __END__
619
# Report the current storage transaction, as returned by
# open-ils.storage.transaction.current.
# (This sub sits after __END__, so Perl does not compile it.)
sub in_transaction {
	OpenILS::Application::Ingest->post_init();

	my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current;
}
624
# Start a storage transaction unless one is already open.
#
# Arguments: $self, $client ($client is forwarded to the storage call).
#
# A failed BEGIN is rolled back and raised as a PANIC inside the try block;
# the otherwise handler logs it and execution continues.  Returns whatever
# open-ils.storage.transaction.current reports afterwards.
# (This sub sits after __END__, so Perl does not compile it.)
sub begin_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

			my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
			if (!defined($started) or !$started) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
			}
		}
	} otherwise {
		$log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
	};

	return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
649
# Roll back the current storage transaction, if there is one.
#
# Arguments: $self, $client (unused).
#
# Any error raised while rolling back is re-raised as a PANIC.  Returns 1.
# (This sub sits after __END__, so Perl does not compile it.)
sub rollback_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		} else {
			__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!")
	};

	return 1;
}
669
# Commit the current storage transaction, if there is one.
#
# Arguments: $self, $client (unused).
#
# A commit that reports failure is rolled back and raised as a PANIC; any
# error raised inside the try block is re-raised as a PANIC.  Returns 1.
# (This sub sits after __END__, so Perl does not compile it.)
sub commit_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		} else {
			my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
			if (!defined($committed) or !$committed) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
			}
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!")
	};

	return 1;
}
695
# Look up a method by API name on this package, run it with the remaining
# arguments and return only the first value it produced (undef when it
# produced none).
# (This sub sits after __END__, so Perl does not compile it.)
sub storage_req {
	my ($self, $method, @args) = @_;

	my ($first) = __PACKAGE__->method_lookup( $method )->run( @args );
	return $first;
}
702
# Delete the derived rows (full_rec and record_descriptor) of an authority
# record.
#
# Arguments: $self, $client, then the record value used in the delete
# filters.
#
# The deletes run between a savepoint set and release; on any error the
# savepoint is rolled back.  When no transaction was open on entry one is
# started here and then committed on success or rolled back on failure.
# Returns 1 on success, 0 on failure.
# (This sub sits after __END__, so Perl does not compile it.)
sub scrub_authority_record {
	my ($self, $client, $rec) = @_;

	# only finish the transaction ourselves if we were the ones to open it
	my $commit = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			|| OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

		for my $table (qw/full_rec record_descriptor/) {
			OpenILS::Application::Ingest->storage_req(
				"open-ils.storage.direct.authority.$table.mass_delete", { record => $rec }
			);
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			OpenILS::Application::Ingest->commit_transaction;
		} else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}
	return $success;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.worm.scrub.authority",
	method          => "scrub_authority_record",
	api_level       => 1,
	argc            => 1,
);
738
739
740 sub scrub_metabib_record {
741         my $self = shift;
742         my $client = shift;
743         my $rec = shift;
744
745         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
746                 $rec = OpenILS::Application::Ingest->storage_req(
747                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
748                 );
749         }
750
751         my $commit = 0;
752         if (!OpenILS::Application::Ingest->in_transaction) {
753                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
754                 $commit = 1;
755         }
756
757         my $success = 1;
758         try {
759                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
760                 
761                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
762                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
763                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
764                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
765                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
766                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
767                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
768                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
769
770                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
771                 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
772
773                 for my $mr (@$masters) {
774                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
775                         my $others = OpenILS::Application::Ingest->storage_req(
776                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
777
778                         if (@$others) {
779                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
780                                 $mr->master_record($others->[0]->source);
781                                 OpenILS::Application::Ingest->storage_req(
782                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
783                                         { id => $mr->id },
784                                         { master_record => $others->[0]->source, mods => undef }
785                                 );
786                         } else {
787                                 warn "Removing metarecord whose master is $rec";
788                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
789                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
790                                 warn "Metarecord removed";
791                                 $log->debug( "Metarecord removed", DEBUG);
792                         }
793                 }
794
795                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
796
797         } otherwise {
798                 $log->debug('Scrubbing failed : '.shift(), ERROR);
799                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
800                 $success = 0;
801         };
802
803         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
804         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
805         return $success;
806 }
# Expose scrub_metabib_record under its API name.
__PACKAGE__->register_method(
    method    => "scrub_metabib_record",
    api_name  => "open-ils.worm.scrub.biblio",
    argc      => 1,
    api_level => 1,
);
813
# Re-ingest ("wormize") every bibliographic record mapped to a metarecord.
#
# Arguments (after the usual $self / $client pair):
#   $mrec - value handed to the metarecord_source_map search to select the
#           source map rows to process
#
# For each source map row found, wormize_biblio_record() is called directly
# with this method object as $self, so the API name this method was invoked
# under (including any ".nomap" / ".noscrub" suffix) is what that sub sees.
#
# One status hash is streamed back per record through $client->respond:
#   { record => ..., metarecord => ..., success => ... [, error => ...] }
#
# Always returns undef; all results are delivered via the stream.
sub wormize_biblio_metarecord {
    my $self = shift;
    my $client = shift;
    my $mrec = shift;

    my $recs = OpenILS::Application::Ingest->storage_req(
        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec
    );

    for my $r (@$recs) {
        my $success = 0;
        try {
            $success = wormize_biblio_record($self => $client => $r->source);
            $client->respond(
                { record     => $r->source,
                  # The map row being processed is $r; reading the metarecord
                  # from any other variable here would hit an undefined value.
                  metarecord => $r->metarecord,
                  success    => $success,
                }
            );
        } catch Error with {
            my $e = shift;
            $client->respond(
                { record     => $r->source,
                  metarecord => $r->metarecord,
                  success    => $success,
                  error      => $e,
                }
            );
        };
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.nomap",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.noscrub",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
873
874
# Rebuild the derived metabib rows for one or more bibliographic records.
#
# Arguments (after $self / $client):
#   $rec - handed to the record_entry id search; when it is a hash reference
#          it is first resolved through the search_where id_list call.
#
# Behaviour depends on the API name this method object was invoked under:
#   *noscrub* - skip the initial open-ils.worm.scrub.biblio pass
#   *nomap*   - skip metarecord lookup/creation and source mapping
#
# A storage transaction is begun here only when none is already in progress;
# in that case it is committed on success and rolled back on failure.
# Returns 1 on success, 0 on failure.
sub wormize_biblio_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    # A hash reference is a search spec: swap it for the id list it yields.
    if ( ref($rec) && ref($rec) =~ /HASH/o ) {
        $rec = $ingest->storage_req(
            'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
        );
    }

    # We only finish the transaction ourselves if we were the one to open it.
    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    # Index classes, in the order they are extracted and later written.
    my @classes = qw/title author subject keyword series/;

    my $success = 1;
    try {
        # clean up the cruft
        if ($self->api_name !~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $bibs = $ingest->storage_req(
            'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec
        );

        my (@full_rec, @rec_descriptor, @source_map, %metarecord);
        my %field_entry = map { $_ => [] } @classes;

        # Phase 1: extract everything from each record.  Each record gets its
        # own savepoint so one bad record is rolled back without aborting
        # the others.
        for my $r (@$bibs) {
            try {
                $ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

                my $xml = $parser->parse_string($r->marc);

                # update the fingerprint, but only when something changed
                my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
                if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality) {
                    $ingest->storage_req(
                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                        { id => $r->id },
                        { fingerprint => $fp->{fingerprint},
                          quality     => int($fp->{quality}) }
                    );
                }

                # the full_rec stuff
                for my $row ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
                    $row->record( $r->id );
                    push @full_rec, $row;
                }

                # the rec_descriptor stuff
                my ($descriptor) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
                $descriptor->record( $r->id );
                push @rec_descriptor, $descriptor;

                # the indexing field entry stuff
                for my $class (@classes) {
                    for my $entry ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
                        $entry->source( $r->id );
                        push @{ $field_entry{$class} }, $entry;
                    }
                }

                # metarecord mapping: reuse the metarecord with this
                # fingerprint, or create one mastered by this record
                if ($self->api_name !~ /nomap/o) {
                    my $mr = $ingest->storage_req(
                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                        $fp->{fingerprint}
                    )->[0];

                    unless ($mr) {
                        $mr = Fieldmapper::metabib::metarecord->new;
                        $mr->fingerprint( $fp->{fingerprint} );
                        $mr->master_record( $r->id );
                        $mr->id( $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                    }

                    my $map = Fieldmapper::metabib::metarecord_source_map->new;
                    $map->metarecord( $mr->id );
                    $map->source( $r->id );
                    push @source_map, $map;

                    $metarecord{$mr->id} = $mr;
                }

                $ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
            } otherwise {
                $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
                $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
            };
        }

        # Phase 2: write what was collected.  No record descriptor at all
        # means nothing was extracted successfully, which counts as failure.
        if (@rec_descriptor) {
            $ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );

            if (@source_map) {
                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
                    @source_map
                );
            }

            # Point each touched metarecord at its highest-quality source
            # record and clear its mods value.
            for my $mr ( values %metarecord ) {
                my $sources = $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
                    $mr->id
                );

                my $candidates = $ingest->storage_req(
                    'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
                    [ map { $_->source } @$sources ]
                );

                my $master = ( sort { $b->quality <=> $a->quality } @$candidates )[0];

                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord.remote_update',
                    { id => $mr->id },
                    { master_record => $master->id, mods => undef }
                );
            }

            $ingest->storage_req(
                'open-ils.storage.direct.metabib.record_descriptor.batch.create',
                @rec_descriptor
            );

            if (@full_rec) {
                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.full_rec.batch.create',
                    @full_rec
                );
            }

            for my $class (@classes) {
                my $entries = $field_entry{$class};
                next unless @$entries;
                $ingest->storage_req(
                    "open-ils.storage.direct.metabib.${class}_field_entry.batch.create",
                    @$entries
                );
            }

            $ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
        } else {
            $success = 0;
        }

    } otherwise {
        $log->debug('Wormization failed : '.shift(), ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
        $success = 0;
    };

    if ($commit) {
        if ($success) {
            $ingest->commit_transaction;
        } else {
            $ingest->rollback_transaction;
        }
    }
    return $success;
}
__PACKAGE__->register_method(
    method    => "wormize_biblio_record",
    api_name  => "open-ils.worm.wormize.biblio",
    argc      => 1,
    api_level => 1,
);
__PACKAGE__->register_method(
    method    => "wormize_biblio_record",
    api_name  => "open-ils.worm.wormize.biblio.nomap",
    argc      => 1,
    api_level => 1,
);
__PACKAGE__->register_method(
    method    => "wormize_biblio_record",
    api_name  => "open-ils.worm.wormize.biblio.noscrub",
    argc      => 1,
    api_level => 1,
);
__PACKAGE__->register_method(
    method    => "wormize_biblio_record",
    api_name  => "open-ils.worm.wormize.biblio.nomap.noscrub",
    argc      => 1,
    api_level => 1,
);
1075
# Rebuild the flattened full_rec rows for authority records.
#
# Arguments (after $self / $client):
#   $rec - handed to the authority record_entry id search
#
# Unless the API name contains "noscrub", open-ils.worm.scrub.authority is
# run first.  A storage transaction is begun here only when none is already
# in progress; in that case it is committed on success and rolled back on
# failure.  Returns 1 on success, 0 on failure.
sub wormize_authority_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    # We only finish the transaction ourselves if we were the one to open it.
    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        # clean up the cruft
        if ($self->api_name !~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $auths = $ingest->storage_req(
            'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec
        );

        my @full_rec = ();
        my @rec_descriptor = ();    # only referenced by the disabled code below

        for my $r (@$auths) {
            my $xml = $parser->parse_string($r->marc);

            # the full_rec stuff
            for my $row ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
                $row->record( $r->id );
                push @full_rec, $row;
            }

            # the rec_descriptor stuff -- XXX What does this mean for authority records?
            #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
            #$rd->record( $r->id );
            #push @rec_descriptor, $rd;
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );

        #$ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
        if (@full_rec) {
            $ingest->storage_req(
                'open-ils.storage.direct.authority.full_rec.batch.create',
                @full_rec
            );
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

    } otherwise {
        $log->debug('Wormization failed : '.shift(), ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
        $success = 0;
    };

    if ($commit) {
        if ($success) {
            $ingest->commit_transaction;
        } else {
            $ingest->rollback_transaction;
        }
    }
    return $success;
}
__PACKAGE__->register_method(
    method    => "wormize_authority_record",
    api_name  => "open-ils.worm.wormize.authority",
    argc      => 1,
    api_level => 1,
);
__PACKAGE__->register_method(
    method    => "wormize_authority_record",
    api_name  => "open-ils.worm.wormize.authority.noscrub",
    argc      => 1,
    api_level => 1,
);
1144
1145
1146 # --------------------------------------------------------------------------------
1147 # MARC index extraction
1148
1149 package OpenILS::Application::Ingest::XPATH;
1150 use base qw/OpenILS::Application::Ingest/;
1151 use Unicode::Normalize;
1152
# Flatten the text matched by an XPath expression into one string.
#
# Arguments:
#   $xml       - node to search (callers in this file pass a documentElement)
#   $xpath     - XPath expression to evaluate against $xml
#   $ns_uri    - optional namespace URI   } when both are true they are set
#   $ns_prefix - optional namespace prefix} on $xml before searching
#   $unique    - when true, skip any piece of text that already occurs
#                somewhere in the string built so far
#
# For every matched node the text of each child node is appended, followed
# by a single space; a matched node with no children contributes its own
# text instead.  The result is returned in Unicode NFD form.
sub _xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ( $xml->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {
            my $content = $child->textContent;
            $content = '' unless defined $content;

            # Uniquify with a literal substring test.  A regex built from the
            # text would turn into the empty pattern for an empty text node,
            # and Perl treats the empty pattern as "reuse the last pattern
            # that matched", making the outcome depend on unrelated matches.
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }

        if ( !@children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
1184
# Stream one field entry object per index definition of a search class.
#
# Arguments (after $self / $client):
#   $xml   - MARC XML, either an already parsed object (any reference) or a
#            string, which is parsed here
#   $class - index class name; selects both the $xpathset->{$class} entries
#            and the Fieldmapper::metabib::<class>_field_entry class
#
# The record is run through $mods_sheet once, and each XPath registered for
# the class is evaluated against that single result.  A non-empty value is
# normalised (characters matching \pM and \pC removed, a period directly
# after a word character dropped, lowercased) and sent via $client->respond
# with the definition's id as its field.
#
# Always returns undef; results are delivered through the stream.
sub class_all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

    my @types = keys %{ $xpathset->{$class} };
    return undef unless @types;

    # The XSLT transform does not depend on the index type, so run it once
    # instead of once per XPath.
    my $mods = $mods_sheet->transform($xml)->documentElement;

    for my $type (@types) {
        my $value = _xpath_to_string(
            $mods,
            $xpathset->{$class}->{$type}->{xpath},
            "http://www.loc.gov/mods/",
            "mods",
            1
        );

        next unless $value;

        $value =~ s/\pM+//sgo;
        $value =~ s/\pC+//sgo;
        #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

        $value =~ s/(\w)\./$1/sgo;
        $value = lc($value);

        my $fm = $class_constructor->new;
        $fm->value( $value );
        $fm->field( $xpathset->{$class}->{$type}->{id} );
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.field_entry.class.xml",
    method          => "class_all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
1227
# Record-id front end for open-ils.worm.field_entry.class.xml.
#
# Retrieves bib record $rec from storage, runs the XML variant on its MARC
# for index class $class, stamps every returned entry with $rec as its
# source and streams it to the client.  Always returns undef.
sub class_all_index_string_record {
    my ($self, $client, $rec, $class) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my @entries = $self->method_lookup("open-ils.worm.field_entry.class.xml")->run($bib->marc, $class);
    for my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }
    return undef;
}
__PACKAGE__->register_method(
    method    => "class_all_index_string_record",
    api_name  => "open-ils.worm.field_entry.class.record",
    argc      => 1,
    api_level => 1,
    stream    => 1,
);
1250
1251
# Return the index string for one (class, type) XPath definition.
#
# $xml may be a parsed object (any reference) or a string, which is parsed
# here.  The record is transformed with $mods_sheet and the XPath stored at
# $xpathset->{$class}->{$type}->{xpath} is flattened by _xpath_to_string()
# with uniquifying switched on.
sub class_index_string_xml {
    my ($self, $client, $xml, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _xpath_to_string(
        $mods_sheet->transform($xml)->documentElement,
        $xpathset->{$class}->{$type}->{xpath},
        "http://www.loc.gov/mods/",
        "mods",
        1
    );
}
__PACKAGE__->register_method(
    method    => "class_index_string_xml",
    api_name  => "open-ils.worm.class.type.xml",
    argc      => 1,
    api_level => 1,
);
1269
# Record-id front end for open-ils.worm.class.type.xml.
#
# Retrieves bib record $rec from storage, asks the XML variant for the
# ($class, $type) index string of its MARC, logs the result and returns it.
sub class_index_string_record {
    my ($self, $client, $rec, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.class.type.xml");
    my ($d) = $xml_method->run($bib->marc, $class => $type);

    $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    method    => "class_index_string_record",
    api_name  => "open-ils.worm.class.type.record",
    argc      => 1,
    api_level => 1,
);
1290
# Evaluate an arbitrary XPath against an XML document and return the
# flattened text.
#
# $xml may be a parsed object (any reference) or a string, which is parsed
# here.  $uri / $prefix / $unique are passed straight to _xpath_to_string()
# together with the document element.
sub xml_xpath {
    my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
    method    => "xml_xpath",
    api_name  => "open-ils.worm.xpath.xml",
    argc      => 1,
    api_level => 1,
);
1310
# Record-id front end for open-ils.worm.xpath.xml.
#
# Retrieves bib record $rec from storage, evaluates $xpath against its MARC
# through the XML variant (forwarding $uri, $prefix and $unique), logs the
# result and returns it.
sub record_xpath {
    my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.xpath.xml");
    my ($d) = $xml_method->run($bib->marc, $xpath, $uri, $prefix, $unique );

    $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    method    => "record_xpath",
    api_name  => "open-ils.worm.xpath.record",
    argc      => 1,
    api_level => 1,
);
1333
1334
1335 # --------------------------------------------------------------------------------
1336 # MARC Descriptor
1337
1338 package OpenILS::Application::Ingest::Biblio::Leader;
1339 use base qw/OpenILS::Application::Ingest/;
1340 use Unicode::Normalize;
1341
# Regex fragments keyed by record type group name.  Each fragment is a
# one-character class of the codes belonging to that group; the descriptor
# extractors in this package match them against leader position 6.
our %marc_type_groups = (
    BKS => q/[at]{1}/,
    SER => q/[a]{1}/,
    VIS => q/[gkro]{1}/,
    MIX => q/[p]{1}/,
    MAP => q/[ef]{1}/,
    SCO => q/[cd]{1}/,
    REC => q/[ij]{1}/,
    COM => q/[m]{1}/,
);

# Build a compiled regex matching a type code from any of the named groups.
#
# Arguments: one or more keys of %marc_type_groups; unknown names are ignored.
# Returns:   a qr// object anchored at both ends.
sub _type_re {
    # Hash slice: look up every requested group.  (A scalar lookup written as
    # $hash{@_} would use the *number* of arguments as the key.)
    my @fragments = grep { defined } @marc_type_groups{@_};

    # Group the alternation so both anchors apply to every alternative.
    my $re = '^(?:' . join('|', @fragments) . ')$';
    return qr/$re/;
}
1357
# Extractors for the record descriptor fields, keyed by field name.
#
# Each entry is a code reference taking no arguments.  It reads the package
# variables $ldr and $oo8, which _extract_biblio_descriptors() localises to
# the leader and 008 values of the record being processed, and returns
# exactly one value for that field.
our %biblio_descriptor_code = (
    item_type     => sub { return substr($ldr, 6, 1) },
    item_form     => sub {
        # the position of the form code in $oo8 depends on the type group
        my $type_code = substr($ldr, 6, 1);
        return substr($oo8, 29, 1) if ($type_code =~ _type_re( qw/MAP VIS/ ));
        return substr($oo8, 23, 1) if ($type_code =~ _type_re( qw/BKS SER MIX SCO REC/ ));
        return ' ';
    },
    bib_level     => sub { return substr($ldr, 7, 1) },
    control_type  => sub { return substr($ldr, 8, 1) },
    char_encoding => sub { return substr($ldr, 9, 1) },
    enc_level     => sub { return substr($ldr, 17, 1) },
    cat_form      => sub { return substr($ldr, 18, 1) },
    pub_status    => sub { return substr($ldr, 5, 1) },
    item_lang     => sub { return substr($oo8, 35, 3) },
    lit_form      => sub {
        return substr($oo8, 33, 1) if (substr($ldr, 6, 1) =~ _type_re('BKS'));
        return undef;
    },
    type_mat      => sub {
        return substr($oo8, 33, 1) if (substr($ldr, 6, 1) =~ _type_re('VIS'));
        return undef;
    },
    audience      => sub { return substr($oo8, 22, 1) },
);
1380
# Build a record_descriptor object from a parsed MARC XML document.
#
# The leader and the 008 / 007 control field values are placed in the
# localised package variables $ldr, $oo8 and $oo7 for the duration of the
# call, then every extractor in %biblio_descriptor_code is run and its
# result stored through the accessor of the same name.
sub _extract_biblio_descriptors {
    my ($xml) = @_;

    local $ldr = $xml->findvalue('//*[local-name()="leader"]');
    local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
    local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

    my $descriptor = Fieldmapper::metabib::record_descriptor->new;

    for my $field ( keys %biblio_descriptor_code ) {
        my $extract = $biblio_descriptor_code{$field};
        $descriptor->$field( $extract->() );
    }

    return $descriptor;
}
1395
# API wrapper around _extract_biblio_descriptors().
#
# $xml may be a parsed object (any reference) or a string, which is parsed
# here.  Returns the record descriptor object built from it.
sub extract_biblio_desc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
    method    => "extract_biblio_desc_xml",
    api_name  => "open-ils.worm.biblio_leader.xml",
    argc      => 1,
    api_level => 1,
);
1411
# Record-id front end for open-ils.worm.biblio_leader.xml.
#
# Retrieves bib record $rec from storage, builds the record descriptor from
# its MARC through the XML variant, logs it as JSON and returns it.
sub extract_biblio_desc_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.biblio_leader.xml");
    my ($d) = $xml_method->run($bib->marc);

    $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    method    => "extract_biblio_desc_record",
    api_name  => "open-ils.worm.biblio_leader.record",
    argc      => 1,
    api_level => 1,
);
1430
1431 # --------------------------------------------------------------------------------
1432 # Flat MARC
1433
1434 package OpenILS::Application::Ingest::FlatMARC;
1435 use base qw/OpenILS::Application::Ingest/;
1436 use Unicode::Normalize;
1437
1438
# Flatten a parsed MARC XML document into a list of full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document to search for the first "record" element
#   $xmltype - Fieldmapper namespace for the rows; defaults to 'metabib',
#              giving Fieldmapper::<xmltype>::full_rec
#
# One row is produced per leader (tag 'LDR'), per controlfield, and per
# subfield of every datafield.  Every value is NFD-decomposed and has its
# \pM characters removed; subfield values are additionally lowercased.
# Returns the rows as a list, in document order within each of those groups.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $row_class = "Fieldmapper::${xmltype}::full_rec";

    # shared value clean-up: decompose, then drop the combining marks
    my $strip_marks = sub {
        my ($text) = @_;
        $text = NFD($text);
        $text =~ s/(\pM+)//gso;
        return $text;
    };

    my @rows;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    # leader
    for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $row_class->new;
        $row->tag( 'LDR' );

        my $raw = $leader->textContent;
        $row->value( $strip_marks->($raw) );

        push @rows, $row;
    }

    # control fields
    for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $row_class->new;
        $row->tag( $control->getAttribute( "tag" ) );

        my $raw = $control->textContent;
        $row->value( $strip_marks->($raw) );

        push @rows, $row;
    }

    # data fields: one row per subfield, carrying the field's tag/indicators
    for my $field ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $field;

        my $tag  = $field->getAttribute( "tag" );
        my $ind1 = $field->getAttribute( "ind1" );
        my $ind2 = $field->getAttribute( "ind2" );

        for my $subfield ( @{$field->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $row_class->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );

            my $raw = $subfield->textContent;
            $row->value( lc( $strip_marks->($raw) ) );

            push @rows, $row;
        }
    }

    $log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml", DEBUG);
    return @rows;
}
1506
# Flatten one MARCXML document into rows and stream them back to the caller.
#
#   $self   - method object; its api_name selects the row type
#             ('authority' when the name contains "authority", else 'metabib')
#   $client - each row is sent through $client->respond
#   $xml    - MARCXML text, or a reference (taken to be an already parsed document)
#
# Rows come from _marcxml_to_full_rows($xml, $type).  Always returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	# Plain strings are parsed here; references are passed through untouched.
	$xml = $parser->parse_string($xml) if (!ref $xml);

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}

# The same implementation is published under the authority and the biblio name.
for my $api_name (
	"open-ils.worm.flat_marc.authority.xml",
	"open-ils.worm.flat_marc.biblio.xml",
) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_xml",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1536
# Look up a stored record by id and stream the flattened rows of its MARC.
#
#   $self   - method object; its api_name selects 'authority' or 'biblio'
#   $client - each row is sent through $client->respond
#   $rec    - value handed to the storage "record_entry.retrieve" call
#
# The flattening itself is delegated to the matching
# "open-ils.worm.flat_marc.<type>.xml" method.  Always returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
	);

	my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
	for my $row ($flattener->run($entry->marc)) {
		$client->respond($row);
	}
	return undef;
}

# The same implementation is published under the biblio and the authority name.
for my $api_name (
	"open-ils.worm.flat_marc.biblio.record_entry",
	"open-ils.worm.flat_marc.authority.record_entry",
) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_record",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1565
1566
1567 # --------------------------------------------------------------------------------
1568 # Fingerprinting
1569
1570 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1571 use base qw/OpenILS::Application::Ingest/;
1572 use Unicode::Normalize;
1573 use OpenSRF::EX qw/:try/;
1574
# Fingerprint extraction rules, kept as a flat list of pairs:
#
#     <trigger xpath> => [ <field name> => { xpath => [...], fixup => sub {...} }, ... ]
#
# _fp_mods walks the pairs in order.  For each field the first xpath yielding a
# true value supplies the text, and the fixup then rewrites that text in place.
# The fixups work on the package variable $text, which _fp_mods localizes.

# Trace line emitted before the first and after every fixup step.
my $fp_trace = sub {
	$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
};

# Steps common to titles and authors: decompose, drop combining marks,
# lowercase, collapse whitespace runs, trim.
my $fp_normalize = sub {
	$fp_trace->();
	$text = NFD($text);
	$fp_trace->();
	$text =~ s/\pM+//gso;
	$fp_trace->();
	$text = lc($text);
	$fp_trace->();
	$text =~ s/\s+/ /sgo;
	$fp_trace->();
	# NOTE(review): the greedy (.+) under /s also swallows trailing whitespace,
	# so this only trims the leading side; the trailing blank is removed by the
	# field-specific substitutions that follow.
	$text =~ s/^\s*(.+)\s*$/$1/sgo;
	$fp_trace->();
};

# Titles: additionally drop articles, one bracketed remark per match, and
# trailing whitespace / ";", "/", "." punctuation.
my $fp_title_fixup = sub {
	$fp_normalize->();
	$text =~ s/\b(?:the|an?)\b//sgo;
	$fp_trace->();
	$text =~ s/\[.[^\]]+\]//sgo;
	$fp_trace->();
	$text =~ s/\s*[;\/\.]*$//sgo;
	$fp_trace->();
};

# Authors: additionally cut everything from the first whitespace
# (and an optional comma before it) onwards.
my $fp_author_fixup = sub {
	$fp_normalize->();
	$text =~ s/,?\s+.*$//sgo;
	$fp_trace->();
};

# Candidate xpaths, most preferred first.
my @fp_title_xpath = (
	'//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
	'//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);

my @fp_author_xpath = (
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

my @fp_related_title_xpath = (
	'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
	'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
	'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
	'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
);

my @fp_related_author_xpath = (
	'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
	'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

my @fp_mods_xpath = (
	'//mods:mods/mods:typeOfResource[text()="text"]' => [
		title	=> {
			xpath	=> [ @fp_title_xpath ],
			fixup	=> $fp_title_fixup,
		},
		author	=> {
			xpath	=> [ @fp_author_xpath ],
			fixup	=> $fp_author_fixup,
		},
	],

	# Related-item values are preferred; the record's own values are the fallback.
	'//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
		title	=> {
			xpath	=> [ @fp_related_title_xpath, @fp_title_xpath ],
			fixup	=> $fp_title_fixup,
		},
		author	=> {
			xpath	=> [ @fp_related_author_xpath, @fp_author_xpath ],
			fixup	=> $fp_author_fixup,
		},
	],

);

# Any record with a titleInfo element falls back to the first rule block.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1687
# Build a fingerprint string from a MODS element using @fp_mods_xpath.
#
#   $mods - element node; the "mods" prefix is bound to the MODS namespace on it
#
# The rule list is read as (trigger xpath, field block) pairs.  The first pair
# that leaves a true fingerprint string wins: non-word characters are removed
# and the result is returned.  Returns undef when no pair produced any text.
sub _fp_mods {
	my ($mods) = @_;
	$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

	my $fp_string = '';

	for (my $rule = 0; $fp_mods_xpath[$rule]; $rule += 2) {
		my ($match_xpath, $block) = @fp_mods_xpath[$rule, $rule + 1];

		my @nodes = $mods->findnodes( $match_xpath );
		if (@nodes) {

			# A block is itself a flat (field name, field spec) list.
			for (my $slot = 1; $block->[$slot]; $slot += 2) {
				my ($name, $part) = @{$block}[$slot - 1, $slot];

				# The fixup closures read and write this package variable.
				local $text;
				for my $xpath ( @{ $part->{xpath} } ) {
					$text = $mods->findvalue( $xpath );
					last if ($text);
				}

				$log->debug("Found fingerprint text using $name : [$text]", DEBUG);

				if ($text) {
					$part->{fixup}->();
					$log->debug("Fingerprint text after fixup : [$text]", DEBUG);
					$fp_string .= $text;
				}
			}
		}

		if ($fp_string) {
			$fp_string =~ s/\W+//gso;
			$log->debug("Fingerprint is [$fp_string]", INFO);
			return $fp_string;
		}
	}

	return undef;
}
1732
# Recompute fingerprint and quality for the bib record(s) found for $rec and
# store them when either value changed.  Unless the API name contains "nomap",
# a changed record is also detached from its old metarecord source map(s) and
# attached to the metarecord matching the new fingerprint (created on demand).
#
#   $self   - method object (api_name decides whether remapping happens)
#   $client - receives the id of every record examined via respond()
#   $rec    - handed to the storage "record_entry.search.id" call
#
# If no transaction is open one is begun here (PANIC is thrown when that fails)
# and committed on success or rolled back on any error.  Errors inside the work
# loop are logged, not rethrown.  Always returns undef.
sub refingerprint_bibrec {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
		for my $bib (@$bibs) {
			my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

			# The fingerprint method returns 0 when its script fails; never
			# dereference (and then store) such a result.
			die "No fingerprint could be generated for record ".$bib->id."\n" unless (ref $fp);

			if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

				$log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $bib->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => $fp->{quality} }
				);

				if ($self->api_name !~ /nomap/o) {
					my $old_source_map = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
						$bib->id
					);

					# Drop every existing map row, remembering the metarecord
					# of the last one.
					my $old_mrid;
					if (ref($old_source_map) and @$old_source_map) {
						for my $m (@$old_source_map) {
							$old_mrid = $m->metarecord;
							OpenILS::Application::Ingest->storage_req(
								'open-ils.storage.direct.metabib.metarecord_source_map.delete',
								$m->id
							);
						}
					}

					# Declared unconditionally: "my $x = ... if COND" is
					# undefined behaviour and could leak the previous
					# record's value into this iteration.
					my $old_sm;
					if ($old_mrid) {
						$old_sm = OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
							{ metarecord => $old_mrid }
						);
					}

					# The old metarecord has no sources left: remove it.
					if (ref($old_sm) and @$old_sm == 0) {
						OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord.delete',
							$old_mrid
						);
					}

					my $mr = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
						{ fingerprint => $fp->{fingerprint} }
					)->[0];

					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $bib->id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $bib->id );
					OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

				}
			}
			$client->respond($bib->id);
		}

	} otherwise {
		$log->debug('Fingerprinting failed : '.shift(), ERROR);
		$success = 0;
	};

	# Only finish a transaction that was opened by this call.
	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.fingerprint.record.update",
	method		=> "refingerprint_bibrec",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);

# NOTE(review): unlike the registration above, this one does not set
# stream => 1 although the method still calls $client->respond -- confirm
# that this is intentional.
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.fingerprint.record.update.nomap",
	method		=> "refingerprint_bibrec",
	api_level	=> 1,
	argc		=> 1,
);
1835
1836 =comment
1837
1838 sub fingerprint_bibrec {
1839         my $self = shift;
1840         my $client = shift;
1841         my $rec = shift;
1842
1843         OpenILS::Application::Ingest->post_init();
1844         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1845
1846         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1847         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1848         return $fp;
1849
1850 }
1851 __PACKAGE__->register_method(  
1852         api_name        => "open-ils.worm.fingerprint.record",
1853         method          => "fingerprint_bibrec",
1854         api_level       => 0,
1855         argc            => 1,
1856 );                      
1857
1858
1859 sub fingerprint_mods {
1860         my $self = shift;
1861         my $client = shift;
1862         my $xml = shift;
1863
1864         OpenILS::Application::Ingest->post_init();
1865         my $mods = $parser->parse_string($xml)->documentElement;
1866
1867         return _fp_mods( $mods );
1868 }
1869 __PACKAGE__->register_method(  
1870         api_name        => "open-ils.worm.fingerprint.mods",
1871         method          => "fingerprint_mods",
1872         api_level       => 1,
1873         argc            => 1,
1874 );                      
1875
1876 sub fingerprint_marc {
1877         my $self = shift;
1878         my $client = shift;
1879         my $xml = shift;
1880
1881         $xml = $parser->parse_string($xml) unless (ref $xml);
1882
1883         OpenILS::Application::Ingest->post_init();
1884         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1885         $log->debug("Returning [$fp] as fingerprint", INFO);
1886         return $fp;
1887 }
1888 __PACKAGE__->register_method(  
1889         api_name        => "open-ils.worm.fingerprint.marc",
1890         method          => "fingerprint_marc",
1891         api_level       => 1,
1892         argc            => 1,
1893 );                      
1894
1895
1896 =cut
1897
# Fingerprint a stored bibliographic record.
#
#   $self   - method object, used to look up the MARC fingerprint method
#   $client - unused here
#   $rec    - value handed to the storage "record_entry.retrieve" call
#
# Returns the first value produced by "open-ils.worm.fingerprint.marc" for the
# record's MARC.
sub biblio_fingerprint_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec
	);
	my $marc = $entry->marc;

	my $fingerprinter = $self->method_lookup('open-ils.worm.fingerprint.marc');
	my ($fp) = $fingerprinter->run($marc);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
1919
# Script runner for biblio fingerprinting; created on first use and then reused.
our $fp_script;

# Fingerprint a MARC record by running the configured fingerprint script.
#
#   $self   - method object (unused here)
#   $client - unused here
#   $marc   - MARCXML text, or a reference (taken to be an already parsed document)
#
# The record and its MODS rendering (via $mods_sheet) are passed through
# entityize() and handed to the script as the 'environment' value
# { marc => ..., mods => ... }.
#
# Returns whatever the script run returns, or 0 when the run yields a false
# value (the failure is logged together with $@).
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $marc = shift;

	OpenILS::Application::Ingest->post_init();

	$marc = $parser->parse_string($marc) unless (ref $marc);

	my $mods = OpenILS::Application::Ingest::entityize(
		$mods_sheet
			->transform( $marc )
			->documentElement
			->toString,
		'D'
	);

	$marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

	# The record is only traced through the logger; it is no longer dumped to
	# STDERR with warn on every call.
	$log->internal("Got MARC [$marc]");
	$log->internal("Created MODS [$mods]");

	if (!$fp_script) {
		my @pfx = ( "apps", "open-ils.storage","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be a single value or a list; always pass a list on.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 1000,
		);
	}

	$log->debug("Applying environment for biblio fingerprinting...");

	my $env = {marc => $marc, mods => $mods};
	#my $res = {fingerprint => '', quality => '0'};

	$fp_script->insert('environment' => $env);
	#$fp_script->insert('result' => $res);

	$log->debug("Running script for biblio fingerprinting...");

	# Fail explicitly: the early return must not depend on what the logger
	# call happens to return.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return 0;
	}

	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.fingerprint.marc",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
1982
1983 # --------------------------------------------------------------------------------
1984
1985 1;
1986
1987 __END__
1988 my $in_xact;
1989 my $begin;
1990 my $commit;
1991 my $rollback;
1992 my $lookup;
1993 my $update_entry;
1994 my $mr_lookup;
1995 my $mr_update;
1996 my $mr_create;
1997 my $create_source_map;
1998 my $sm_lookup;
1999 my $rm_old_rd;
2000 my $rm_old_sm;
2001 my $rm_old_fr;
2002 my $rm_old_tr;
2003 my $rm_old_ar;
2004 my $rm_old_sr;
2005 my $rm_old_kr;
2006 my $rm_old_ser;
2007
2008 my $fr_create;
2009 my $rd_create;
2010 my $create = {};
2011
2012 my %descriptor_code = (
2013         item_type => 'substr($ldr,6,1)',
2014         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2015         bib_level => 'substr($ldr,7,1)',
2016         control_type => 'substr($ldr,8,1)',
2017         char_encoding => 'substr($ldr,9,1)',
2018         enc_level => 'substr($ldr,17,1)',
2019         cat_form => 'substr($ldr,18,1)',
2020         pub_status => 'substr($ldr,5,1)',
2021         item_lang => 'substr($oo8,35,3)',
2022         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2023         audience => 'substr($oo8,22,1)',
2024 );
2025
2026 sub wormize {
2027
2028         my $self = shift;
2029         my $client = shift;
2030         my @docids = @_;
2031
2032         my $no_map = 0;
2033         if ($self->api_name =~ /no_map/o) {
2034                 $no_map = 1;
2035         }
2036
2037         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2038                 unless ($in_xact);
2039         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2040                 unless ($begin);
2041         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2042                 unless ($commit);
2043         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2044                 unless ($rollback);
2045         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2046                 unless ($sm_lookup);
2047         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2048                 unless ($mr_lookup);
2049         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2050                 unless ($mr_update);
2051         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2052                 unless ($lookup);
2053         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2054                 unless ($update_entry);
2055         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2056                 unless ($rm_old_sm);
2057         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2058                 unless ($rm_old_rd);
2059         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2060                 unless ($rm_old_fr);
2061         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2062                 unless ($rm_old_tr);
2063         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2064                 unless ($rm_old_ar);
2065         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2066                 unless ($rm_old_sr);
2067         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2068                 unless ($rm_old_kr);
2069         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2070                 unless ($rm_old_ser);
2071         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2072                 unless ($mr_create);
2073         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2074                 unless ($create_source_map);
2075         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2076                 unless ($rd_create);
2077         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2078                 unless ($fr_create);
2079         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2080                 unless ($$create{title});
2081         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2082                 unless ($$create{author});
2083         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2084                 unless ($$create{subject});
2085         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2086                 unless ($$create{keyword});
2087         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2088                 unless ($$create{series});
2089
2090
2091         my ($outer_xact) = $in_xact->run;
2092         try {
2093                 unless ($outer_xact) {
2094                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2095                         my ($r) = $begin->run($client);
2096                         unless (defined $r and $r) {
2097                                 $rollback->run;
2098                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2099                         }
2100                 }
2101         } catch Error with {
2102                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2103         };
2104
2105         my @source_maps;
2106         my @entry_list;
2107         my @mr_list;
2108         my @rd_list;
2109         my @ns_list;
2110         my @mods_data;
2111         my $ret = 0;
2112         for my $entry ( $lookup->run(@docids) ) {
2113                 # step -1: grab the doc from storage
2114                 next unless ($entry);
2115
2116                 if(!$mods_sheet) {
2117                         my $xslt_doc = $parser->parse_file(
2118                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2119                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2120                 }
2121
2122                 my $xml = $entry->marc;
2123                 my $docid = $entry->id;
2124                 my $marcdoc = $parser->parse_string($xml);
2125                 my $modsdoc = $mods_sheet->transform($marcdoc);
2126
2127                 my $mods = $modsdoc->documentElement;
2128                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2129
2130                 $entry->fingerprint( fingerprint_mods( $mods ) );
2131                 push @entry_list, $entry;
2132
2133                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2134
2135                 unless ($no_map) {
2136                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2137                         if (!$mr || !@$mr) {
2138                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2139                                 $mr = new Fieldmapper::metabib::metarecord;
2140                                 $mr->fingerprint( $entry->fingerprint );
2141                                 $mr->master_record( $entry->id );
2142                                 my ($new_mr) = $mr_create->run($mr);
2143                                 $mr->id($new_mr);
2144                                 unless (defined $mr) {
2145                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2146                                 }
2147                         } else {
2148                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2149                                 $mr->mods('');
2150                                 push @mr_list, $mr;
2151                         }
2152
2153                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2154                         $sm->metarecord( $mr->id );
2155                         $sm->source( $entry->id );
2156                         push @source_maps, $sm;
2157                 }
2158
2159                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2160                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2161
2162                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2163                 for my $rd_field ( keys %descriptor_code ) {
2164                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2165                 }
2166                 $rd_obj->record( $docid );
2167                 push @rd_list, $rd_obj;
2168
2169                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2170
2171                 # step 2: build the KOHA rows
2172                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2173                 $_->record( $docid ) for (@tmp_list);
2174                 push @ns_list, @tmp_list;
2175
2176                 $ret++;
2177
2178                 last unless ($self->api_name =~ /batch$/o);
2179         }
2180
2181         $rm_old_rd->run( { record => \@docids } );
2182         $rm_old_fr->run( { record => \@docids } );
2183         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2184         $rm_old_tr->run( { source => \@docids } );
2185         $rm_old_ar->run( { source => \@docids } );
2186         $rm_old_sr->run( { source => \@docids } );
2187         $rm_old_kr->run( { source => \@docids } );
2188         $rm_old_ser->run( { source => \@docids } );
2189
2190         unless ($no_map) {
2191                 my ($sm) = $create_source_map->run(@source_maps);
2192                 unless (defined $sm) {
2193                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2194                 }
2195                 my ($mr) = $mr_update->run(@mr_list);
2196                 unless (defined $mr) {
2197                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2198                 }
2199         }
2200
2201         my ($re) = $update_entry->run(@entry_list);
2202         unless (defined $re) {
2203                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2204         }
2205
2206         my ($rd) = $rd_create->run(@rd_list);
2207         unless (defined $rd) {
2208                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2209         }
2210
2211         my ($fr) = $fr_create->run(@ns_list);
2212         unless (defined $fr) {
2213                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2214         }
2215
2216         # step 5: insert the new metadata
2217         for my $class ( qw/title author subject keyword series/ ) {
2218                 my @md_list = ();
2219                 for my $doc ( @mods_data ) {
2220                         my ($did) = keys %$doc;
2221                         my ($data) = values %$doc;
2222
2223                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2224                         for my $row ( keys %{ $$data{$class} } ) {
2225                                 next unless (exists $$data{$class}{$row});
2226                                 next unless ($$data{$class}{$row}{value});
2227                                 my $fm_obj = $fm_constructor->new;
2228                                 $fm_obj->value( $$data{$class}{$row}{value} );
2229                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2230                                 $fm_obj->source( $did );
2231                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2232
2233                                 push @md_list, $fm_obj;
2234                         }
2235                 }
2236                         
2237                 my ($cr) = $$create{$class}->run(@md_list);
2238                 unless (defined $cr) {
2239                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2240                 }
2241         }
2242
2243         unless ($outer_xact) {
2244                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2245                 my ($c) = $commit->run;
2246                 unless (defined $c and $c) {
2247                         $rollback->run;
2248                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2249                 }
2250         }
2251
2252         return $ret;
2253 }
# Publish the bibliographic ingest under its four API names.  Every name
# dispatches to wormize(); that sub only processes more than one record
# when the API name it was invoked under ends in "batch".
# NOTE(review): the "no_map" variants presumably disable metarecord
# mapping inside wormize() -- the code that sets that flag is not shown
# here, confirm against the top of the sub.
for my $worm_api_name (
	"open-ils.worm.wormize",
	"open-ils.worm.wormize.no_map",
	"open-ils.worm.wormize.batch",
	"open-ils.worm.wormize.no_map.batch",
) {
	__PACKAGE__->register_method(
		api_name	=> $worm_api_name,
		method		=> "wormize",
		api_level	=> 1,
		argc		=> 1,
	);
}
2278
2279
# File-scoped slots for the authority ingest.  All of them start out
# undefined; authority_wormize() fills $alookup, $aupdate_entry,
# $arm_old_rd, $arm_old_fr, $ard_create and $afr_create on first use via
# method_lookup().  The remaining names mirror the bibliographic ingest's
# handles but are not assigned anywhere in the code following this point.
my (
	# transaction control
	$ain_xact, $abegin, $acommit, $arollback,

	# record retrieval / update
	$alookup, $aupdate_entry,

	# metarecord handling
	$amr_lookup, $amr_update, $amr_create,
	$acreate_source_map, $asm_lookup,

	# removal of previously ingested rows
	$arm_old_rd, $arm_old_sm, $arm_old_fr,
	$arm_old_tr, $arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser,

	# creation of freshly ingested rows
	$afr_create, $ard_create,
);

# Per-class creation handles; starts as an empty hash reference.
my $acreate = {};
2303
# Ingest authority records: rebuild the record_descriptor and full_rec
# rows of each record from its stored MARCXML.
#
# Arguments (after the usual $self / $client pair):
#   @docids - ids of the authority record entries to ingest.
#
# Only the first record returned by the retrieve call is processed unless
# the API name this method was invoked under ends in "batch".  The old
# descriptor / full_rec rows are deleted for every id in @docids.
#
# Returns the number of records processed.
#
# Throws OpenSRF::EX::PANIC when a transaction cannot be started or
# committed, or when one of the batch.create calls returns an undefined
# result.
#
# The transaction handles ($in_xact, $begin, $commit, $rollback) are the
# file-level ones that wormize() also uses; everything specific to
# authority records lives in the "a"-prefixed file-level handles.
sub authority_wormize {

	my $self = shift;
	my $client = shift;
	my @docids = @_;

	# Resolve each storage method once and cache it for later calls.
	$in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
		unless ($in_xact);
	$begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
		unless ($begin);
	$commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
		unless ($commit);
	$rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
		unless ($rollback);
	$alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
		unless ($alookup);
	# NOTE(review): $aupdate_entry is resolved but never run below; the
	# record entries themselves are not modified by this method.
	$aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
		unless ($aupdate_entry);
	$arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
		unless ($arm_old_rd);
	$arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
		unless ($arm_old_fr);
	$ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
		unless ($ard_create);
	$afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
		unless ($afr_create);


	# Start our own transaction only when the caller has not already
	# opened one; in that case we are also the ones who commit it below.
	my ($outer_xact) = $in_xact->run;
	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			my ($r) = $begin->run($client);
			unless (defined $r and $r) {
				$rollback->run;
				throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
			}
		}
	} catch Error with {
		throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
	};

	my @rd_list;
	my @ns_list;
	my $ret = 0;

	# Retrieve through the authority handle ($alookup), not the
	# bibliographic $lookup used by wormize().
	for my $entry ( $alookup->run(@docids) ) {
		# step -1: grab the doc from storage
		next unless ($entry);

		# TODO: no MADS transformation is performed yet; only the raw
		# MARCXML is used ($mads_sheet is not touched here).
		my $xml = $entry->marc;
		my $docid = $entry->id;
		my $marcdoc = $parser->parse_string($xml);

		# $ldr and $oo8 are not used directly below; presumably the code
		# strings stored in %descriptor_code refer to them by name when
		# they are eval'ed -- do not rename without checking that table.
		my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
		my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

		my $rd_obj = Fieldmapper::authority::record_descriptor->new;
		for my $rd_field ( keys %descriptor_code ) {
			$rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
		}
		$rd_obj->record( $docid );
		push @rd_list, $rd_obj;

		# step 2: build the KOHA rows
		my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
		$_->record( $docid ) for (@tmp_list);
		push @ns_list, @tmp_list;

		$ret++;

		# Non-batch API names handle a single record only.
		last unless ($self->api_name =~ /batch$/o);
	}

	# Drop whatever was ingested before for these records ...
	$arm_old_rd->run( { record => \@docids } );
	$arm_old_fr->run( { record => \@docids } );

	# ... and store the freshly built rows.
	my ($rd) = $ard_create->run(@rd_list);
	unless (defined $rd) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
	}

	# Write through the authority handle ($afr_create), not the
	# bibliographic $fr_create used by wormize().
	my ($fr) = $afr_create->run(@ns_list);
	unless (defined $fr) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
	}

	unless ($outer_xact) {
		$log->debug("Commiting transaction started by Ingest.", INFO);
		my ($c) = $commit->run;
		unless (defined $c and $c) {
			$rollback->run;
			throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
		}
	}

	return $ret;
}
# Publish the authority ingest.  These names must dispatch to
# authority_wormize(); pointing them at "wormize" would run the
# bibliographic ingest and leave authority_wormize() unreachable.
#
# "open-ils.worm.authortiy.wormize" is a historical misspelling.  It is
# kept as an alias so existing callers keep working; the correctly
# spelled name is registered alongside it.
for my $authority_api_name (
	"open-ils.worm.authority.wormize",
	"open-ils.worm.authortiy.wormize",
	"open-ils.worm.authority.wormize.batch",
) {
	__PACKAGE__->register_method(
		api_name	=> $authority_api_name,
		method		=> "authority_wormize",
		api_level	=> 1,
		argc		=> 1,
	);
}
2434
2435
2436 # --------------------------------------------------------------------------------
2437
2438
# Flatten a parsed MARCXML document into a list of full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document; its documentElement is the MARC record.
#   $type    - class name of the row objects to build (optional, defaults
#              to 'Fieldmapper::metabib::full_rec').
#
# Returns a list with one row per leader, one per controlfield and one
# per element child of every datafield.  All rows are instances of
# $type.  Values are NFD-normalized with combining marks removed;
# datafield values are additionally lower-cased.  The caller is expected
# to fill in the owning record id.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $type = shift || 'Fieldmapper::metabib::full_rec';

	my @ns_list;

	my $root = $marcxml->documentElement;

	# Decompose the text and drop the combining marks, leaving the base
	# characters only.
	my $strip_marks = sub {
		my $val = NFD( shift );
		$val =~ s/\pM+//g;
		return $val;
	};

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		# Build the row from $type so callers asking for another row
		# class (e.g. authority) get it for leader rows as well.
		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( $tagline->childNodes ) {
			next unless $data;

			# childNodes also returns text and comment nodes (for
			# instance whitespace between subfields); those have no
			# attributes and are not subfields, so skip them.
			next unless $data->isa('XML::LibXML::Element');

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc( $strip_marks->( $data->textContent ) ) );

			push @ns_list, $ns;
		}
	}
	return @ns_list;
}
2499
# Collect the text found under every node matching $xpath (evaluated
# against $root) into one space-separated string.
#
# For each matching node the text of each child node is appended, unless
# that text already occurs somewhere in the string built so far (a plain,
# case-sensitive substring check).  A matching node without children
# contributes its own text content instead.
#
# Returns the collected string NFD-normalized, with combining marks
# removed, and lower-cased.  Every appended value is followed by a single
# space, so a non-empty result ends in a space.
sub _get_field_value {

	my( $root, $xpath ) = @_;

	my $string = "";

	# grab the set of matching nodes
	my @nodes = $root->findnodes( $xpath );
	for my $value (@nodes) {

		# grab all children of the node
		my @children = $value->childNodes();
		for my $child (@children) {

			# add the childs content to the growing buffer
			my $content = $child->textContent;

			# uniquify the values.  index() is used rather than a
			# pattern match on purpose: an empty pattern makes Perl
			# re-use the last successfully matched regex, so empty
			# content would be tested against an unrelated pattern.
			next if ( index($string, $content) >= 0 );

			$string .= $content . " ";
		}
		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	$string = NFD($string);
	$string =~ s/\pM//g;
	return lc($string);
}
2527
2528
# Build the per-class / per-field skeleton for one MODS document.
#
# For every class in the file-level $xpathset, and every field type under
# that class, the result holds a hash with a single key, field_id, copied
# from the {id} slot of the matching $xpathset entry:
#
#   { $class => { $type => { field_id => ... } } }
#
# Returns that structure as a hash reference.
#
# NOTE(review): $mods is accepted but not consulted, and no "value" key
# is filled in here -- confirm whether the values are meant to be
# extracted from $mods at this point.
sub modsdoc_to_values {
	my( $self, $mods ) = @_;

	my %fields_by_class;
	foreach my $class_name (keys %$xpathset) {
		$fields_by_class{$class_name} = {
			map {
				( $_ => { field_id => $xpathset->{$class_name}->{$_}->{id} } )
			} keys %{ $xpathset->{$class_name} }
		};
	}

	return \%fields_by_class;
}
2541
2542
2543 1;
2544
2545