]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
importer parts
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
20 our %supported_formats = (
21         mods3   => {ns => 'http://www.loc.gov/mods/v3'},
22         mods    => {ns => 'http://www.loc.gov/mods/'},
23         marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24         srw_dc  => {ns => 'info:srw/schema/1/dc-schema'},
25         oai_dc  => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
26         rdf_dc  => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
27         atom    => {ns => 'http://www.w3.org/2005/Atom'},
28         rss091  => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
29         rss092  => {ns => ''},
30         rss093  => {ns => ''},
31         rss094  => {ns => ''},
32         rss10   => {ns => 'http://purl.org/rss/1.0/'},
33         rss11   => {ns => 'http://purl.org/net/rss1.1#'},
34         rss2    => {ns => ''},
35 );
36
37
38 my $log = 'OpenSRF::Utils::Logger';
39
40 my  $parser = XML::LibXML->new();
41 my  $xslt = XML::LibXSLT->new();
42
43 my  $mods_sheet;
44 my  $mads_sheet;
45 my  $xpathset = {};
46 sub initialize {}
47 sub child_init {}
48
49 sub post_init {
50
51         unless (keys %$xpathset) {
52                 $log->debug("Running post_init", DEBUG);
53
54                 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
55
56                 unless ($supported_formats{mods}{xslt}) {
57                         $log->debug("Loading MODS XSLT", DEBUG);
58                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
59                         $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
60                 }
61
62                 unless ($supported_formats{mods3}{xslt}) {
63                         $log->debug("Loading MODS v3 XSLT", DEBUG);
64                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
65                         $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
66                 }
67
68
69                 my $req = OpenSRF::AppSession
70                                 ->create('open-ils.cstore')
71                                 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
72                                 ->gather(1);
73
74                 if (ref $req and @$req) {
75                         for my $f (@$req) {
76                                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
77                                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
78                                 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
79                                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
80                         }
81                 }
82         }
83 }
84
85 sub entityize {
86         my $stuff = shift;
87         my $form = shift;
88
89         if ($form eq 'D') {
90                 $stuff = NFD($stuff);
91         } else {
92                 $stuff = NFC($stuff);
93         }
94
95         $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
96         return $stuff;
97 }
98
99 # --------------------------------------------------------------------------------
100 # Biblio ingest
101
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
105
106 sub ro_biblio_ingest_single_object {
107         my $self = shift;
108         my $client = shift;
109         my $bib = shift;
110         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
111
112         my $document = $parser->parse_string($xml);
113
114         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
115         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
116         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
117         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
118
119         $_->source($bib->id) for (@mXfe);
120         $_->record($bib->id) for (@mfr);
121         $rd->record($bib->id) if ($rd);
122
123         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
124 }
125 __PACKAGE__->register_method(  
126         api_name        => "open-ils.ingest.full.biblio.object.readonly",
127         method          => "ro_biblio_ingest_single_object",
128         api_level       => 1,
129         argc            => 1,
130 );                      
131
132 sub ro_biblio_ingest_single_xml {
133         my $self = shift;
134         my $client = shift;
135         my $xml = OpenILS::Application::Ingest::entityize(shift);
136
137         my $document = $parser->parse_string($xml);
138
139         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
140         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
141         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
142         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
143
144         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
145 }
146 __PACKAGE__->register_method(  
147         api_name        => "open-ils.ingest.full.biblio.xml.readonly",
148         method          => "ro_biblio_ingest_single_xml",
149         api_level       => 1,
150         argc            => 1,
151 );                      
152
153 sub ro_biblio_ingest_single_record {
154         my $self = shift;
155         my $client = shift;
156         my $rec = shift;
157
158         OpenILS::Application::Ingest->post_init();
159         my $r = OpenSRF::AppSession
160                         ->create('open-ils.cstore')
161                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
162                         ->gather(1);
163
164         return undef unless ($r and @$r);
165
166         my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
167
168         $_->source($rec) for (@{$res->{field_entries}});
169         $_->record($rec) for (@{$res->{full_rec}});
170         $res->{descriptor}->record($rec);
171
172         return $res;
173 }
174 __PACKAGE__->register_method(  
175         api_name        => "open-ils.ingest.full.biblio.record.readonly",
176         method          => "ro_biblio_ingest_single_record",
177         api_level       => 1,
178         argc            => 1,
179 );                      
180
181 sub ro_biblio_ingest_stream_record {
182         my $self = shift;
183         my $client = shift;
184
185         OpenILS::Application::Ingest->post_init();
186
187         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
188
189         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
190         
191                 my $rec = $resp->content;
192                 last unless (defined $rec);
193
194                 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
195                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
196
197                 $_->source($rec) for (@{$res->{field_entries}});
198                 $_->record($rec) for (@{$res->{full_rec}});
199
200                 $client->respond( $res );
201         }
202
203         return undef;
204 }
205 __PACKAGE__->register_method(  
206         api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
207         method          => "ro_biblio_ingest_stream_record",
208         api_level       => 1,
209         stream          => 1,
210 );                      
211
212 sub ro_biblio_ingest_stream_xml {
213         my $self = shift;
214         my $client = shift;
215
216         OpenILS::Application::Ingest->post_init();
217
218         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
219
220         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
221         
222                 my $xml = $resp->content;
223                 last unless (defined $xml);
224
225                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
226                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
227
228                 $client->respond( $res );
229         }
230
231         return undef;
232 }
233 __PACKAGE__->register_method(  
234         api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
235         method          => "ro_biblio_ingest_stream_xml",
236         api_level       => 1,
237         stream          => 1,
238 );                      
239
240 sub rw_biblio_ingest_stream_import {
241         my $self = shift;
242         my $client = shift;
243
244         OpenILS::Application::Ingest->post_init();
245
246         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
247
248         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
249         
250                 my $bib = $resp->content;
251                 last unless (defined $bib);
252
253                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
254                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
255
256                 $_->source($bib->id) for (@{$res->{field_entries}});
257                 $_->record($bib->id) for (@{$res->{full_rec}});
258
259                 $client->respond( $res );
260         }
261
262         return undef;
263 }
264 __PACKAGE__->register_method(  
265         api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
266         method          => "rw_biblio_ingest_stream_import",
267         api_level       => 1,
268         stream          => 1,
269 );                      
270
271
272 # --------------------------------------------------------------------------------
273 # Authority ingest
274
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
278
279 sub ro_authority_ingest_single_object {
280         my $self = shift;
281         my $client = shift;
282         my $bib = shift;
283         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
284
285         my $document = $parser->parse_string($xml);
286
287         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
288
289         $_->record($bib->id) for (@mfr);
290
291         return { full_rec => \@mfr };
292 }
293 __PACKAGE__->register_method(  
294         api_name        => "open-ils.ingest.full.authority.object.readonly",
295         method          => "ro_authority_ingest_single_object",
296         api_level       => 1,
297         argc            => 1,
298 );                      
299
300 sub ro_authority_ingest_single_xml {
301         my $self = shift;
302         my $client = shift;
303         my $xml = OpenILS::Application::Ingest::entityize(shift);
304
305         my $document = $parser->parse_string($xml);
306
307         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
308
309         return { full_rec => \@mfr };
310 }
311 __PACKAGE__->register_method(  
312         api_name        => "open-ils.ingest.full.authority.xml.readonly",
313         method          => "ro_authority_ingest_single_xml",
314         api_level       => 1,
315         argc            => 1,
316 );                      
317
318 sub ro_authority_ingest_single_record {
319         my $self = shift;
320         my $client = shift;
321         my $rec = shift;
322
323         OpenILS::Application::Ingest->post_init();
324         my $r = OpenSRF::AppSession
325                         ->create('open-ils.cstore')
326                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
327                         ->gather(1);
328
329         return undef unless ($r and @$r);
330
331         my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
332
333         $_->record($rec) for (@{$res->{full_rec}});
334         $res->{descriptor}->record($rec);
335
336         return $res;
337 }
338 __PACKAGE__->register_method(  
339         api_name        => "open-ils.ingest.full.authority.record.readonly",
340         method          => "ro_authority_ingest_single_record",
341         api_level       => 1,
342         argc            => 1,
343 );                      
344
345 sub ro_authority_ingest_stream_record {
346         my $self = shift;
347         my $client = shift;
348
349         OpenILS::Application::Ingest->post_init();
350
351         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
352
353         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
354         
355                 my $rec = $resp->content;
356                 last unless (defined $rec);
357
358                 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
359                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
360
361                 $_->record($rec) for (@{$res->{full_rec}});
362
363                 $client->respond( $res );
364         }
365
366         return undef;
367 }
368 __PACKAGE__->register_method(  
369         api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
370         method          => "ro_authority_ingest_stream_record",
371         api_level       => 1,
372         stream          => 1,
373 );                      
374
375 sub ro_authority_ingest_stream_xml {
376         my $self = shift;
377         my $client = shift;
378
379         OpenILS::Application::Ingest->post_init();
380
381         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
382
383         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
384         
385                 my $xml = $resp->content;
386                 last unless (defined $xml);
387
388                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
389                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
390
391                 $client->respond( $res );
392         }
393
394         return undef;
395 }
396 __PACKAGE__->register_method(  
397         api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
398         method          => "ro_authority_ingest_stream_xml",
399         api_level       => 1,
400         stream          => 1,
401 );                      
402
403 sub rw_authority_ingest_stream_import {
404         my $self = shift;
405         my $client = shift;
406
407         OpenILS::Application::Ingest->post_init();
408
409         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
410
411         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
412         
413                 my $bib = $resp->content;
414                 last unless (defined $bib);
415
416                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
417                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
418
419                 $_->record($bib->id) for (@{$res->{full_rec}});
420
421                 $client->respond( $res );
422         }
423
424         return undef;
425 }
426 __PACKAGE__->register_method(  
427         api_name        => "open-ils.ingest.full.authority.bib_stream.import",
428         method          => "rw_authority_ingest_stream_import",
429         api_level       => 1,
430         stream          => 1,
431 );                      
432
433
434 # --------------------------------------------------------------------------------
435 # MARC index extraction
436
437 package OpenILS::Application::Ingest::XPATH;
438 use base qw/OpenILS::Application::Ingest/;
439 use Unicode::Normalize;
440
441 # give this an XML documentElement and an XPATH expression
442 sub xpath_to_string {
443         my $xml = shift;
444         my $xpath = shift;
445         my $ns_uri = shift;
446         my $ns_prefix = shift;
447         my $unique = shift;
448
449         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
450
451         my $string = "";
452
453         # grab the set of matching nodes
454         my @nodes = $xml->findnodes( $xpath );
455         for my $value (@nodes) {
456
457                 # grab all children of the node
458                 my @children = $value->childNodes();
459                 for my $child (@children) {
460
461                         # add the childs content to the growing buffer
462                         my $content = quotemeta($child->textContent);
463                         next if ($unique && $string =~ /$content/);  # uniquify the values
464                         $string .= $child->textContent . " ";
465                 }
466                 if( ! @children ) {
467                         $string .= $value->textContent . " ";
468                 }
469         }
470         return NFD($string);
471 }
472
473 sub class_index_string_xml {
474         my $self = shift;
475         my $client = shift;
476         my $xml = shift;
477         my @classes = @_;
478
479         OpenILS::Application::Ingest->post_init();
480         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
481
482         my %transform_cache;
483         
484         for my $class (@classes) {
485                 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
486                 for my $type ( keys %{ $xpathset->{$class} } ) {
487
488                         my $def = $xpathset->{$class}->{$type};
489                         my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
490
491                         my $document = $xml;
492
493                         if ($sf->{xslt}) {
494                                 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
495                                 $transform_cache{$def->{format}} = $document;
496                         }
497
498                         my $value =  xpath_to_string(
499                                         $document->documentElement      => $def->{xpath},
500                                         $sf->{ns}                       => $def->{format},
501                                         1
502                         );
503
504                         next unless $value;
505
506                         $value = NFD($value);
507                         $value =~ s/\pM+//sgo;
508                         $value =~ s/\pC+//sgo;
509                         $value =~ s/\W+$//sgo;
510
511                         $value =~ s/(\w)\.+(\w)/$1$2/sgo;
512                         $value = lc($value);
513
514                         my $fm = $class_constructor->new;
515                         $fm->value( $value );
516                         $fm->field( $xpathset->{$class}->{$type}->{id} );
517                         $client->respond($fm);
518                 }
519         }
520         return undef;
521 }
522 __PACKAGE__->register_method(  
523         api_name        => "open-ils.ingest.field_entry.class.xml",
524         method          => "class_index_string_xml",
525         api_level       => 1,
526         argc            => 2,
527         stream          => 1,
528 );                      
529
530 sub class_index_string_record {
531         my $self = shift;
532         my $client = shift;
533         my $rec = shift;
534         my @classes = shift;
535
536         OpenILS::Application::Ingest->post_init();
537         my $r = OpenSRF::AppSession
538                         ->create('open-ils.cstore')
539                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
540                         ->gather(1);
541
542         return undef unless ($r and @$r);
543
544         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
545                 $fm->source($rec);
546                 $client->respond($fm);
547         }
548         return undef;
549 }
550 __PACKAGE__->register_method(  
551         api_name        => "open-ils.ingest.field_entry.class.record",
552         method          => "class_index_string_record",
553         api_level       => 1,
554         argc            => 2,
555         stream          => 1,
556 );                      
557
558 sub all_index_string_xml {
559         my $self = shift;
560         my $client = shift;
561         my $xml = shift;
562
563         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
564                 $client->respond($fm);
565         }
566         return undef;
567 }
568 __PACKAGE__->register_method(  
569         api_name        => "open-ils.ingest.extract.field_entry.all.xml",
570         method          => "all_index_string_xml",
571         api_level       => 1,
572         argc            => 1,
573         stream          => 1,
574 );                      
575
576 sub all_index_string_record {
577         my $self = shift;
578         my $client = shift;
579         my $rec = shift;
580
581         OpenILS::Application::Ingest->post_init();
582         my $r = OpenSRF::AppSession
583                         ->create('open-ils.cstore')
584                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
585                         ->gather(1);
586
587         return undef unless ($r and @$r);
588
589         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
590                 $fm->source($rec);
591                 $client->respond($fm);
592         }
593         return undef;
594 }
595 __PACKAGE__->register_method(  
596         api_name        => "open-ils.ingest.extract.field_entry.all.record",
597         method          => "all_index_string_record",
598         api_level       => 1,
599         argc            => 1,
600         stream          => 1,
601 );                      
602
603 # --------------------------------------------------------------------------------
604 # Flat MARC
605
606 package OpenILS::Application::Ingest::FlatMARC;
607 use base qw/OpenILS::Application::Ingest/;
608 use Unicode::Normalize;
609
610
611 sub _marcxml_to_full_rows {
612
613         my $marcxml = shift;
614         my $xmltype = shift || 'metabib';
615
616         my $type = "Fieldmapper::${xmltype}::full_rec";
617
618         my @ns_list;
619         
620         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
621
622         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
623                 next unless $tagline;
624
625                 my $ns = $type->new;
626
627                 $ns->tag( 'LDR' );
628                 my $val = $tagline->textContent;
629                 $val = NFD($val);
630                 $val =~ s/\pM+//sgo;
631                 $val =~ s/\pC+//sgo;
632                 $val =~ s/\W+$//sgo;
633                 $ns->value( $val );
634
635                 push @ns_list, $ns;
636         }
637
638         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
639                 next unless $tagline;
640
641                 my $ns = $type->new;
642
643                 $ns->tag( $tagline->getAttribute( "tag" ) );
644                 my $val = $tagline->textContent;
645                 $val = NFD($val);
646                 $val =~ s/\pM+//sgo;
647                 $val =~ s/\pC+//sgo;
648                 $val =~ s/\W+$//sgo;
649                 $ns->value( $val );
650
651                 push @ns_list, $ns;
652         }
653
654         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
655                 next unless $tagline;
656
657                 my $tag = $tagline->getAttribute( "tag" );
658                 my $ind1 = $tagline->getAttribute( "ind1" );
659                 my $ind2 = $tagline->getAttribute( "ind2" );
660
661                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
662                         next unless $data;
663
664                         my $ns = $type->new;
665
666                         $ns->tag( $tag );
667                         $ns->ind1( $ind1 );
668                         $ns->ind2( $ind2 );
669                         $ns->subfield( $data->getAttribute( "code" ) );
670                         my $val = $data->textContent;
671                         $val = NFD($val);
672                         $val =~ s/\pM+//sgo;
673                         $val =~ s/\pC+//sgo;
674                         $val =~ s/\W+$//sgo;
675                         $ns->value( lc($val) );
676
677                         push @ns_list, $ns;
678                 }
679         }
680
681         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
682         return @ns_list;
683 }
684
685 sub flat_marc_xml {
686         my $self = shift;
687         my $client = shift;
688         my $xml = shift;
689
690         $log->debug("processing [$xml]");
691
692         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
693
694         my $type = 'metabib';
695         $type = 'authority' if ($self->api_name =~ /authority/o);
696
697         OpenILS::Application::Ingest->post_init();
698
699         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
700         return undef;
701 }
702 __PACKAGE__->register_method(  
703         api_name        => "open-ils.ingest.flat_marc.authority.xml",
704         method          => "flat_marc_xml",
705         api_level       => 1,
706         argc            => 1,
707         stream          => 1,
708 );                      
709 __PACKAGE__->register_method(  
710         api_name        => "open-ils.ingest.flat_marc.biblio.xml",
711         method          => "flat_marc_xml",
712         api_level       => 1,
713         argc            => 1,
714         stream          => 1,
715 );                      
716
717 sub flat_marc_record {
718         my $self = shift;
719         my $client = shift;
720         my $rec = shift;
721
722         my $type = 'biblio';
723         $type = 'authority' if ($self->api_name =~ /authority/o);
724
725         OpenILS::Application::Ingest->post_init();
726         my $r = OpenSRF::AppSession
727                         ->create('open-ils.cstore')
728                         ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
729                         ->gather(1);
730
731
732         return undef unless ($r and $r->marc);
733
734         my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
735         for my $row (@rows) {
736                 $client->respond($row);
737                 $log->debug(JSON->perl2JSON($row), DEBUG);
738         }
739         return undef;
740 }
741 __PACKAGE__->register_method(  
742         api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
743         method          => "flat_marc_record",
744         api_level       => 1,
745         argc            => 1,
746         stream          => 1,
747 );                      
748 __PACKAGE__->register_method(  
749         api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
750         method          => "flat_marc_record",
751         api_level       => 1,
752         argc            => 1,
753         stream          => 1,
754 );                      
755
756 # --------------------------------------------------------------------------------
757 # Fingerprinting
758
759 package OpenILS::Application::Ingest::Biblio::Fingerprint;
760 use base qw/OpenILS::Application::Ingest/;
761 use Unicode::Normalize;
762 use OpenSRF::EX qw/:try/;
763
764 sub biblio_fingerprint_record {
765         my $self = shift;
766         my $client = shift;
767         my $rec = shift;
768
769         OpenILS::Application::Ingest->post_init();
770
771         my $r = OpenSRF::AppSession
772                         ->create('open-ils.cstore')
773                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
774                         ->gather(1);
775
776         return undef unless ($r and $r->marc);
777
778         my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
779         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
780         $fp->{quality} = int($fp->{quality});
781         return $fp;
782 }
783 __PACKAGE__->register_method(  
784         api_name        => "open-ils.ingest.fingerprint.record",
785         method          => "biblio_fingerprint_record",
786         api_level       => 1,
787         argc            => 1,
788 );                      
789
790 our $fp_script;
791 sub biblio_fingerprint {
792         my $self = shift;
793         my $client = shift;
794         my $xml = OpenILS::Application::Ingest::entityize(shift);
795
796         $log->internal("Got MARC [$xml]");
797
798         if(!$fp_script) {
799                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
800                 my $conf = OpenSRF::Utils::SettingsClient->new;
801
802                 my $libs        = $conf->config_value(@pfx, 'script_path');
803                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
804                 my $script_libs = (ref($libs)) ? $libs : [$libs];
805
806                 $log->debug("Loading script $script_file for biblio fingerprinting...");
807                 
808                 $fp_script = new OpenILS::Utils::ScriptRunner
809                         ( file          => $script_file,
810                           paths         => $script_libs,
811                           reset_count   => 100 );
812         }
813
814         $fp_script->insert('environment' => {marc => $xml} => 1);
815
816         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return undef);
817         $log->debug("Script for biblio fingerprinting completed successfully...");
818
819         return $res;
820 }
821 __PACKAGE__->register_method(  
822         api_name        => "open-ils.ingest.fingerprint.xml",
823         method          => "biblio_fingerprint",
824         api_level       => 1,
825         argc            => 1,
826 );                      
827
828 our $rd_script;
829 sub biblio_descriptor {
830         my $self = shift;
831         my $client = shift;
832         my $xml = OpenILS::Application::Ingest::entityize(shift);
833
834         $log->internal("Got MARC [$xml]");
835
836         if(!$rd_script) {
837                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
838                 my $conf = OpenSRF::Utils::SettingsClient->new;
839
840                 my $libs        = $conf->config_value(@pfx, 'script_path');
841                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
842                 my $script_libs = (ref($libs)) ? $libs : [$libs];
843
844                 $log->debug("Loading script $script_file for biblio descriptor extraction...");
845                 
846                 $rd_script = new OpenILS::Utils::ScriptRunner
847                         ( file          => $script_file,
848                           paths         => $script_libs,
849                           reset_count   => 100 );
850         }
851
852         $log->debug("Setting up environment for descriptor extraction script...");
853         $rd_script->insert('environment.marc' => $xml => 1);
854         $log->debug("Environment building complete...");
855
856         my $res = $rd_script->run || ($log->error( "Descriptor script died!  $@" ) && return undef);
857         $log->debug("Script for biblio descriptor extraction completed successfully");
858
859         return $res;
860 }
861 __PACKAGE__->register_method(  
862         api_name        => "open-ils.ingest.descriptor.xml",
863         method          => "biblio_descriptor",
864         api_level       => 1,
865         argc            => 1,
866 );                      
867
868
869 1;
870
871 __END__
872
873 sub in_transaction {
874         OpenILS::Application::Ingest->post_init();
875         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
876 }
877
878 sub begin_transaction {
879         my $self = shift;
880         my $client = shift;
881         
882         OpenILS::Application::Ingest->post_init();
883         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
884         
885         try {
886                 if (!$outer_xact) {
887                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
888                         #__PACKAGE__->st_sess->connect;
889                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
890                         unless (defined $r and $r) {
891                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
892                                 #__PACKAGE__->st_sess->disconnect;
893                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
894                         }
895                 }
896         } otherwise {
897                 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
898         };
899
900         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
901 }
902
903 sub rollback_transaction {
904         my $self = shift;
905         my $client = shift;
906
907         OpenILS::Application::Ingest->post_init();
908         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
909
910         try {
911                 if ($outer_xact) {
912                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
913                 } else {
914                         $log->debug("Ingest isn't inside a transaction.", INFO);
915                 }
916         } catch Error with {
917                 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
918         };
919
920         return 1;
921 }
922
923 sub commit_transaction {
924         my $self = shift;
925         my $client = shift;
926
927         OpenILS::Application::Ingest->post_init();
928         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
929
930         try {
931                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
932                 if ($outer_xact) {
933                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
934                         unless (defined $r and $r) {
935                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
936                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
937                         }
938                         #__PACKAGE__->st_sess->disconnect;
939                 } else {
940                         $log->debug("Ingest isn't inside a transaction.", INFO);
941                 }
942         } catch Error with {
943                 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
944         };
945
946         return 1;
947 }
948
949 sub storage_req {
950         my $self = shift;
951         my $method = shift;
952         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
953         return shift( @res );
954 }
955
956 sub scrub_authority_record {
957         my $self = shift;
958         my $client = shift;
959         my $rec = shift;
960
961         my $commit = 0;
962         if (!OpenILS::Application::Ingest->in_transaction) {
963                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
964                 $commit = 1;
965         }
966
967         my $success = 1;
968         try {
969                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
970
971                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
972                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
973
974                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
975         } otherwise {
976                 $log->debug('Scrubbing failed : '.shift(), ERROR);
977                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
978                 $success = 0;
979         };
980
981         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
982         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
983         return $success;
984 }
985 __PACKAGE__->register_method(  
986         api_name        => "open-ils.worm.scrub.authority",
987         method          => "scrub_authority_record",
988         api_level       => 1,
989         argc            => 1,
990 );                      
991
992
993 sub scrub_metabib_record {
994         my $self = shift;
995         my $client = shift;
996         my $rec = shift;
997
998         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
999                 $rec = OpenILS::Application::Ingest->storage_req(
1000                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1001                 );
1002         }
1003
1004         my $commit = 0;
1005         if (!OpenILS::Application::Ingest->in_transaction) {
1006                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1007                 $commit = 1;
1008         }
1009
1010         my $success = 1;
1011         try {
1012                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
1013                 
1014                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1015                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1016                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1017                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1018                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1019                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1020                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1021                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1022
1023                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1024                 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1025
1026                 for my $mr (@$masters) {
1027                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
1028                         my $others = OpenILS::Application::Ingest->storage_req(
1029                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
1030
1031                         if (@$others) {
1032                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1033                                 $mr->master_record($others->[0]->source);
1034                                 OpenILS::Application::Ingest->storage_req(
1035                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1036                                         { id => $mr->id },
1037                                         { master_record => $others->[0]->source, mods => undef }
1038                                 );
1039                         } else {
1040                                 warn "Removing metarecord whose master is $rec";
1041                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1042                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1043                                 warn "Metarecord removed";
1044                                 $log->debug( "Metarecord removed", DEBUG);
1045                         }
1046                 }
1047
1048                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
1049
1050         } otherwise {
1051                 $log->debug('Scrubbing failed : '.shift(), ERROR);
1052                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1053                 $success = 0;
1054         };
1055
1056         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1057         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1058         return $success;
1059 }
1060 __PACKAGE__->register_method(  
1061         api_name        => "open-ils.worm.scrub.biblio",
1062         method          => "scrub_metabib_record",
1063         api_level       => 1,
1064         argc            => 1,
1065 );                      
1066
1067 sub wormize_biblio_metarecord {
1068         my $self = shift;
1069         my $client = shift;
1070         my $mrec = shift;
1071
1072         my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1073
1074         my $count = 0;
1075         for my $r (@$recs) {
1076                 my $success = 0;
1077                 try {
1078                         $success = wormize_biblio_record($self => $client => $r->source);
1079                         $client->respond(
1080                                 { record  => $r->source,
1081                                   metarecord => $rec->metarecord,
1082                                   success => $success,
1083                                 }
1084                         );
1085                 } catch Error with {
1086                         my $e = shift;
1087                         $client->respond(
1088                                 { record  => $r->source,
1089                                   metarecord => $rec->metarecord,
1090                                   success => $success,
1091                                   error   => $e,
1092                                 }
1093                         );
1094                 };
1095         }
1096         return undef;
1097 }
1098 __PACKAGE__->register_method(
1099         api_name        => "open-ils.worm.wormize.metarecord",
1100         method          => "wormize_biblio_metarecord",
1101         api_level       => 1,
1102         argc            => 1,
1103         stream          => 1,
1104 );
1105 __PACKAGE__->register_method(
1106         api_name        => "open-ils.worm.wormize.metarecord.nomap",
1107         method          => "wormize_biblio_metarecord",
1108         api_level       => 1,
1109         argc            => 1,
1110         stream          => 1,
1111 );
1112 __PACKAGE__->register_method(
1113         api_name        => "open-ils.worm.wormize.metarecord.noscrub",
1114         method          => "wormize_biblio_metarecord",
1115         api_level       => 1,
1116         argc            => 1,
1117         stream          => 1,
1118 );
1119 __PACKAGE__->register_method(
1120         api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1121         method          => "wormize_biblio_metarecord",
1122         api_level       => 1,
1123         argc            => 1,
1124         stream          => 1,
1125 );
1126
1127
1128 sub wormize_biblio_record {
1129         my $self = shift;
1130         my $client = shift;
1131         my $rec = shift;
1132
1133         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1134                 $rec = OpenILS::Application::Ingest->storage_req(
1135                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1136                 );
1137         }
1138
1139
1140         my $commit = 0;
1141         if (!OpenILS::Application::Ingest->in_transaction) {
1142                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1143                 $commit = 1;
1144         }
1145
1146         my $success = 1;
1147         try {
1148                 # clean up the cruft
1149                 unless ($self->api_name =~ /noscrub/o) {
1150                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1151                 }
1152
1153                 # now redo 'em
1154                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1155
1156                 my @full_rec = ();
1157                 my @rec_descriptor = ();
1158                 my %field_entry = (
1159                         title   => [],
1160                         author  => [],
1161                         subject => [],
1162                         keyword => [],
1163                         series  => [],
1164                 );
1165                 my %metarecord = ();
1166                 my @source_map = ();
1167                 for my $r (@$bibs) {
1168                         try {
1169                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1170
1171                                 my $xml = $parser->parse_string($r->marc);
1172
1173                                 #update the fingerprint
1174                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1175                                 OpenILS::Application::Ingest->storage_req(
1176                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1177                                         { id => $r->id },
1178                                         { fingerprint => $fp->{fingerprint},
1179                                           quality     => int($fp->{quality}) }
1180                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1181
1182                                 # the full_rec stuff
1183                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1184                                         $fr->record( $r->id );
1185                                         push @full_rec, $fr;
1186                                 }
1187
1188                                 # the rec_descriptor stuff
1189                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1190                                 $rd->record( $r->id );
1191                                 push @rec_descriptor, $rd;
1192                         
1193                                 # the indexing field entry stuff
1194                                 for my $class ( qw/title author subject keyword series/ ) {
1195                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1196                                                 $fe->source( $r->id );
1197                                                 push @{$field_entry{$class}}, $fe;
1198                                         }
1199                                 }
1200
1201                                 unless ($self->api_name =~ /nomap/o) {
1202                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
1203                                 
1204                                         unless ($mr) {
1205                                                 $mr = Fieldmapper::metabib::metarecord->new;
1206                                                 $mr->fingerprint( $fp->{fingerprint} );
1207                                                 $mr->master_record( $r->id );
1208                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1209                                         }
1210
1211                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1212                                         $mr_map->metarecord( $mr->id );
1213                                         $mr_map->source( $r->id );
1214                                         push @source_map, $mr_map;
1215
1216                                         $metarecord{$mr->id} = $mr;
1217                                 }
1218                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1219                         } otherwise {
1220                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1221                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1222                         };
1223                 }
1224                 
1225
1226                 if (@rec_descriptor) {
1227                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1228
1229                         OpenILS::Application::Ingest->storage_req(
1230                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1231                                 @source_map
1232                         ) if (@source_map);
1233
1234                         for my $mr ( values %metarecord ) {
1235                                 my $sources = OpenILS::Application::Ingest->storage_req(
1236                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1237                                         $mr->id
1238                                 );
1239
1240                                 my $bibs = OpenILS::Application::Ingest->storage_req(
1241                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1242                                         [ map { $_->source } @$sources ]
1243                                 );
1244
1245                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1246
1247                                 OpenILS::Application::Ingest->storage_req(
1248                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1249                                         { id => $mr->id },
1250                                         { master_record => $master->id, mods => undef }
1251                                 );
1252                         }
1253
1254                         OpenILS::Application::Ingest->storage_req(
1255                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1256                                 @rec_descriptor
1257                         ) if (@rec_descriptor);
1258
1259                         OpenILS::Application::Ingest->storage_req(
1260                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
1261                                 @full_rec
1262                         ) if (@full_rec);
1263
1264                         OpenILS::Application::Ingest->storage_req(
1265                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1266                                 @{ $field_entry{title} }
1267                         ) if (@{ $field_entry{title} });
1268
1269                         OpenILS::Application::Ingest->storage_req(
1270                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1271                                 @{ $field_entry{author} }
1272                         ) if (@{ $field_entry{author} });
1273                         
1274                         OpenILS::Application::Ingest->storage_req(
1275                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1276                                 @{ $field_entry{subject} }
1277                         ) if (@{ $field_entry{subject} });
1278
1279                         OpenILS::Application::Ingest->storage_req(
1280                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1281                                 @{ $field_entry{keyword} }
1282                         ) if (@{ $field_entry{keyword} });
1283
1284                         OpenILS::Application::Ingest->storage_req(
1285                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1286                                 @{ $field_entry{series} }
1287                         ) if (@{ $field_entry{series} });
1288
1289                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1290                 } else {
1291                         $success = 0;
1292                 }
1293
1294         } otherwise {
1295                 $log->debug('Wormization failed : '.shift(), ERROR);
1296                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1297                 $success = 0;
1298         };
1299
1300         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1301         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1302         return $success;
1303 }
1304 __PACKAGE__->register_method(
1305         api_name        => "open-ils.worm.wormize.biblio",
1306         method          => "wormize_biblio_record",
1307         api_level       => 1,
1308         argc            => 1,
1309 );
1310 __PACKAGE__->register_method(
1311         api_name        => "open-ils.worm.wormize.biblio.nomap",
1312         method          => "wormize_biblio_record",
1313         api_level       => 1,
1314         argc            => 1,
1315 );
1316 __PACKAGE__->register_method(
1317         api_name        => "open-ils.worm.wormize.biblio.noscrub",
1318         method          => "wormize_biblio_record",
1319         api_level       => 1,
1320         argc            => 1,
1321 );
1322 __PACKAGE__->register_method(
1323         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
1324         method          => "wormize_biblio_record",
1325         api_level       => 1,
1326         argc            => 1,
1327 );
1328
1329 sub wormize_authority_record {
1330         my $self = shift;
1331         my $client = shift;
1332         my $rec = shift;
1333
1334         my $commit = 0;
1335         if (!OpenILS::Application::Ingest->in_transaction) {
1336                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1337                 $commit = 1;
1338         }
1339
1340         my $success = 1;
1341         try {
1342                 # clean up the cruft
1343                 unless ($self->api_name =~ /noscrub/o) {
1344                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1345                 }
1346
1347                 # now redo 'em
1348                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1349
1350                 my @full_rec = ();
1351                 my @rec_descriptor = ();
1352                 for my $r (@$bibs) {
1353                         my $xml = $parser->parse_string($r->marc);
1354
1355                         # the full_rec stuff
1356                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1357                                 $fr->record( $r->id );
1358                                 push @full_rec, $fr;
1359                         }
1360
1361                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
1362                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1363                         #$rd->record( $r->id );
1364                         #push @rec_descriptor, $rd;
1365                         
1366                 }
1367
1368                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1369
1370                 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1371                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1372
1373                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1374
1375         } otherwise {
1376                 $log->debug('Wormization failed : '.shift(), ERROR);
1377                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
1378                 $success = 0;
1379         };
1380
1381         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1382         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1383         return $success;
1384 }
1385 __PACKAGE__->register_method(
1386         api_name        => "open-ils.worm.wormize.authority",
1387         method          => "wormize_authority_record",
1388         api_level       => 1,
1389         argc            => 1,
1390 );
1391 __PACKAGE__->register_method(
1392         api_name        => "open-ils.worm.wormize.authority.noscrub",
1393         method          => "wormize_authority_record",
1394         api_level       => 1,
1395         argc            => 1,
1396 );
1397
1398
1399 # --------------------------------------------------------------------------------
1400 # MARC index extraction
1401
1402 package OpenILS::Application::Ingest::XPATH;
1403 use base qw/OpenILS::Application::Ingest/;
1404 use Unicode::Normalize;
1405
1406 # give this a MODS documentElement and an XPATH expression
1407 sub _xpath_to_string {
1408         my $xml = shift;
1409         my $xpath = shift;
1410         my $ns_uri = shift;
1411         my $ns_prefix = shift;
1412         my $unique = shift;
1413
1414         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1415
1416         my $string = "";
1417
1418         # grab the set of matching nodes
1419         my @nodes = $xml->findnodes( $xpath );
1420         for my $value (@nodes) {
1421
1422                 # grab all children of the node
1423                 my @children = $value->childNodes();
1424                 for my $child (@children) {
1425
1426                         # add the childs content to the growing buffer
1427                         my $content = quotemeta($child->textContent);
1428                         next if ($unique && $string =~ /$content/);  # uniquify the values
1429                         $string .= $child->textContent . " ";
1430                 }
1431                 if( ! @children ) {
1432                         $string .= $value->textContent . " ";
1433                 }
1434         }
1435         return NFD($string);
1436 }
1437
1438 sub class_all_index_string_xml {
1439         my $self = shift;
1440         my $client = shift;
1441         my $xml = shift;
1442         my $class = shift;
1443
1444         OpenILS::Application::Ingest->post_init();
1445         $xml = $parser->parse_string($xml) unless (ref $xml);
1446         
1447         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1448         for my $type ( keys %{ $xpathset->{$class} } ) {
1449                 my $value =  _xpath_to_string(
1450                                 $mods_sheet->transform($xml)->documentElement,
1451                                 $xpathset->{$class}->{$type}->{xpath},
1452                                 "http://www.loc.gov/mods/",
1453                                 "mods",
1454                                 1
1455                 );
1456
1457                 next unless $value;
1458
1459                 $value = NFD($value);
1460                 $value =~ s/\pM+//sgo;
1461                 $value =~ s/\pC+//sgo;
1462                 $value =~ s/\W+$//sgo;
1463
1464                 $value =~ s/(\w)\./$1/sgo;
1465                 $value = lc($value);
1466
1467                 my $fm = $class_constructor->new;
1468                 $fm->value( $value );
1469                 $fm->field( $xpathset->{$class}->{$type}->{id} );
1470                 $client->respond($fm);
1471         }
1472         return undef;
1473 }
1474 __PACKAGE__->register_method(  
1475         api_name        => "open-ils.worm.field_entry.class.xml",
1476         method          => "class_all_index_string_xml",
1477         api_level       => 1,
1478         argc            => 1,
1479         stream          => 1,
1480 );                      
1481
1482 sub class_all_index_string_record {
1483         my $self = shift;
1484         my $client = shift;
1485         my $rec = shift;
1486         my $class = shift;
1487
1488         OpenILS::Application::Ingest->post_init();
1489         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1490
1491         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1492                 $fm->source($rec);
1493                 $client->respond($fm);
1494         }
1495         return undef;
1496 }
1497 __PACKAGE__->register_method(  
1498         api_name        => "open-ils.worm.field_entry.class.record",
1499         method          => "class_all_index_string_record",
1500         api_level       => 1,
1501         argc            => 1,
1502         stream          => 1,
1503 );                      
1504
1505
1506 sub class_index_string_xml {
1507         my $self = shift;
1508         my $client = shift;
1509         my $xml = shift;
1510         my $class = shift;
1511         my $type = shift;
1512
1513         OpenILS::Application::Ingest->post_init();
1514         $xml = $parser->parse_string($xml) unless (ref $xml);
1515         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1516 }
1517 __PACKAGE__->register_method(  
1518         api_name        => "open-ils.worm.class.type.xml",
1519         method          => "class_index_string_xml",
1520         api_level       => 1,
1521         argc            => 1,
1522 );                      
1523
1524 sub class_index_string_record {
1525         my $self = shift;
1526         my $client = shift;
1527         my $rec = shift;
1528         my $class = shift;
1529         my $type = shift;
1530
1531         OpenILS::Application::Ingest->post_init();
1532         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1533
1534         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1535         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1536         return $d;
1537 }
1538 __PACKAGE__->register_method(  
1539         api_name        => "open-ils.worm.class.type.record",
1540         method          => "class_index_string_record",
1541         api_level       => 1,
1542         argc            => 1,
1543 );                      
1544
1545 sub xml_xpath {
1546         my $self = shift;
1547         my $client = shift;
1548         my $xml = shift;
1549         my $xpath = shift;
1550         my $uri = shift;
1551         my $prefix = shift;
1552         my $unique = shift;
1553
1554         OpenILS::Application::Ingest->post_init();
1555         $xml = $parser->parse_string($xml) unless (ref $xml);
1556         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1557 }
1558 __PACKAGE__->register_method(  
1559         api_name        => "open-ils.worm.xpath.xml",
1560         method          => "xml_xpath",
1561         api_level       => 1,
1562         argc            => 1,
1563 );                      
1564
1565 sub record_xpath {
1566         my $self = shift;
1567         my $client = shift;
1568         my $rec = shift;
1569         my $xpath = shift;
1570         my $uri = shift;
1571         my $prefix = shift;
1572         my $unique = shift;
1573
1574         OpenILS::Application::Ingest->post_init();
1575         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1576
1577         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1578         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1579         return $d;
1580 }
1581 __PACKAGE__->register_method(  
1582         api_name        => "open-ils.worm.xpath.record",
1583         method          => "record_xpath",
1584         api_level       => 1,
1585         argc            => 1,
1586 );                      
1587
1588
1589 # --------------------------------------------------------------------------------
1590 # MARC Descriptor
1591
1592 package OpenILS::Application::Ingest::Biblio::Leader;
1593 use base qw/OpenILS::Application::Ingest/;
1594 use Unicode::Normalize;
1595
1596 our %marc_type_groups = (
1597         BKS => q/[at]{1}/,
1598         SER => q/[a]{1}/,
1599         VIS => q/[gkro]{1}/,
1600         MIX => q/[p]{1}/,
1601         MAP => q/[ef]{1}/,
1602         SCO => q/[cd]{1}/,
1603         REC => q/[ij]{1}/,
1604         COM => q/[m]{1}/,
1605 );
1606
1607 sub _type_re {
1608         my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
1609         return qr/$re/;
1610 }
1611
1612 our %biblio_descriptor_code = (
1613         item_type => sub { substr($ldr,6,1); },
1614         item_form =>
1615                 sub {
1616                         if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1617                                 return substr($oo8,29,1);
1618                         } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1619                                 return substr($oo8,23,1);
1620                         }
1621                         return ' ';
1622                 },
1623         bib_level => sub { substr($ldr,7,1); },
1624         control_type => sub { substr($ldr,8,1); },
1625         char_encoding => sub { substr($ldr,9,1); },
1626         enc_level => sub { substr($ldr,17,1); },
1627         cat_form => sub { substr($ldr,18,1); },
1628         pub_status => sub { substr($ldr,5,1); },
1629         item_lang => sub { substr($oo8,35,3); },
1630         lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1631         type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1632         audience => sub { substr($oo8,22,1); },
1633 );
1634
1635 sub _extract_biblio_descriptors {
1636         my $xml = shift;
1637
1638         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1639         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1640         local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1641
1642         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1643         for my $rd_field ( keys %biblio_descriptor_code ) {
1644                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1645         }
1646
1647         return $rd_obj;
1648 }
1649
1650 sub extract_biblio_desc_xml {
1651         my $self = shift;
1652         my $client = shift;
1653         my $xml = shift;
1654
1655         $xml = $parser->parse_string($xml) unless (ref $xml);
1656
1657         return _extract_biblio_descriptors( $xml );
1658 }
1659 __PACKAGE__->register_method(  
1660         api_name        => "open-ils.worm.biblio_leader.xml",
1661         method          => "extract_biblio_desc_xml",
1662         api_level       => 1,
1663         argc            => 1,
1664 );                      
1665
1666 sub extract_biblio_desc_record {
1667         my $self = shift;
1668         my $client = shift;
1669         my $rec = shift;
1670
1671         OpenILS::Application::Ingest->post_init();
1672         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1673
1674         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1675         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1676         return $d;
1677 }
1678 __PACKAGE__->register_method(  
1679         api_name        => "open-ils.worm.biblio_leader.record",
1680         method          => "extract_biblio_desc_record",
1681         api_level       => 1,
1682         argc            => 1,
1683 );                      
1684
1685 # --------------------------------------------------------------------------------
1686 # Flat MARC
1687
1688 package OpenILS::Application::Ingest::FlatMARC;
1689 use base qw/OpenILS::Application::Ingest/;
1690 use Unicode::Normalize;
1691
1692
1693 sub _marcxml_to_full_rows {
1694
1695         my $marcxml = shift;
1696         my $xmltype = shift || 'metabib';
1697
1698         my $type = "Fieldmapper::${xmltype}::full_rec";
1699
1700         my @ns_list;
1701         
1702         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1703
1704         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1705                 next unless $tagline;
1706
1707                 my $ns = $type->new;
1708
1709                 $ns->tag( 'LDR' );
1710                 my $val = $tagline->textContent;
1711                 $val = NFD($val);
1712                 $val =~ s/\pM+//sgo;
1713                 $val =~ s/\pC+//sgo;
1714                 $val =~ s/\W+$//sgo;
1715                 $ns->value( $val );
1716
1717                 push @ns_list, $ns;
1718         }
1719
1720         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1721                 next unless $tagline;
1722
1723                 my $ns = $type->new;
1724
1725                 $ns->tag( $tagline->getAttribute( "tag" ) );
1726                 my $val = $tagline->textContent;
1727                 $val = NFD($val);
1728                 $val =~ s/\pM+//sgo;
1729                 $val =~ s/\pC+//sgo;
1730                 $val =~ s/\W+$//sgo;
1731                 $ns->value( $val );
1732
1733                 push @ns_list, $ns;
1734         }
1735
1736         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1737                 next unless $tagline;
1738
1739                 my $tag = $tagline->getAttribute( "tag" );
1740                 my $ind1 = $tagline->getAttribute( "ind1" );
1741                 my $ind2 = $tagline->getAttribute( "ind2" );
1742
1743                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1744                         next unless $data;
1745
1746                         my $ns = $type->new;
1747
1748                         $ns->tag( $tag );
1749                         $ns->ind1( $ind1 );
1750                         $ns->ind2( $ind2 );
1751                         $ns->subfield( $data->getAttribute( "code" ) );
1752                         my $val = $data->textContent;
1753                         $val = NFD($val);
1754                         $val =~ s/\pM+//sgo;
1755                         $val =~ s/\pC+//sgo;
1756                         $val =~ s/\W+$//sgo;
1757                         $ns->value( lc($val) );
1758
1759                         push @ns_list, $ns;
1760                 }
1761         }
1762
1763         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1764         return @ns_list;
1765 }
1766
1767 sub flat_marc_xml {
1768         my $self = shift;
1769         my $client = shift;
1770         my $xml = shift;
1771
1772         $xml = $parser->parse_string($xml) unless (ref $xml);
1773
1774         my $type = 'metabib';
1775         $type = 'authority' if ($self->api_name =~ /authority/o);
1776
1777         OpenILS::Application::Ingest->post_init();
1778
1779         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1780         return undef;
1781 }
1782 __PACKAGE__->register_method(  
1783         api_name        => "open-ils.worm.flat_marc.authority.xml",
1784         method          => "flat_marc_xml",
1785         api_level       => 1,
1786         argc            => 1,
1787         stream          => 1,
1788 );                      
1789 __PACKAGE__->register_method(  
1790         api_name        => "open-ils.worm.flat_marc.biblio.xml",
1791         method          => "flat_marc_xml",
1792         api_level       => 1,
1793         argc            => 1,
1794         stream          => 1,
1795 );                      
1796
1797 sub flat_marc_record {
1798         my $self = shift;
1799         my $client = shift;
1800         my $rec = shift;
1801
1802         my $type = 'biblio';
1803         $type = 'authority' if ($self->api_name =~ /authority/o);
1804
1805         OpenILS::Application::Ingest->post_init();
1806         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1807
1808         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1809         return undef;
1810 }
1811 __PACKAGE__->register_method(  
1812         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
1813         method          => "flat_marc_record",
1814         api_level       => 1,
1815         argc            => 1,
1816         stream          => 1,
1817 );                      
1818 __PACKAGE__->register_method(  
1819         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
1820         method          => "flat_marc_record",
1821         api_level       => 1,
1822         argc            => 1,
1823         stream          => 1,
1824 );                      
1825
1826
1827 # --------------------------------------------------------------------------------
1828 # Fingerprinting
1829
1830 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1831 use base qw/OpenILS::Application::Ingest/;
1832 use Unicode::Normalize;
1833 use OpenSRF::EX qw/:try/;
1834
1835 my @fp_mods_xpath = (
1836         '//mods:mods/mods:typeOfResource[text()="text"]' => [
1837                         title   => {
1838                                         xpath   => [
1839                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1840                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1841                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1842                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1843                                         ],
1844                                         fixup   => sub {
1845                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1846                                                         $text = NFD($text);
1847                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1848                                                         $text =~ s/\pM+//gso;
1849                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1850                                                         $text = lc($text);
1851                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1852                                                         $text =~ s/\s+/ /sgo;
1853                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1854                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1855                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1856                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1857                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1858                                                         $text =~ s/\[.[^\]]+\]//sgo;
1859                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1860                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1861                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1862                                                 },
1863                         },
1864                         author  => {
1865                                         xpath   => [
1866                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1867                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1868                                         ],
1869                                         fixup   => sub {
1870                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1871                                                         $text = NFD($text);
1872                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1873                                                         $text =~ s/\pM+//gso;
1874                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1875                                                         $text = lc($text);
1876                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1877                                                         $text =~ s/\s+/ /sgo;
1878                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1879                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1880                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1881                                                         $text =~ s/,?\s+.*$//sgo;
1882                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1883                                                 },
1884                         },
1885         ],
1886
1887         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1888                         title   => {
1889                                         xpath   => [
1890                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1891                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1892                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1893                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1894                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1895                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1896                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1897                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1898                                         ],
1899                                         fixup   => sub {
1900                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1901                                                         $text = NFD($text);
1902                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1903                                                         $text =~ s/\pM+//gso;
1904                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1905                                                         $text = lc($text);
1906                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1907                                                         $text =~ s/\s+/ /sgo;
1908                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1909                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1910                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1911                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1912                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1913                                                         $text =~ s/\[.[^\]]+\]//sgo;
1914                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1915                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1916                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1917                                                 },
1918                         },
1919                         author  => {
1920                                         xpath   => [
1921                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1922                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1923                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1924                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1925                                         ],
1926                                         fixup   => sub {
1927                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1928                                                         $text = NFD($text);
1929                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1930                                                         $text =~ s/\pM+//gso;
1931                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1932                                                         $text = lc($text);
1933                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1934                                                         $text =~ s/\s+/ /sgo;
1935                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1936                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1937                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1938                                                         $text =~ s/,?\s+.*$//sgo;
1939                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1940                                                 },
1941                         },
1942         ],
1943
1944 );
1945
1946 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1947
1948 sub _fp_mods {
1949         my $mods = shift;
1950         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1951
1952         my $fp_string = '';
1953
1954         my $match_index = 0;
1955         my $block_index = 1;
1956         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1957                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1958
1959                         my $block_name_index = 0;
1960                         my $block_value_index = 1;
1961                         my $block = $fp_mods_xpath[$block_index];
1962                         while ( my $part = $$block[$block_value_index] ) {
1963                                 local $text;
1964                                 for my $xpath ( @{ $part->{xpath} } ) {
1965                                         $text = $mods->findvalue( $xpath );
1966                                         last if ($text);
1967                                 }
1968
1969                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1970
1971                                 if ($text) {
1972                                         $$part{fixup}->();
1973                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1974                                         $fp_string .= $text;
1975                                 }
1976
1977                                 $block_name_index += 2;
1978                                 $block_value_index += 2;
1979                         }
1980                 }
1981                 if ($fp_string) {
1982                         $fp_string =~ s/\W+//gso;
1983                         $log->debug("Fingerprint is [$fp_string]", INFO);;
1984                         return $fp_string;
1985                 }
1986
1987                 $match_index += 2;
1988                 $block_index += 2;
1989         }
1990         return undef;
1991 }
1992
1993 sub refingerprint_bibrec {
1994         my $self = shift;
1995         my $client = shift;
1996         my $rec = shift;
1997
1998         my $commit = 0;
1999         if (!OpenILS::Application::Ingest->in_transaction) {
2000                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2001                 $commit = 1;
2002         }
2003
2004         my $success = 1;
2005         try {
2006                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2007                 for my $b (@$bibs) {
2008                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
2009
2010                         if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2011
2012                                 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2013
2014                                 OpenILS::Application::Ingest->storage_req(
2015                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
2016                                         { id => $b->id },
2017                                         { fingerprint => $fp->{fingerprint},
2018                                           quality     => $fp->{quality} }
2019                                 );
2020
2021                                 if ($self->api_name !~ /nomap/o) {
2022                                         my $old_source_map = OpenILS::Application::Ingest->storage_req(
2023                                                 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2024                                                 $b->id
2025                                         );
2026
2027                                         my $old_mrid;
2028                                         if (ref($old_source_map) and @$old_source_map) {
2029                                                 for my $m (@$old_source_map) {
2030                                                         $old_mrid = $m->metarecord;
2031                                                         OpenILS::Application::Ingest->storage_req(
2032                                                                 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
2033                                                                 $m->id
2034                                                         );
2035                                                 }
2036                                         }
2037
2038                                         my $old_sm = OpenILS::Application::Ingest->storage_req(
2039                                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2040                                                         { metarecord => $old_mrid }
2041                                         ) if ($old_mrid);
2042
2043                                         if (ref($old_sm) and @$old_sm == 0) {
2044                                                 OpenILS::Application::Ingest->storage_req(
2045                                                         'open-ils.storage.direct.metabib.metarecord.delete',
2046                                                         $old_mrid
2047                                                 );
2048                                         }
2049
2050                                         my $mr = OpenILS::Application::Ingest->storage_req(
2051                                                         'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2052                                                         { fingerprint => $fp->{fingerprint} }
2053                                         )->[0];
2054                                 
2055                                         unless ($mr) {
2056                                                 $mr = Fieldmapper::metabib::metarecord->new;
2057                                                 $mr->fingerprint( $fp->{fingerprint} );
2058                                                 $mr->master_record( $b->id );
2059                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2060                                         }
2061
2062                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2063                                         $mr_map->metarecord( $mr->id );
2064                                         $mr_map->source( $b->id );
2065                                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2066
2067                                 }
2068                         }
2069                         $client->respond($b->id);
2070                 }
2071
2072         } otherwise {
2073                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
2074                 $success = 0;
2075         };
2076
2077         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2078         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
2079         return undef;
2080 }
2081 __PACKAGE__->register_method(  
2082         api_name        => "open-ils.worm.fingerprint.record.update",
2083         method          => "refingerprint_bibrec",
2084         api_level       => 1,
2085         argc            => 1,
2086         stream          => 1,
2087 );                      
2088
2089 __PACKAGE__->register_method(  
2090         api_name        => "open-ils.worm.fingerprint.record.update.nomap",
2091         method          => "refingerprint_bibrec",
2092         api_level       => 1,
2093         argc            => 1,
2094 );                      
2095
2096 =comment
2097
2098 sub fingerprint_bibrec {
2099         my $self = shift;
2100         my $client = shift;
2101         my $rec = shift;
2102
2103         OpenILS::Application::Ingest->post_init();
2104         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2105
2106         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2107         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2108         return $fp;
2109
2110 }
2111 __PACKAGE__->register_method(  
2112         api_name        => "open-ils.worm.fingerprint.record",
2113         method          => "fingerprint_bibrec",
2114         api_level       => 0,
2115         argc            => 1,
2116 );                      
2117
2118
2119 sub fingerprint_mods {
2120         my $self = shift;
2121         my $client = shift;
2122         my $xml = shift;
2123
2124         OpenILS::Application::Ingest->post_init();
2125         my $mods = $parser->parse_string($xml)->documentElement;
2126
2127         return _fp_mods( $mods );
2128 }
2129 __PACKAGE__->register_method(  
2130         api_name        => "open-ils.worm.fingerprint.mods",
2131         method          => "fingerprint_mods",
2132         api_level       => 1,
2133         argc            => 1,
2134 );                      
2135
2136 sub fingerprint_marc {
2137         my $self = shift;
2138         my $client = shift;
2139         my $xml = shift;
2140
2141         $xml = $parser->parse_string($xml) unless (ref $xml);
2142
2143         OpenILS::Application::Ingest->post_init();
2144         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2145         $log->debug("Returning [$fp] as fingerprint", INFO);
2146         return $fp;
2147 }
2148 __PACKAGE__->register_method(  
2149         api_name        => "open-ils.worm.fingerprint.marc",
2150         method          => "fingerprint_marc",
2151         api_level       => 1,
2152         argc            => 1,
2153 );                      
2154
2155
2156 =cut
2157
2158 sub biblio_fingerprint_record {
2159         my $self = shift;
2160         my $client = shift;
2161         my $rec = shift;
2162
2163         OpenILS::Application::Ingest->post_init();
2164
2165         my $marc = OpenILS::Application::Ingest
2166                         ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2167                         ->marc;
2168
2169         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2170         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2171         return $fp;
2172 }
2173 __PACKAGE__->register_method(  
2174         api_name        => "open-ils.worm.fingerprint.record",
2175         method          => "biblio_fingerprint_record",
2176         api_level       => 1,
2177         argc            => 1,
2178 );                      
2179
2180 our $fp_script;
2181 sub biblio_fingerprint {
2182         my $self = shift;
2183         my $client = shift;
2184         my $marc = shift;
2185
2186         OpenILS::Application::Ingest->post_init();
2187
2188         $marc = $parser->parse_string($marc) unless (ref $marc);
2189
2190         my $mods = OpenILS::Application::Ingest::entityize(
2191                 $mods_sheet
2192                         ->transform( $marc )
2193                         ->documentElement
2194                         ->toString,
2195                 'D'
2196         );
2197
2198         $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2199
2200         warn $marc;
2201         $log->internal("Got MARC [$marc]");
2202         $log->internal("Created MODS [$mods]");
2203
2204         if(!$fp_script) {
2205                 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2206                 my $conf = OpenSRF::Utils::SettingsClient->new;
2207
2208                 my $libs        = $conf->config_value(@pfx, 'script_path');
2209                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
2210                 my $script_libs = (ref($libs)) ? $libs : [$libs];
2211
2212                 $log->debug("Loading script $script_file for biblio fingerprinting...");
2213                 
2214                 $fp_script = new OpenILS::Utils::ScriptRunner
2215                         ( file          => $script_file,
2216                           paths         => $script_libs,
2217                           reset_count   => 1000 );
2218         }
2219
2220         $log->debug("Applying environment for biblio fingerprinting...");
2221
2222         my $env = {marc => $marc, mods => $mods};
2223         #my $res = {fingerprint => '', quality => '0'};
2224
2225         $fp_script->insert('environment' => $env);
2226         #$fp_script->insert('result' => $res);
2227
2228         $log->debug("Running script for biblio fingerprinting...");
2229
2230         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return 0);
2231
2232         $log->debug("Script for biblio fingerprinting completed successfully...");
2233
2234         return $res;
2235 }
2236 __PACKAGE__->register_method(  
2237         api_name        => "open-ils.worm.fingerprint.marc",
2238         method          => "biblio_fingerprint",
2239         api_level       => 1,
2240         argc            => 1,
2241 );                      
2242
2243 # --------------------------------------------------------------------------------
2244
2245 1;
2246
2247 __END__
2248 my $in_xact;
2249 my $begin;
2250 my $commit;
2251 my $rollback;
2252 my $lookup;
2253 my $update_entry;
2254 my $mr_lookup;
2255 my $mr_update;
2256 my $mr_create;
2257 my $create_source_map;
2258 my $sm_lookup;
2259 my $rm_old_rd;
2260 my $rm_old_sm;
2261 my $rm_old_fr;
2262 my $rm_old_tr;
2263 my $rm_old_ar;
2264 my $rm_old_sr;
2265 my $rm_old_kr;
2266 my $rm_old_ser;
2267
2268 my $fr_create;
2269 my $rd_create;
2270 my $create = {};
2271
2272 my %descriptor_code = (
2273         item_type => 'substr($ldr,6,1)',
2274         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2275         bib_level => 'substr($ldr,7,1)',
2276         control_type => 'substr($ldr,8,1)',
2277         char_encoding => 'substr($ldr,9,1)',
2278         enc_level => 'substr($ldr,17,1)',
2279         cat_form => 'substr($ldr,18,1)',
2280         pub_status => 'substr($ldr,5,1)',
2281         item_lang => 'substr($oo8,35,3)',
2282         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2283         audience => 'substr($oo8,22,1)',
2284 );
2285
2286 sub wormize {
2287
2288         my $self = shift;
2289         my $client = shift;
2290         my @docids = @_;
2291
2292         my $no_map = 0;
2293         if ($self->api_name =~ /no_map/o) {
2294                 $no_map = 1;
2295         }
2296
2297         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2298                 unless ($in_xact);
2299         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2300                 unless ($begin);
2301         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2302                 unless ($commit);
2303         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2304                 unless ($rollback);
2305         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2306                 unless ($sm_lookup);
2307         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2308                 unless ($mr_lookup);
2309         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2310                 unless ($mr_update);
2311         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2312                 unless ($lookup);
2313         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2314                 unless ($update_entry);
2315         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2316                 unless ($rm_old_sm);
2317         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2318                 unless ($rm_old_rd);
2319         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2320                 unless ($rm_old_fr);
2321         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2322                 unless ($rm_old_tr);
2323         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2324                 unless ($rm_old_ar);
2325         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2326                 unless ($rm_old_sr);
2327         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2328                 unless ($rm_old_kr);
2329         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2330                 unless ($rm_old_ser);
2331         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2332                 unless ($mr_create);
2333         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2334                 unless ($create_source_map);
2335         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2336                 unless ($rd_create);
2337         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2338                 unless ($fr_create);
2339         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2340                 unless ($$create{title});
2341         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2342                 unless ($$create{author});
2343         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2344                 unless ($$create{subject});
2345         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2346                 unless ($$create{keyword});
2347         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2348                 unless ($$create{series});
2349
2350
2351         my ($outer_xact) = $in_xact->run;
2352         try {
2353                 unless ($outer_xact) {
2354                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2355                         my ($r) = $begin->run($client);
2356                         unless (defined $r and $r) {
2357                                 $rollback->run;
2358                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2359                         }
2360                 }
2361         } catch Error with {
2362                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2363         };
2364
2365         my @source_maps;
2366         my @entry_list;
2367         my @mr_list;
2368         my @rd_list;
2369         my @ns_list;
2370         my @mods_data;
2371         my $ret = 0;
2372         for my $entry ( $lookup->run(@docids) ) {
2373                 # step -1: grab the doc from storage
2374                 next unless ($entry);
2375
2376                 if(!$mods_sheet) {
2377                         my $xslt_doc = $parser->parse_file(
2378                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2379                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2380                 }
2381
2382                 my $xml = $entry->marc;
2383                 my $docid = $entry->id;
2384                 my $marcdoc = $parser->parse_string($xml);
2385                 my $modsdoc = $mods_sheet->transform($marcdoc);
2386
2387                 my $mods = $modsdoc->documentElement;
2388                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2389
2390                 $entry->fingerprint( fingerprint_mods( $mods ) );
2391                 push @entry_list, $entry;
2392
2393                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2394
2395                 unless ($no_map) {
2396                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2397                         if (!$mr || !@$mr) {
2398                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2399                                 $mr = new Fieldmapper::metabib::metarecord;
2400                                 $mr->fingerprint( $entry->fingerprint );
2401                                 $mr->master_record( $entry->id );
2402                                 my ($new_mr) = $mr_create->run($mr);
2403                                 $mr->id($new_mr);
2404                                 unless (defined $mr) {
2405                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2406                                 }
2407                         } else {
2408                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2409                                 $mr->mods('');
2410                                 push @mr_list, $mr;
2411                         }
2412
2413                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2414                         $sm->metarecord( $mr->id );
2415                         $sm->source( $entry->id );
2416                         push @source_maps, $sm;
2417                 }
2418
2419                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2420                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2421
2422                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2423                 for my $rd_field ( keys %descriptor_code ) {
2424                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2425                 }
2426                 $rd_obj->record( $docid );
2427                 push @rd_list, $rd_obj;
2428
2429                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2430
2431                 # step 2: build the KOHA rows
2432                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2433                 $_->record( $docid ) for (@tmp_list);
2434                 push @ns_list, @tmp_list;
2435
2436                 $ret++;
2437
2438                 last unless ($self->api_name =~ /batch$/o);
2439         }
2440
2441         $rm_old_rd->run( { record => \@docids } );
2442         $rm_old_fr->run( { record => \@docids } );
2443         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2444         $rm_old_tr->run( { source => \@docids } );
2445         $rm_old_ar->run( { source => \@docids } );
2446         $rm_old_sr->run( { source => \@docids } );
2447         $rm_old_kr->run( { source => \@docids } );
2448         $rm_old_ser->run( { source => \@docids } );
2449
2450         unless ($no_map) {
2451                 my ($sm) = $create_source_map->run(@source_maps);
2452                 unless (defined $sm) {
2453                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2454                 }
2455                 my ($mr) = $mr_update->run(@mr_list);
2456                 unless (defined $mr) {
2457                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2458                 }
2459         }
2460
2461         my ($re) = $update_entry->run(@entry_list);
2462         unless (defined $re) {
2463                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2464         }
2465
2466         my ($rd) = $rd_create->run(@rd_list);
2467         unless (defined $rd) {
2468                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2469         }
2470
2471         my ($fr) = $fr_create->run(@ns_list);
2472         unless (defined $fr) {
2473                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2474         }
2475
2476         # step 5: insert the new metadata
2477         for my $class ( qw/title author subject keyword series/ ) {
2478                 my @md_list = ();
2479                 for my $doc ( @mods_data ) {
2480                         my ($did) = keys %$doc;
2481                         my ($data) = values %$doc;
2482
2483                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2484                         for my $row ( keys %{ $$data{$class} } ) {
2485                                 next unless (exists $$data{$class}{$row});
2486                                 next unless ($$data{$class}{$row}{value});
2487                                 my $fm_obj = $fm_constructor->new;
2488                                 $fm_obj->value( $$data{$class}{$row}{value} );
2489                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2490                                 $fm_obj->source( $did );
2491                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2492
2493                                 push @md_list, $fm_obj;
2494                         }
2495                 }
2496                         
2497                 my ($cr) = $$create{$class}->run(@md_list);
2498                 unless (defined $cr) {
2499                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2500                 }
2501         }
2502
2503         unless ($outer_xact) {
2504                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2505                 my ($c) = $commit->run;
2506                 unless (defined $c and $c) {
2507                         $rollback->run;
2508                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2509                 }
2510         }
2511
2512         return $ret;
2513 }
2514 __PACKAGE__->register_method( 
2515         api_name        => "open-ils.worm.wormize",
2516         method          => "wormize",
2517         api_level       => 1,
2518         argc            => 1,
2519 );
2520 __PACKAGE__->register_method( 
2521         api_name        => "open-ils.worm.wormize.no_map",
2522         method          => "wormize",
2523         api_level       => 1,
2524         argc            => 1,
2525 );
2526 __PACKAGE__->register_method( 
2527         api_name        => "open-ils.worm.wormize.batch",
2528         method          => "wormize",
2529         api_level       => 1,
2530         argc            => 1,
2531 );
2532 __PACKAGE__->register_method( 
2533         api_name        => "open-ils.worm.wormize.no_map.batch",
2534         method          => "wormize",
2535         api_level       => 1,
2536         argc            => 1,
2537 );
2538
2539
2540 my $ain_xact;
2541 my $abegin;
2542 my $acommit;
2543 my $arollback;
2544 my $alookup;
2545 my $aupdate_entry;
2546 my $amr_lookup;
2547 my $amr_update;
2548 my $amr_create;
2549 my $acreate_source_map;
2550 my $asm_lookup;
2551 my $arm_old_rd;
2552 my $arm_old_sm;
2553 my $arm_old_fr;
2554 my $arm_old_tr;
2555 my $arm_old_ar;
2556 my $arm_old_sr;
2557 my $arm_old_kr;
2558 my $arm_old_ser;
2559
2560 my $afr_create;
2561 my $ard_create;
2562 my $acreate = {};
2563
2564 sub authority_wormize {
2565
2566         my $self = shift;
2567         my $client = shift;
2568         my @docids = @_;
2569
2570         my $no_map = 0;
2571         if ($self->api_name =~ /no_map/o) {
2572                 $no_map = 1;
2573         }
2574
2575         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2576                 unless ($in_xact);
2577         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2578                 unless ($begin);
2579         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2580                 unless ($commit);
2581         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2582                 unless ($rollback);
2583         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2584                 unless ($alookup);
2585         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2586                 unless ($aupdate_entry);
2587         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2588                 unless ($arm_old_rd);
2589         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2590                 unless ($arm_old_fr);
2591         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2592                 unless ($ard_create);
2593         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2594                 unless ($afr_create);
2595
2596
2597         my ($outer_xact) = $in_xact->run;
2598         try {
2599                 unless ($outer_xact) {
2600                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2601                         my ($r) = $begin->run($client);
2602                         unless (defined $r and $r) {
2603                                 $rollback->run;
2604                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2605                         }
2606                 }
2607         } catch Error with {
2608                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2609         };
2610
2611         my @source_maps;
2612         my @entry_list;
2613         my @mr_list;
2614         my @rd_list;
2615         my @ns_list;
2616         my @mads_data;
2617         my $ret = 0;
2618         for my $entry ( $lookup->run(@docids) ) {
2619                 # step -1: grab the doc from storage
2620                 next unless ($entry);
2621
2622                 #if(!$mads_sheet) {
2623                 #       my $xslt_doc = $parser->parse_file(
2624                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2625                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2626                 #}
2627
2628                 my $xml = $entry->marc;
2629                 my $docid = $entry->id;
2630                 my $marcdoc = $parser->parse_string($xml);
2631                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2632
2633                 #my $mads = $madsdoc->documentElement;
2634                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2635
2636                 push @entry_list, $entry;
2637
2638                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2639                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2640
2641                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2642                 for my $rd_field ( keys %descriptor_code ) {
2643                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2644                 }
2645                 $rd_obj->record( $docid );
2646                 push @rd_list, $rd_obj;
2647
2648                 # step 2: build the KOHA rows
2649                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2650                 $_->record( $docid ) for (@tmp_list);
2651                 push @ns_list, @tmp_list;
2652
2653                 $ret++;
2654
2655                 last unless ($self->api_name =~ /batch$/o);
2656         }
2657
2658         $arm_old_rd->run( { record => \@docids } );
2659         $arm_old_fr->run( { record => \@docids } );
2660
2661         my ($rd) = $ard_create->run(@rd_list);
2662         unless (defined $rd) {
2663                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2664         }
2665
2666         my ($fr) = $fr_create->run(@ns_list);
2667         unless (defined $fr) {
2668                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2669         }
2670
2671         unless ($outer_xact) {
2672                 $log->debug("Commiting transaction started by Ingest.", INFO);
2673                 my ($c) = $commit->run;
2674                 unless (defined $c and $c) {
2675                         $rollback->run;
2676                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2677                 }
2678         }
2679
2680         return $ret;
2681 }
2682 __PACKAGE__->register_method( 
2683         api_name        => "open-ils.worm.authortiy.wormize",
2684         method          => "wormize",
2685         api_level       => 1,
2686         argc            => 1,
2687 );
2688 __PACKAGE__->register_method( 
2689         api_name        => "open-ils.worm.authority.wormize.batch",
2690         method          => "wormize",
2691         api_level       => 1,
2692         argc            => 1,
2693 );
2694
2695
2696 # --------------------------------------------------------------------------------
2697
2698
2699 sub _marcxml_to_full_rows {
2700
2701         my $marcxml = shift;
2702         my $type = shift || 'Fieldmapper::metabib::full_rec';
2703
2704         my @ns_list;
2705         
2706         my $root = $marcxml->documentElement;
2707
2708         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2709                 next unless $tagline;
2710
2711                 my $ns = new Fieldmapper::metabib::full_rec;
2712
2713                 $ns->tag( 'LDR' );
2714                 my $val = NFD($tagline->textContent);
2715                 $val =~ s/(\pM+)//gso;
2716                 $ns->value( $val );
2717
2718                 push @ns_list, $ns;
2719         }
2720
2721         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2722                 next unless $tagline;
2723
2724                 my $ns = new Fieldmapper::metabib::full_rec;
2725
2726                 $ns->tag( $tagline->getAttribute( "tag" ) );
2727                 my $val = NFD($tagline->textContent);
2728                 $val =~ s/(\pM+)//gso;
2729                 $ns->value( $val );
2730
2731                 push @ns_list, $ns;
2732         }
2733
2734         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2735                 next unless $tagline;
2736
2737                 my $tag = $tagline->getAttribute( "tag" );
2738                 my $ind1 = $tagline->getAttribute( "ind1" );
2739                 my $ind2 = $tagline->getAttribute( "ind2" );
2740
2741                 for my $data ( $tagline->childNodes ) {
2742                         next unless $data;
2743
2744                         my $ns = $type->new;
2745
2746                         $ns->tag( $tag );
2747                         $ns->ind1( $ind1 );
2748                         $ns->ind2( $ind2 );
2749                         $ns->subfield( $data->getAttribute( "code" ) );
2750                         my $val = NFD($data->textContent);
2751                         $val =~ s/(\pM+)//gso;
2752                         $ns->value( lc($val) );
2753
2754                         push @ns_list, $ns;
2755                 }
2756         }
2757         return @ns_list;
2758 }
2759
2760 sub _get_field_value {
2761
2762         my( $root, $xpath ) = @_;
2763
2764         my $string = "";
2765
2766         # grab the set of matching nodes
2767         my @nodes = $root->findnodes( $xpath );
2768         for my $value (@nodes) {
2769
2770                 # grab all children of the node
2771                 my @children = $value->childNodes();
2772                 for my $child (@children) {
2773
2774                         # add the childs content to the growing buffer
2775                         my $content = quotemeta($child->textContent);
2776                         next if ($string =~ /$content/);  # uniquify the values
2777                         $string .= $child->textContent . " ";
2778                 }
2779                 if( ! @children ) {
2780                         $string .= $value->textContent . " ";
2781                 }
2782         }
2783         $string = NFD($string);
2784         $string =~ s/(\pM)//gso;
2785         return lc($string);
2786 }
2787
2788
2789 sub modsdoc_to_values {
2790         my( $self, $mods ) = @_;
2791         my $data = {};
2792         for my $class (keys %$xpathset) {
2793                 $data->{$class} = {};
2794                 for my $type (keys %{$xpathset->{$class}}) {
2795                         $data->{$class}->{$type} = {};
2796                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
2797                 }
2798         }
2799         return $data;
2800 }
2801
2802
2803 1;
2804
2805