]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
4e19dfaee841a79ba41a29010c7f91ae7024635e
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
20 our %supported_formats = (
21         mods3   => {ns => 'http://www.loc.gov/mods/v3'},
22         mods    => {ns => 'http://www.loc.gov/mods/'},
23         marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24         srw_dc  => {ns => 'info:srw/schema/1/dc-schema'},
25         oai_dc  => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
26         rdf_dc  => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
27         atom    => {ns => 'http://www.w3.org/2005/Atom'},
28         rss091  => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
29         rss092  => {ns => ''},
30         rss093  => {ns => ''},
31         rss094  => {ns => ''},
32         rss10   => {ns => 'http://purl.org/rss/1.0/'},
33         rss11   => {ns => 'http://purl.org/net/rss1.1#'},
34         rss2    => {ns => ''},
35 );
36
37
38 my $log = 'OpenSRF::Utils::Logger';
39
40 my  $parser = XML::LibXML->new();
41 my  $xslt = XML::LibXSLT->new();
42
43 my  $mods_sheet;
44 my  $mads_sheet;
45 my  $xpathset = {};
46 sub initialize {}
47 sub child_init {}
48
49 sub post_init {
50
51         unless (keys %$xpathset) {
52                 $log->debug("Running post_init", DEBUG);
53
54                 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
55
56                 unless ($supported_formats{mods}{xslt}) {
57                         $log->debug("Loading MODS XSLT", DEBUG);
58                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
59                         $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
60                 }
61
62                 unless ($supported_formats{mods3}{xslt}) {
63                         $log->debug("Loading MODS v3 XSLT", DEBUG);
64                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
65                         $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
66                 }
67
68
69                 my $req = OpenSRF::AppSession
70                                 ->create('open-ils.cstore')
71                                 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
72                                 ->gather(1);
73
74                 if (ref $req and @$req) {
75                         for my $f (@$req) {
76                                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
77                                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
78                                 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
79                                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
80                         }
81                 }
82         }
83 }
84
85 sub entityize {
86         my $stuff = shift;
87         my $form = shift;
88
89         if ($form eq 'D') {
90                 $stuff = NFD($stuff);
91         } else {
92                 $stuff = NFC($stuff);
93         }
94
95         $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
96         return $stuff;
97 }
98
99 # --------------------------------------------------------------------------------
100 # Biblio ingest
101
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
105
106 sub ro_biblio_ingest_single_object {
107         my $self = shift;
108         my $client = shift;
109         my $bib = shift;
110         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
111
112         my $document = $parser->parse_string($xml);
113
114         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
115         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
116         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
117         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
118
119         $_->source($bib->id) for (@mXfe);
120         $_->record($bib->id) for (@mfr);
121         $rd->record($bib->id) if ($rd);
122
123         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
124 }
125 __PACKAGE__->register_method(  
126         api_name        => "open-ils.ingest.full.biblio.object.readonly",
127         method          => "ro_biblio_ingest_single_object",
128         api_level       => 1,
129         argc            => 1,
130 );                      
131
132 sub ro_biblio_ingest_single_xml {
133         my $self = shift;
134         my $client = shift;
135         my $xml = OpenILS::Application::Ingest::entityize(shift);
136
137         my $document = $parser->parse_string($xml);
138
139         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
140         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
141         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
142         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
143
144         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
145 }
146 __PACKAGE__->register_method(  
147         api_name        => "open-ils.ingest.full.biblio.xml.readonly",
148         method          => "ro_biblio_ingest_single_xml",
149         api_level       => 1,
150         argc            => 1,
151 );                      
152
153 sub ro_biblio_ingest_single_record {
154         my $self = shift;
155         my $client = shift;
156         my $rec = shift;
157
158         OpenILS::Application::Ingest->post_init();
159         my $r = OpenSRF::AppSession
160                         ->create('open-ils.cstore')
161                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
162                         ->gather(1);
163
164         return undef unless ($r and @$r);
165
166         my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
167
168         $_->source($rec) for (@{$res->{field_entries}});
169         $_->record($rec) for (@{$res->{full_rec}});
170         $res->{descriptor}->record($rec);
171
172         return $res;
173 }
174 __PACKAGE__->register_method(  
175         api_name        => "open-ils.ingest.full.biblio.record.readonly",
176         method          => "ro_biblio_ingest_single_record",
177         api_level       => 1,
178         argc            => 1,
179 );                      
180
181 sub ro_biblio_ingest_stream_record {
182         my $self = shift;
183         my $client = shift;
184
185         OpenILS::Application::Ingest->post_init();
186
187         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
188
189         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
190         
191                 my $rec = $resp->content;
192                 last unless (defined $rec);
193
194                 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
195                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
196
197                 $_->source($rec) for (@{$res->{field_entries}});
198                 $_->record($rec) for (@{$res->{full_rec}});
199
200                 $client->respond( $res );
201         }
202
203         return undef;
204 }
205 __PACKAGE__->register_method(  
206         api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
207         method          => "ro_biblio_ingest_stream_record",
208         api_level       => 1,
209         stream          => 1,
210 );                      
211
212 sub ro_biblio_ingest_stream_xml {
213         my $self = shift;
214         my $client = shift;
215
216         OpenILS::Application::Ingest->post_init();
217
218         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
219
220         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
221         
222                 my $xml = $resp->content;
223                 last unless (defined $xml);
224
225                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
226                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
227
228                 $client->respond( $res );
229         }
230
231         return undef;
232 }
233 __PACKAGE__->register_method(  
234         api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
235         method          => "ro_biblio_ingest_stream_xml",
236         api_level       => 1,
237         stream          => 1,
238 );                      
239
240 sub rw_biblio_ingest_stream_import {
241         my $self = shift;
242         my $client = shift;
243
244         OpenILS::Application::Ingest->post_init();
245
246         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
247
248         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
249         
250                 my $bib = $resp->content;
251                 last unless (defined $bib);
252
253                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
254                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
255
256                 $_->source($bib->id) for (@{$res->{field_entries}});
257                 $_->record($bib->id) for (@{$res->{full_rec}});
258
259                 $client->respond( $res );
260         }
261
262         return undef;
263 }
264 __PACKAGE__->register_method(  
265         api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
266         method          => "rw_biblio_ingest_stream_import",
267         api_level       => 1,
268         stream          => 1,
269 );                      
270
271
272 # --------------------------------------------------------------------------------
273 # Authority ingest
274
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
278
279 sub ro_authority_ingest_single_object {
280         my $self = shift;
281         my $client = shift;
282         my $bib = shift;
283         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
284
285         my $document = $parser->parse_string($xml);
286
287         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
288
289         $_->record($bib->id) for (@mfr);
290
291         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
292 }
293 __PACKAGE__->register_method(  
294         api_name        => "open-ils.ingest.full.authority.object.readonly",
295         method          => "ro_authority_ingest_single_object",
296         api_level       => 1,
297         argc            => 1,
298 );                      
299
300 sub ro_authority_ingest_single_xml {
301         my $self = shift;
302         my $client = shift;
303         my $xml = OpenILS::Application::Ingest::entityize(shift);
304
305         my $document = $parser->parse_string($xml);
306
307         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
308
309         return { full_rec => \@mfr };
310 }
311 __PACKAGE__->register_method(  
312         api_name        => "open-ils.ingest.full.authority.xml.readonly",
313         method          => "ro_authority_ingest_single_xml",
314         api_level       => 1,
315         argc            => 1,
316 );                      
317
318 sub ro_authority_ingest_single_record {
319         my $self = shift;
320         my $client = shift;
321         my $rec = shift;
322
323         OpenILS::Application::Ingest->post_init();
324         my $r = OpenSRF::AppSession
325                         ->create('open-ils.cstore')
326                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
327                         ->gather(1);
328
329         return undef unless ($r and @$r);
330
331         my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
332
333         $_->record($rec) for (@{$res->{full_rec}});
334         $res->{descriptor}->record($rec);
335
336         return $res;
337 }
338 __PACKAGE__->register_method(  
339         api_name        => "open-ils.ingest.full.authority.record.readonly",
340         method          => "ro_authority_ingest_single_record",
341         api_level       => 1,
342         argc            => 1,
343 );                      
344
345 sub ro_authority_ingest_stream_record {
346         my $self = shift;
347         my $client = shift;
348
349         OpenILS::Application::Ingest->post_init();
350
351         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
352
353         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
354         
355                 my $rec = $resp->content;
356                 last unless (defined $rec);
357
358                 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
359                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
360
361                 $_->source($rec) for (@{$res->{field_entries}});
362                 $_->record($rec) for (@{$res->{full_rec}});
363
364                 $client->respond( $res );
365         }
366
367         return undef;
368 }
369 __PACKAGE__->register_method(  
370         api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
371         method          => "ro_authority_ingest_stream_record",
372         api_level       => 1,
373         stream          => 1,
374 );                      
375
376 sub ro_authority_ingest_stream_xml {
377         my $self = shift;
378         my $client = shift;
379
380         OpenILS::Application::Ingest->post_init();
381
382         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
383
384         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
385         
386                 my $xml = $resp->content;
387                 last unless (defined $xml);
388
389                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
390                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
391
392                 $client->respond( $res );
393         }
394
395         return undef;
396 }
397 __PACKAGE__->register_method(  
398         api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
399         method          => "ro_authority_ingest_stream_xml",
400         api_level       => 1,
401         stream          => 1,
402 );                      
403
404 sub rw_authority_ingest_stream_import {
405         my $self = shift;
406         my $client = shift;
407
408         OpenILS::Application::Ingest->post_init();
409
410         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
411
412         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
413         
414                 my $bib = $resp->content;
415                 last unless (defined $bib);
416
417                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
418                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
419
420                 $_->source($bib->id) for (@{$res->{field_entries}});
421                 $_->record($bib->id) for (@{$res->{full_rec}});
422
423                 $client->respond( $res );
424         }
425
426         return undef;
427 }
428 __PACKAGE__->register_method(  
429         api_name        => "open-ils.ingest.full.authority.bib_stream.import",
430         method          => "rw_authority_ingest_stream_import",
431         api_level       => 1,
432         stream          => 1,
433 );                      
434
435
436 # --------------------------------------------------------------------------------
437 # MARC index extraction
438
439 package OpenILS::Application::Ingest::XPATH;
440 use base qw/OpenILS::Application::Ingest/;
441 use Unicode::Normalize;
442
443 # give this an XML documentElement and an XPATH expression
444 sub xpath_to_string {
445         my $xml = shift;
446         my $xpath = shift;
447         my $ns_uri = shift;
448         my $ns_prefix = shift;
449         my $unique = shift;
450
451         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
452
453         my $string = "";
454
455         # grab the set of matching nodes
456         my @nodes = $xml->findnodes( $xpath );
457         for my $value (@nodes) {
458
459                 # grab all children of the node
460                 my @children = $value->childNodes();
461                 for my $child (@children) {
462
463                         # add the childs content to the growing buffer
464                         my $content = quotemeta($child->textContent);
465                         next if ($unique && $string =~ /$content/);  # uniquify the values
466                         $string .= $child->textContent . " ";
467                 }
468                 if( ! @children ) {
469                         $string .= $value->textContent . " ";
470                 }
471         }
472         return NFD($string);
473 }
474
475 sub class_index_string_xml {
476         my $self = shift;
477         my $client = shift;
478         my $xml = shift;
479         my @classes = @_;
480
481         OpenILS::Application::Ingest->post_init();
482         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
483
484         my %transform_cache;
485         
486         for my $class (@classes) {
487                 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
488                 for my $type ( keys %{ $xpathset->{$class} } ) {
489
490                         my $def = $xpathset->{$class}->{$type};
491                         my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
492
493                         my $document = $xml;
494
495                         if ($sf->{xslt}) {
496                                 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
497                                 $transform_cache{$def->{format}} = $document;
498                         }
499
500                         my $value =  xpath_to_string(
501                                         $document->documentElement      => $def->{xpath},
502                                         $sf->{ns}                       => $def->{format},
503                                         1
504                         );
505
506                         next unless $value;
507
508                         $value = NFD($value);
509                         $value =~ s/\pM+//sgo;
510                         $value =~ s/\pC+//sgo;
511                         $value =~ s/\W+$//sgo;
512
513                         $value =~ s/(\w)\.+(\w)/$1$2/sgo;
514                         $value = lc($value);
515
516                         my $fm = $class_constructor->new;
517                         $fm->value( $value );
518                         $fm->field( $xpathset->{$class}->{$type}->{id} );
519                         $client->respond($fm);
520                 }
521         }
522         return undef;
523 }
524 __PACKAGE__->register_method(  
525         api_name        => "open-ils.ingest.field_entry.class.xml",
526         method          => "class_index_string_xml",
527         api_level       => 1,
528         argc            => 2,
529         stream          => 1,
530 );                      
531
532 sub class_index_string_record {
533         my $self = shift;
534         my $client = shift;
535         my $rec = shift;
536         my @classes = shift;
537
538         OpenILS::Application::Ingest->post_init();
539         my $r = OpenSRF::AppSession
540                         ->create('open-ils.cstore')
541                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
542                         ->gather(1);
543
544         return undef unless ($r and @$r);
545
546         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
547                 $fm->source($rec);
548                 $client->respond($fm);
549         }
550         return undef;
551 }
552 __PACKAGE__->register_method(  
553         api_name        => "open-ils.ingest.field_entry.class.record",
554         method          => "class_index_string_record",
555         api_level       => 1,
556         argc            => 2,
557         stream          => 1,
558 );                      
559
560 sub all_index_string_xml {
561         my $self = shift;
562         my $client = shift;
563         my $xml = shift;
564
565         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
566                 $client->respond($fm);
567         }
568         return undef;
569 }
570 __PACKAGE__->register_method(  
571         api_name        => "open-ils.ingest.extract.field_entry.all.xml",
572         method          => "all_index_string_xml",
573         api_level       => 1,
574         argc            => 1,
575         stream          => 1,
576 );                      
577
578 sub all_index_string_record {
579         my $self = shift;
580         my $client = shift;
581         my $rec = shift;
582
583         OpenILS::Application::Ingest->post_init();
584         my $r = OpenSRF::AppSession
585                         ->create('open-ils.cstore')
586                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
587                         ->gather(1);
588
589         return undef unless ($r and @$r);
590
591         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
592                 $fm->source($rec);
593                 $client->respond($fm);
594         }
595         return undef;
596 }
597 __PACKAGE__->register_method(  
598         api_name        => "open-ils.ingest.extract.field_entry.all.record",
599         method          => "all_index_string_record",
600         api_level       => 1,
601         argc            => 1,
602         stream          => 1,
603 );                      
604
605 # --------------------------------------------------------------------------------
606 # Flat MARC
607
608 package OpenILS::Application::Ingest::FlatMARC;
609 use base qw/OpenILS::Application::Ingest/;
610 use Unicode::Normalize;
611
612
613 sub _marcxml_to_full_rows {
614
615         my $marcxml = shift;
616         my $xmltype = shift || 'metabib';
617
618         my $type = "Fieldmapper::${xmltype}::full_rec";
619
620         my @ns_list;
621         
622         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
623
624         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
625                 next unless $tagline;
626
627                 my $ns = $type->new;
628
629                 $ns->tag( 'LDR' );
630                 my $val = $tagline->textContent;
631                 $val = NFD($val);
632                 $val =~ s/\pM+//sgo;
633                 $val =~ s/\pC+//sgo;
634                 $val =~ s/\W+$//sgo;
635                 $ns->value( $val );
636
637                 push @ns_list, $ns;
638         }
639
640         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
641                 next unless $tagline;
642
643                 my $ns = $type->new;
644
645                 $ns->tag( $tagline->getAttribute( "tag" ) );
646                 my $val = $tagline->textContent;
647                 $val = NFD($val);
648                 $val =~ s/\pM+//sgo;
649                 $val =~ s/\pC+//sgo;
650                 $val =~ s/\W+$//sgo;
651                 $ns->value( $val );
652
653                 push @ns_list, $ns;
654         }
655
656         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
657                 next unless $tagline;
658
659                 my $tag = $tagline->getAttribute( "tag" );
660                 my $ind1 = $tagline->getAttribute( "ind1" );
661                 my $ind2 = $tagline->getAttribute( "ind2" );
662
663                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
664                         next unless $data;
665
666                         my $ns = $type->new;
667
668                         $ns->tag( $tag );
669                         $ns->ind1( $ind1 );
670                         $ns->ind2( $ind2 );
671                         $ns->subfield( $data->getAttribute( "code" ) );
672                         my $val = $data->textContent;
673                         $val = NFD($val);
674                         $val =~ s/\pM+//sgo;
675                         $val =~ s/\pC+//sgo;
676                         $val =~ s/\W+$//sgo;
677                         $ns->value( lc($val) );
678
679                         push @ns_list, $ns;
680                 }
681         }
682
683         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
684         return @ns_list;
685 }
686
687 sub flat_marc_xml {
688         my $self = shift;
689         my $client = shift;
690         my $xml = shift;
691
692         $log->debug("processing [$xml]");
693
694         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
695
696         my $type = 'metabib';
697         $type = 'authority' if ($self->api_name =~ /authority/o);
698
699         OpenILS::Application::Ingest->post_init();
700
701         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
702         return undef;
703 }
704 __PACKAGE__->register_method(  
705         api_name        => "open-ils.ingest.flat_marc.authority.xml",
706         method          => "flat_marc_xml",
707         api_level       => 1,
708         argc            => 1,
709         stream          => 1,
710 );                      
711 __PACKAGE__->register_method(  
712         api_name        => "open-ils.ingest.flat_marc.biblio.xml",
713         method          => "flat_marc_xml",
714         api_level       => 1,
715         argc            => 1,
716         stream          => 1,
717 );                      
718
719 sub flat_marc_record {
720         my $self = shift;
721         my $client = shift;
722         my $rec = shift;
723
724         my $type = 'biblio';
725         $type = 'authority' if ($self->api_name =~ /authority/o);
726
727         OpenILS::Application::Ingest->post_init();
728         my $r = OpenSRF::AppSession
729                         ->create('open-ils.cstore')
730                         ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
731                         ->gather(1);
732
733
734         return undef unless ($r and $r->marc);
735
736         my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
737         for my $row (@rows) {
738                 $client->respond($row);
739                 $log->debug(JSON->perl2JSON($row), DEBUG);
740         }
741         return undef;
742 }
743 __PACKAGE__->register_method(  
744         api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
745         method          => "flat_marc_record",
746         api_level       => 1,
747         argc            => 1,
748         stream          => 1,
749 );                      
750 __PACKAGE__->register_method(  
751         api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
752         method          => "flat_marc_record",
753         api_level       => 1,
754         argc            => 1,
755         stream          => 1,
756 );                      
757
758 # --------------------------------------------------------------------------------
759 # Fingerprinting
760
761 package OpenILS::Application::Ingest::Biblio::Fingerprint;
762 use base qw/OpenILS::Application::Ingest/;
763 use Unicode::Normalize;
764 use OpenSRF::EX qw/:try/;
765
766 sub biblio_fingerprint_record {
767         my $self = shift;
768         my $client = shift;
769         my $rec = shift;
770
771         OpenILS::Application::Ingest->post_init();
772
773         my $r = OpenSRF::AppSession
774                         ->create('open-ils.cstore')
775                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
776                         ->gather(1);
777
778         return undef unless ($r and $r->marc);
779
780         my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
781         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
782         return $fp;
783 }
784 __PACKAGE__->register_method(  
785         api_name        => "open-ils.ingest.fingerprint.record",
786         method          => "biblio_fingerprint_record",
787         api_level       => 1,
788         argc            => 1,
789 );                      
790
791 our $fp_script;
792 sub biblio_fingerprint {
793         my $self = shift;
794         my $client = shift;
795         my $xml = OpenILS::Application::Ingest::entityize(shift);
796
797         $log->internal("Got MARC [$xml]");
798
799         if(!$fp_script) {
800                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
801                 my $conf = OpenSRF::Utils::SettingsClient->new;
802
803                 my $libs        = $conf->config_value(@pfx, 'script_path');
804                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
805                 my $script_libs = (ref($libs)) ? $libs : [$libs];
806
807                 $log->debug("Loading script $script_file for biblio fingerprinting...");
808                 
809                 $fp_script = new OpenILS::Utils::ScriptRunner
810                         ( file          => $script_file,
811                           paths         => $script_libs,
812                           reset_count   => 100 );
813         }
814
815         $fp_script->insert('environment' => {marc => $xml} => 1);
816
817         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return undef);
818         $log->debug("Script for biblio fingerprinting completed successfully...");
819
820         return $res;
821 }
822 __PACKAGE__->register_method(  
823         api_name        => "open-ils.ingest.fingerprint.xml",
824         method          => "biblio_fingerprint",
825         api_level       => 1,
826         argc            => 1,
827 );                      
828
829 our $rd_script;
830 sub biblio_descriptor {
831         my $self = shift;
832         my $client = shift;
833         my $xml = OpenILS::Application::Ingest::entityize(shift);
834
835         $log->internal("Got MARC [$xml]");
836
837         if(!$rd_script) {
838                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
839                 my $conf = OpenSRF::Utils::SettingsClient->new;
840
841                 my $libs        = $conf->config_value(@pfx, 'script_path');
842                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
843                 my $script_libs = (ref($libs)) ? $libs : [$libs];
844
845                 $log->debug("Loading script $script_file for biblio descriptor extraction...");
846                 
847                 $rd_script = new OpenILS::Utils::ScriptRunner
848                         ( file          => $script_file,
849                           paths         => $script_libs,
850                           reset_count   => 100 );
851         }
852
853         $log->debug("Setting up environment for descriptor extraction script...");
854         $rd_script->insert('environment.marc' => $xml => 1);
855         $log->debug("Environment building complete...");
856
857         my $res = $rd_script->run || ($log->error( "Descriptor script died!  $@" ) && return undef);
858         $log->debug("Script for biblio descriptor extraction completed successfully");
859
860         return $res;
861 }
862 __PACKAGE__->register_method(  
863         api_name        => "open-ils.ingest.descriptor.xml",
864         method          => "biblio_descriptor",
865         api_level       => 1,
866         argc            => 1,
867 );                      
868
869
870 1;
871
872 __END__
873
874 sub in_transaction {
875         OpenILS::Application::Ingest->post_init();
876         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
877 }
878
879 sub begin_transaction {
880         my $self = shift;
881         my $client = shift;
882         
883         OpenILS::Application::Ingest->post_init();
884         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
885         
886         try {
887                 if (!$outer_xact) {
888                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
889                         #__PACKAGE__->st_sess->connect;
890                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
891                         unless (defined $r and $r) {
892                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
893                                 #__PACKAGE__->st_sess->disconnect;
894                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
895                         }
896                 }
897         } otherwise {
898                 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
899         };
900
901         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
902 }
903
904 sub rollback_transaction {
905         my $self = shift;
906         my $client = shift;
907
908         OpenILS::Application::Ingest->post_init();
909         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
910
911         try {
912                 if ($outer_xact) {
913                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
914                 } else {
915                         $log->debug("Ingest isn't inside a transaction.", INFO);
916                 }
917         } catch Error with {
918                 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
919         };
920
921         return 1;
922 }
923
924 sub commit_transaction {
925         my $self = shift;
926         my $client = shift;
927
928         OpenILS::Application::Ingest->post_init();
929         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
930
931         try {
932                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
933                 if ($outer_xact) {
934                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
935                         unless (defined $r and $r) {
936                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
937                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
938                         }
939                         #__PACKAGE__->st_sess->disconnect;
940                 } else {
941                         $log->debug("Ingest isn't inside a transaction.", INFO);
942                 }
943         } catch Error with {
944                 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
945         };
946
947         return 1;
948 }
949
950 sub storage_req {
951         my $self = shift;
952         my $method = shift;
953         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
954         return shift( @res );
955 }
956
957 sub scrub_authority_record {
958         my $self = shift;
959         my $client = shift;
960         my $rec = shift;
961
962         my $commit = 0;
963         if (!OpenILS::Application::Ingest->in_transaction) {
964                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
965                 $commit = 1;
966         }
967
968         my $success = 1;
969         try {
970                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
971
972                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
973                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
974
975                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
976         } otherwise {
977                 $log->debug('Scrubbing failed : '.shift(), ERROR);
978                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
979                 $success = 0;
980         };
981
982         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
983         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
984         return $success;
985 }
986 __PACKAGE__->register_method(  
987         api_name        => "open-ils.worm.scrub.authority",
988         method          => "scrub_authority_record",
989         api_level       => 1,
990         argc            => 1,
991 );                      
992
993
994 sub scrub_metabib_record {
995         my $self = shift;
996         my $client = shift;
997         my $rec = shift;
998
999         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1000                 $rec = OpenILS::Application::Ingest->storage_req(
1001                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1002                 );
1003         }
1004
1005         my $commit = 0;
1006         if (!OpenILS::Application::Ingest->in_transaction) {
1007                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1008                 $commit = 1;
1009         }
1010
1011         my $success = 1;
1012         try {
1013                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
1014                 
1015                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1016                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1017                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1018                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1019                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1020                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1021                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1022                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1023
1024                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1025                 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1026
1027                 for my $mr (@$masters) {
1028                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
1029                         my $others = OpenILS::Application::Ingest->storage_req(
1030                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
1031
1032                         if (@$others) {
1033                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1034                                 $mr->master_record($others->[0]->source);
1035                                 OpenILS::Application::Ingest->storage_req(
1036                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1037                                         { id => $mr->id },
1038                                         { master_record => $others->[0]->source, mods => undef }
1039                                 );
1040                         } else {
1041                                 warn "Removing metarecord whose master is $rec";
1042                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1043                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1044                                 warn "Metarecord removed";
1045                                 $log->debug( "Metarecord removed", DEBUG);
1046                         }
1047                 }
1048
1049                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
1050
1051         } otherwise {
1052                 $log->debug('Scrubbing failed : '.shift(), ERROR);
1053                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1054                 $success = 0;
1055         };
1056
1057         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1058         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1059         return $success;
1060 }
1061 __PACKAGE__->register_method(  
1062         api_name        => "open-ils.worm.scrub.biblio",
1063         method          => "scrub_metabib_record",
1064         api_level       => 1,
1065         argc            => 1,
1066 );                      
1067
1068 sub wormize_biblio_metarecord {
1069         my $self = shift;
1070         my $client = shift;
1071         my $mrec = shift;
1072
1073         my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1074
1075         my $count = 0;
1076         for my $r (@$recs) {
1077                 my $success = 0;
1078                 try {
1079                         $success = wormize_biblio_record($self => $client => $r->source);
1080                         $client->respond(
1081                                 { record  => $r->source,
1082                                   metarecord => $rec->metarecord,
1083                                   success => $success,
1084                                 }
1085                         );
1086                 } catch Error with {
1087                         my $e = shift;
1088                         $client->respond(
1089                                 { record  => $r->source,
1090                                   metarecord => $rec->metarecord,
1091                                   success => $success,
1092                                   error   => $e,
1093                                 }
1094                         );
1095                 };
1096         }
1097         return undef;
1098 }
1099 __PACKAGE__->register_method(
1100         api_name        => "open-ils.worm.wormize.metarecord",
1101         method          => "wormize_biblio_metarecord",
1102         api_level       => 1,
1103         argc            => 1,
1104         stream          => 1,
1105 );
1106 __PACKAGE__->register_method(
1107         api_name        => "open-ils.worm.wormize.metarecord.nomap",
1108         method          => "wormize_biblio_metarecord",
1109         api_level       => 1,
1110         argc            => 1,
1111         stream          => 1,
1112 );
1113 __PACKAGE__->register_method(
1114         api_name        => "open-ils.worm.wormize.metarecord.noscrub",
1115         method          => "wormize_biblio_metarecord",
1116         api_level       => 1,
1117         argc            => 1,
1118         stream          => 1,
1119 );
1120 __PACKAGE__->register_method(
1121         api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1122         method          => "wormize_biblio_metarecord",
1123         api_level       => 1,
1124         argc            => 1,
1125         stream          => 1,
1126 );
1127
1128
1129 sub wormize_biblio_record {
1130         my $self = shift;
1131         my $client = shift;
1132         my $rec = shift;
1133
1134         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1135                 $rec = OpenILS::Application::Ingest->storage_req(
1136                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1137                 );
1138         }
1139
1140
1141         my $commit = 0;
1142         if (!OpenILS::Application::Ingest->in_transaction) {
1143                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1144                 $commit = 1;
1145         }
1146
1147         my $success = 1;
1148         try {
1149                 # clean up the cruft
1150                 unless ($self->api_name =~ /noscrub/o) {
1151                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1152                 }
1153
1154                 # now redo 'em
1155                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1156
1157                 my @full_rec = ();
1158                 my @rec_descriptor = ();
1159                 my %field_entry = (
1160                         title   => [],
1161                         author  => [],
1162                         subject => [],
1163                         keyword => [],
1164                         series  => [],
1165                 );
1166                 my %metarecord = ();
1167                 my @source_map = ();
1168                 for my $r (@$bibs) {
1169                         try {
1170                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1171
1172                                 my $xml = $parser->parse_string($r->marc);
1173
1174                                 #update the fingerprint
1175                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1176                                 OpenILS::Application::Ingest->storage_req(
1177                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1178                                         { id => $r->id },
1179                                         { fingerprint => $fp->{fingerprint},
1180                                           quality     => int($fp->{quality}) }
1181                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1182
1183                                 # the full_rec stuff
1184                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1185                                         $fr->record( $r->id );
1186                                         push @full_rec, $fr;
1187                                 }
1188
1189                                 # the rec_descriptor stuff
1190                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1191                                 $rd->record( $r->id );
1192                                 push @rec_descriptor, $rd;
1193                         
1194                                 # the indexing field entry stuff
1195                                 for my $class ( qw/title author subject keyword series/ ) {
1196                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1197                                                 $fe->source( $r->id );
1198                                                 push @{$field_entry{$class}}, $fe;
1199                                         }
1200                                 }
1201
1202                                 unless ($self->api_name =~ /nomap/o) {
1203                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
1204                                 
1205                                         unless ($mr) {
1206                                                 $mr = Fieldmapper::metabib::metarecord->new;
1207                                                 $mr->fingerprint( $fp->{fingerprint} );
1208                                                 $mr->master_record( $r->id );
1209                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1210                                         }
1211
1212                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1213                                         $mr_map->metarecord( $mr->id );
1214                                         $mr_map->source( $r->id );
1215                                         push @source_map, $mr_map;
1216
1217                                         $metarecord{$mr->id} = $mr;
1218                                 }
1219                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1220                         } otherwise {
1221                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1222                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1223                         };
1224                 }
1225                 
1226
1227                 if (@rec_descriptor) {
1228                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1229
1230                         OpenILS::Application::Ingest->storage_req(
1231                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1232                                 @source_map
1233                         ) if (@source_map);
1234
1235                         for my $mr ( values %metarecord ) {
1236                                 my $sources = OpenILS::Application::Ingest->storage_req(
1237                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1238                                         $mr->id
1239                                 );
1240
1241                                 my $bibs = OpenILS::Application::Ingest->storage_req(
1242                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1243                                         [ map { $_->source } @$sources ]
1244                                 );
1245
1246                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1247
1248                                 OpenILS::Application::Ingest->storage_req(
1249                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1250                                         { id => $mr->id },
1251                                         { master_record => $master->id, mods => undef }
1252                                 );
1253                         }
1254
1255                         OpenILS::Application::Ingest->storage_req(
1256                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1257                                 @rec_descriptor
1258                         ) if (@rec_descriptor);
1259
1260                         OpenILS::Application::Ingest->storage_req(
1261                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
1262                                 @full_rec
1263                         ) if (@full_rec);
1264
1265                         OpenILS::Application::Ingest->storage_req(
1266                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1267                                 @{ $field_entry{title} }
1268                         ) if (@{ $field_entry{title} });
1269
1270                         OpenILS::Application::Ingest->storage_req(
1271                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1272                                 @{ $field_entry{author} }
1273                         ) if (@{ $field_entry{author} });
1274                         
1275                         OpenILS::Application::Ingest->storage_req(
1276                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1277                                 @{ $field_entry{subject} }
1278                         ) if (@{ $field_entry{subject} });
1279
1280                         OpenILS::Application::Ingest->storage_req(
1281                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1282                                 @{ $field_entry{keyword} }
1283                         ) if (@{ $field_entry{keyword} });
1284
1285                         OpenILS::Application::Ingest->storage_req(
1286                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1287                                 @{ $field_entry{series} }
1288                         ) if (@{ $field_entry{series} });
1289
1290                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1291                 } else {
1292                         $success = 0;
1293                 }
1294
1295         } otherwise {
1296                 $log->debug('Wormization failed : '.shift(), ERROR);
1297                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1298                 $success = 0;
1299         };
1300
1301         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1302         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1303         return $success;
1304 }
1305 __PACKAGE__->register_method(
1306         api_name        => "open-ils.worm.wormize.biblio",
1307         method          => "wormize_biblio_record",
1308         api_level       => 1,
1309         argc            => 1,
1310 );
1311 __PACKAGE__->register_method(
1312         api_name        => "open-ils.worm.wormize.biblio.nomap",
1313         method          => "wormize_biblio_record",
1314         api_level       => 1,
1315         argc            => 1,
1316 );
1317 __PACKAGE__->register_method(
1318         api_name        => "open-ils.worm.wormize.biblio.noscrub",
1319         method          => "wormize_biblio_record",
1320         api_level       => 1,
1321         argc            => 1,
1322 );
1323 __PACKAGE__->register_method(
1324         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
1325         method          => "wormize_biblio_record",
1326         api_level       => 1,
1327         argc            => 1,
1328 );
1329
1330 sub wormize_authority_record {
1331         my $self = shift;
1332         my $client = shift;
1333         my $rec = shift;
1334
1335         my $commit = 0;
1336         if (!OpenILS::Application::Ingest->in_transaction) {
1337                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1338                 $commit = 1;
1339         }
1340
1341         my $success = 1;
1342         try {
1343                 # clean up the cruft
1344                 unless ($self->api_name =~ /noscrub/o) {
1345                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1346                 }
1347
1348                 # now redo 'em
1349                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1350
1351                 my @full_rec = ();
1352                 my @rec_descriptor = ();
1353                 for my $r (@$bibs) {
1354                         my $xml = $parser->parse_string($r->marc);
1355
1356                         # the full_rec stuff
1357                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1358                                 $fr->record( $r->id );
1359                                 push @full_rec, $fr;
1360                         }
1361
1362                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
1363                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1364                         #$rd->record( $r->id );
1365                         #push @rec_descriptor, $rd;
1366                         
1367                 }
1368
1369                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1370
1371                 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1372                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1373
1374                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1375
1376         } otherwise {
1377                 $log->debug('Wormization failed : '.shift(), ERROR);
1378                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
1379                 $success = 0;
1380         };
1381
1382         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1383         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1384         return $success;
1385 }
1386 __PACKAGE__->register_method(
1387         api_name        => "open-ils.worm.wormize.authority",
1388         method          => "wormize_authority_record",
1389         api_level       => 1,
1390         argc            => 1,
1391 );
1392 __PACKAGE__->register_method(
1393         api_name        => "open-ils.worm.wormize.authority.noscrub",
1394         method          => "wormize_authority_record",
1395         api_level       => 1,
1396         argc            => 1,
1397 );
1398
1399
1400 # --------------------------------------------------------------------------------
1401 # MARC index extraction
1402
1403 package OpenILS::Application::Ingest::XPATH;
1404 use base qw/OpenILS::Application::Ingest/;
1405 use Unicode::Normalize;
1406
1407 # give this a MODS documentElement and an XPATH expression
1408 sub _xpath_to_string {
1409         my $xml = shift;
1410         my $xpath = shift;
1411         my $ns_uri = shift;
1412         my $ns_prefix = shift;
1413         my $unique = shift;
1414
1415         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1416
1417         my $string = "";
1418
1419         # grab the set of matching nodes
1420         my @nodes = $xml->findnodes( $xpath );
1421         for my $value (@nodes) {
1422
1423                 # grab all children of the node
1424                 my @children = $value->childNodes();
1425                 for my $child (@children) {
1426
1427                         # add the childs content to the growing buffer
1428                         my $content = quotemeta($child->textContent);
1429                         next if ($unique && $string =~ /$content/);  # uniquify the values
1430                         $string .= $child->textContent . " ";
1431                 }
1432                 if( ! @children ) {
1433                         $string .= $value->textContent . " ";
1434                 }
1435         }
1436         return NFD($string);
1437 }
1438
1439 sub class_all_index_string_xml {
1440         my $self = shift;
1441         my $client = shift;
1442         my $xml = shift;
1443         my $class = shift;
1444
1445         OpenILS::Application::Ingest->post_init();
1446         $xml = $parser->parse_string($xml) unless (ref $xml);
1447         
1448         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1449         for my $type ( keys %{ $xpathset->{$class} } ) {
1450                 my $value =  _xpath_to_string(
1451                                 $mods_sheet->transform($xml)->documentElement,
1452                                 $xpathset->{$class}->{$type}->{xpath},
1453                                 "http://www.loc.gov/mods/",
1454                                 "mods",
1455                                 1
1456                 );
1457
1458                 next unless $value;
1459
1460                 $value = NFD($value);
1461                 $value =~ s/\pM+//sgo;
1462                 $value =~ s/\pC+//sgo;
1463                 $value =~ s/\W+$//sgo;
1464
1465                 $value =~ s/(\w)\./$1/sgo;
1466                 $value = lc($value);
1467
1468                 my $fm = $class_constructor->new;
1469                 $fm->value( $value );
1470                 $fm->field( $xpathset->{$class}->{$type}->{id} );
1471                 $client->respond($fm);
1472         }
1473         return undef;
1474 }
1475 __PACKAGE__->register_method(  
1476         api_name        => "open-ils.worm.field_entry.class.xml",
1477         method          => "class_all_index_string_xml",
1478         api_level       => 1,
1479         argc            => 1,
1480         stream          => 1,
1481 );                      
1482
1483 sub class_all_index_string_record {
1484         my $self = shift;
1485         my $client = shift;
1486         my $rec = shift;
1487         my $class = shift;
1488
1489         OpenILS::Application::Ingest->post_init();
1490         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1491
1492         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1493                 $fm->source($rec);
1494                 $client->respond($fm);
1495         }
1496         return undef;
1497 }
1498 __PACKAGE__->register_method(  
1499         api_name        => "open-ils.worm.field_entry.class.record",
1500         method          => "class_all_index_string_record",
1501         api_level       => 1,
1502         argc            => 1,
1503         stream          => 1,
1504 );                      
1505
1506
1507 sub class_index_string_xml {
1508         my $self = shift;
1509         my $client = shift;
1510         my $xml = shift;
1511         my $class = shift;
1512         my $type = shift;
1513
1514         OpenILS::Application::Ingest->post_init();
1515         $xml = $parser->parse_string($xml) unless (ref $xml);
1516         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1517 }
1518 __PACKAGE__->register_method(  
1519         api_name        => "open-ils.worm.class.type.xml",
1520         method          => "class_index_string_xml",
1521         api_level       => 1,
1522         argc            => 1,
1523 );                      
1524
1525 sub class_index_string_record {
1526         my $self = shift;
1527         my $client = shift;
1528         my $rec = shift;
1529         my $class = shift;
1530         my $type = shift;
1531
1532         OpenILS::Application::Ingest->post_init();
1533         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1534
1535         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1536         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1537         return $d;
1538 }
1539 __PACKAGE__->register_method(  
1540         api_name        => "open-ils.worm.class.type.record",
1541         method          => "class_index_string_record",
1542         api_level       => 1,
1543         argc            => 1,
1544 );                      
1545
1546 sub xml_xpath {
1547         my $self = shift;
1548         my $client = shift;
1549         my $xml = shift;
1550         my $xpath = shift;
1551         my $uri = shift;
1552         my $prefix = shift;
1553         my $unique = shift;
1554
1555         OpenILS::Application::Ingest->post_init();
1556         $xml = $parser->parse_string($xml) unless (ref $xml);
1557         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1558 }
1559 __PACKAGE__->register_method(  
1560         api_name        => "open-ils.worm.xpath.xml",
1561         method          => "xml_xpath",
1562         api_level       => 1,
1563         argc            => 1,
1564 );                      
1565
1566 sub record_xpath {
1567         my $self = shift;
1568         my $client = shift;
1569         my $rec = shift;
1570         my $xpath = shift;
1571         my $uri = shift;
1572         my $prefix = shift;
1573         my $unique = shift;
1574
1575         OpenILS::Application::Ingest->post_init();
1576         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1577
1578         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1579         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1580         return $d;
1581 }
1582 __PACKAGE__->register_method(  
1583         api_name        => "open-ils.worm.xpath.record",
1584         method          => "record_xpath",
1585         api_level       => 1,
1586         argc            => 1,
1587 );                      
1588
1589
1590 # --------------------------------------------------------------------------------
1591 # MARC Descriptor
1592
1593 package OpenILS::Application::Ingest::Biblio::Leader;
1594 use base qw/OpenILS::Application::Ingest/;
1595 use Unicode::Normalize;
1596
1597 our %marc_type_groups = (
1598         BKS => q/[at]{1}/,
1599         SER => q/[a]{1}/,
1600         VIS => q/[gkro]{1}/,
1601         MIX => q/[p]{1}/,
1602         MAP => q/[ef]{1}/,
1603         SCO => q/[cd]{1}/,
1604         REC => q/[ij]{1}/,
1605         COM => q/[m]{1}/,
1606 );
1607
1608 sub _type_re {
1609         my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
1610         return qr/$re/;
1611 }
1612
1613 our %biblio_descriptor_code = (
1614         item_type => sub { substr($ldr,6,1); },
1615         item_form =>
1616                 sub {
1617                         if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1618                                 return substr($oo8,29,1);
1619                         } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1620                                 return substr($oo8,23,1);
1621                         }
1622                         return ' ';
1623                 },
1624         bib_level => sub { substr($ldr,7,1); },
1625         control_type => sub { substr($ldr,8,1); },
1626         char_encoding => sub { substr($ldr,9,1); },
1627         enc_level => sub { substr($ldr,17,1); },
1628         cat_form => sub { substr($ldr,18,1); },
1629         pub_status => sub { substr($ldr,5,1); },
1630         item_lang => sub { substr($oo8,35,3); },
1631         lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1632         type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1633         audience => sub { substr($oo8,22,1); },
1634 );
1635
1636 sub _extract_biblio_descriptors {
1637         my $xml = shift;
1638
1639         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1640         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1641         local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1642
1643         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1644         for my $rd_field ( keys %biblio_descriptor_code ) {
1645                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1646         }
1647
1648         return $rd_obj;
1649 }
1650
1651 sub extract_biblio_desc_xml {
1652         my $self = shift;
1653         my $client = shift;
1654         my $xml = shift;
1655
1656         $xml = $parser->parse_string($xml) unless (ref $xml);
1657
1658         return _extract_biblio_descriptors( $xml );
1659 }
1660 __PACKAGE__->register_method(  
1661         api_name        => "open-ils.worm.biblio_leader.xml",
1662         method          => "extract_biblio_desc_xml",
1663         api_level       => 1,
1664         argc            => 1,
1665 );                      
1666
1667 sub extract_biblio_desc_record {
1668         my $self = shift;
1669         my $client = shift;
1670         my $rec = shift;
1671
1672         OpenILS::Application::Ingest->post_init();
1673         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1674
1675         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1676         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1677         return $d;
1678 }
1679 __PACKAGE__->register_method(  
1680         api_name        => "open-ils.worm.biblio_leader.record",
1681         method          => "extract_biblio_desc_record",
1682         api_level       => 1,
1683         argc            => 1,
1684 );                      
1685
1686 # --------------------------------------------------------------------------------
1687 # Flat MARC
1688
1689 package OpenILS::Application::Ingest::FlatMARC;
1690 use base qw/OpenILS::Application::Ingest/;
1691 use Unicode::Normalize;
1692
1693
1694 sub _marcxml_to_full_rows {
1695
1696         my $marcxml = shift;
1697         my $xmltype = shift || 'metabib';
1698
1699         my $type = "Fieldmapper::${xmltype}::full_rec";
1700
1701         my @ns_list;
1702         
1703         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1704
1705         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1706                 next unless $tagline;
1707
1708                 my $ns = $type->new;
1709
1710                 $ns->tag( 'LDR' );
1711                 my $val = $tagline->textContent;
1712                 $val = NFD($val);
1713                 $val =~ s/\pM+//sgo;
1714                 $val =~ s/\pC+//sgo;
1715                 $val =~ s/\W+$//sgo;
1716                 $ns->value( $val );
1717
1718                 push @ns_list, $ns;
1719         }
1720
1721         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1722                 next unless $tagline;
1723
1724                 my $ns = $type->new;
1725
1726                 $ns->tag( $tagline->getAttribute( "tag" ) );
1727                 my $val = $tagline->textContent;
1728                 $val = NFD($val);
1729                 $val =~ s/\pM+//sgo;
1730                 $val =~ s/\pC+//sgo;
1731                 $val =~ s/\W+$//sgo;
1732                 $ns->value( $val );
1733
1734                 push @ns_list, $ns;
1735         }
1736
1737         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1738                 next unless $tagline;
1739
1740                 my $tag = $tagline->getAttribute( "tag" );
1741                 my $ind1 = $tagline->getAttribute( "ind1" );
1742                 my $ind2 = $tagline->getAttribute( "ind2" );
1743
1744                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1745                         next unless $data;
1746
1747                         my $ns = $type->new;
1748
1749                         $ns->tag( $tag );
1750                         $ns->ind1( $ind1 );
1751                         $ns->ind2( $ind2 );
1752                         $ns->subfield( $data->getAttribute( "code" ) );
1753                         my $val = $data->textContent;
1754                         $val = NFD($val);
1755                         $val =~ s/\pM+//sgo;
1756                         $val =~ s/\pC+//sgo;
1757                         $val =~ s/\W+$//sgo;
1758                         $ns->value( lc($val) );
1759
1760                         push @ns_list, $ns;
1761                 }
1762         }
1763
1764         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1765         return @ns_list;
1766 }
1767
1768 sub flat_marc_xml {
1769         my $self = shift;
1770         my $client = shift;
1771         my $xml = shift;
1772
1773         $xml = $parser->parse_string($xml) unless (ref $xml);
1774
1775         my $type = 'metabib';
1776         $type = 'authority' if ($self->api_name =~ /authority/o);
1777
1778         OpenILS::Application::Ingest->post_init();
1779
1780         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1781         return undef;
1782 }
1783 __PACKAGE__->register_method(  
1784         api_name        => "open-ils.worm.flat_marc.authority.xml",
1785         method          => "flat_marc_xml",
1786         api_level       => 1,
1787         argc            => 1,
1788         stream          => 1,
1789 );                      
1790 __PACKAGE__->register_method(  
1791         api_name        => "open-ils.worm.flat_marc.biblio.xml",
1792         method          => "flat_marc_xml",
1793         api_level       => 1,
1794         argc            => 1,
1795         stream          => 1,
1796 );                      
1797
1798 sub flat_marc_record {
1799         my $self = shift;
1800         my $client = shift;
1801         my $rec = shift;
1802
1803         my $type = 'biblio';
1804         $type = 'authority' if ($self->api_name =~ /authority/o);
1805
1806         OpenILS::Application::Ingest->post_init();
1807         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1808
1809         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1810         return undef;
1811 }
1812 __PACKAGE__->register_method(  
1813         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
1814         method          => "flat_marc_record",
1815         api_level       => 1,
1816         argc            => 1,
1817         stream          => 1,
1818 );                      
1819 __PACKAGE__->register_method(  
1820         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
1821         method          => "flat_marc_record",
1822         api_level       => 1,
1823         argc            => 1,
1824         stream          => 1,
1825 );                      
1826
1827
1828 # --------------------------------------------------------------------------------
1829 # Fingerprinting
1830
1831 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1832 use base qw/OpenILS::Application::Ingest/;
1833 use Unicode::Normalize;
1834 use OpenSRF::EX qw/:try/;
1835
1836 my @fp_mods_xpath = (
1837         '//mods:mods/mods:typeOfResource[text()="text"]' => [
1838                         title   => {
1839                                         xpath   => [
1840                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1841                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1842                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1843                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1844                                         ],
1845                                         fixup   => sub {
1846                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1847                                                         $text = NFD($text);
1848                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1849                                                         $text =~ s/\pM+//gso;
1850                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1851                                                         $text = lc($text);
1852                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1853                                                         $text =~ s/\s+/ /sgo;
1854                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1855                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1856                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1857                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1858                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1859                                                         $text =~ s/\[.[^\]]+\]//sgo;
1860                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1861                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1862                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1863                                                 },
1864                         },
1865                         author  => {
1866                                         xpath   => [
1867                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1868                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1869                                         ],
1870                                         fixup   => sub {
1871                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1872                                                         $text = NFD($text);
1873                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1874                                                         $text =~ s/\pM+//gso;
1875                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1876                                                         $text = lc($text);
1877                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1878                                                         $text =~ s/\s+/ /sgo;
1879                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1880                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1881                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1882                                                         $text =~ s/,?\s+.*$//sgo;
1883                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1884                                                 },
1885                         },
1886         ],
1887
1888         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1889                         title   => {
1890                                         xpath   => [
1891                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1892                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1893                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1894                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1895                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1896                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1897                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1898                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1899                                         ],
1900                                         fixup   => sub {
1901                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1902                                                         $text = NFD($text);
1903                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1904                                                         $text =~ s/\pM+//gso;
1905                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1906                                                         $text = lc($text);
1907                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1908                                                         $text =~ s/\s+/ /sgo;
1909                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1910                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1911                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1912                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1913                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1914                                                         $text =~ s/\[.[^\]]+\]//sgo;
1915                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1916                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1917                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1918                                                 },
1919                         },
1920                         author  => {
1921                                         xpath   => [
1922                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1923                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1924                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1925                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1926                                         ],
1927                                         fixup   => sub {
1928                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1929                                                         $text = NFD($text);
1930                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1931                                                         $text =~ s/\pM+//gso;
1932                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1933                                                         $text = lc($text);
1934                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1935                                                         $text =~ s/\s+/ /sgo;
1936                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1937                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1938                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1939                                                         $text =~ s/,?\s+.*$//sgo;
1940                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1941                                                 },
1942                         },
1943         ],
1944
1945 );
1946
1947 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1948
1949 sub _fp_mods {
1950         my $mods = shift;
1951         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1952
1953         my $fp_string = '';
1954
1955         my $match_index = 0;
1956         my $block_index = 1;
1957         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1958                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1959
1960                         my $block_name_index = 0;
1961                         my $block_value_index = 1;
1962                         my $block = $fp_mods_xpath[$block_index];
1963                         while ( my $part = $$block[$block_value_index] ) {
1964                                 local $text;
1965                                 for my $xpath ( @{ $part->{xpath} } ) {
1966                                         $text = $mods->findvalue( $xpath );
1967                                         last if ($text);
1968                                 }
1969
1970                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1971
1972                                 if ($text) {
1973                                         $$part{fixup}->();
1974                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1975                                         $fp_string .= $text;
1976                                 }
1977
1978                                 $block_name_index += 2;
1979                                 $block_value_index += 2;
1980                         }
1981                 }
1982                 if ($fp_string) {
1983                         $fp_string =~ s/\W+//gso;
1984                         $log->debug("Fingerprint is [$fp_string]", INFO);;
1985                         return $fp_string;
1986                 }
1987
1988                 $match_index += 2;
1989                 $block_index += 2;
1990         }
1991         return undef;
1992 }
1993
1994 sub refingerprint_bibrec {
1995         my $self = shift;
1996         my $client = shift;
1997         my $rec = shift;
1998
1999         my $commit = 0;
2000         if (!OpenILS::Application::Ingest->in_transaction) {
2001                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2002                 $commit = 1;
2003         }
2004
2005         my $success = 1;
2006         try {
2007                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2008                 for my $b (@$bibs) {
2009                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
2010
2011                         if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2012
2013                                 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2014
2015                                 OpenILS::Application::Ingest->storage_req(
2016                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
2017                                         { id => $b->id },
2018                                         { fingerprint => $fp->{fingerprint},
2019                                           quality     => $fp->{quality} }
2020                                 );
2021
2022                                 if ($self->api_name !~ /nomap/o) {
2023                                         my $old_source_map = OpenILS::Application::Ingest->storage_req(
2024                                                 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2025                                                 $b->id
2026                                         );
2027
2028                                         my $old_mrid;
2029                                         if (ref($old_source_map) and @$old_source_map) {
2030                                                 for my $m (@$old_source_map) {
2031                                                         $old_mrid = $m->metarecord;
2032                                                         OpenILS::Application::Ingest->storage_req(
2033                                                                 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
2034                                                                 $m->id
2035                                                         );
2036                                                 }
2037                                         }
2038
2039                                         my $old_sm = OpenILS::Application::Ingest->storage_req(
2040                                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2041                                                         { metarecord => $old_mrid }
2042                                         ) if ($old_mrid);
2043
2044                                         if (ref($old_sm) and @$old_sm == 0) {
2045                                                 OpenILS::Application::Ingest->storage_req(
2046                                                         'open-ils.storage.direct.metabib.metarecord.delete',
2047                                                         $old_mrid
2048                                                 );
2049                                         }
2050
2051                                         my $mr = OpenILS::Application::Ingest->storage_req(
2052                                                         'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2053                                                         { fingerprint => $fp->{fingerprint} }
2054                                         )->[0];
2055                                 
2056                                         unless ($mr) {
2057                                                 $mr = Fieldmapper::metabib::metarecord->new;
2058                                                 $mr->fingerprint( $fp->{fingerprint} );
2059                                                 $mr->master_record( $b->id );
2060                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2061                                         }
2062
2063                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2064                                         $mr_map->metarecord( $mr->id );
2065                                         $mr_map->source( $b->id );
2066                                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2067
2068                                 }
2069                         }
2070                         $client->respond($b->id);
2071                 }
2072
2073         } otherwise {
2074                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
2075                 $success = 0;
2076         };
2077
2078         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2079         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
2080         return undef;
2081 }
2082 __PACKAGE__->register_method(  
2083         api_name        => "open-ils.worm.fingerprint.record.update",
2084         method          => "refingerprint_bibrec",
2085         api_level       => 1,
2086         argc            => 1,
2087         stream          => 1,
2088 );                      
2089
2090 __PACKAGE__->register_method(  
2091         api_name        => "open-ils.worm.fingerprint.record.update.nomap",
2092         method          => "refingerprint_bibrec",
2093         api_level       => 1,
2094         argc            => 1,
2095 );                      
2096
2097 =comment
2098
2099 sub fingerprint_bibrec {
2100         my $self = shift;
2101         my $client = shift;
2102         my $rec = shift;
2103
2104         OpenILS::Application::Ingest->post_init();
2105         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2106
2107         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2108         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2109         return $fp;
2110
2111 }
2112 __PACKAGE__->register_method(  
2113         api_name        => "open-ils.worm.fingerprint.record",
2114         method          => "fingerprint_bibrec",
2115         api_level       => 0,
2116         argc            => 1,
2117 );                      
2118
2119
2120 sub fingerprint_mods {
2121         my $self = shift;
2122         my $client = shift;
2123         my $xml = shift;
2124
2125         OpenILS::Application::Ingest->post_init();
2126         my $mods = $parser->parse_string($xml)->documentElement;
2127
2128         return _fp_mods( $mods );
2129 }
2130 __PACKAGE__->register_method(  
2131         api_name        => "open-ils.worm.fingerprint.mods",
2132         method          => "fingerprint_mods",
2133         api_level       => 1,
2134         argc            => 1,
2135 );                      
2136
2137 sub fingerprint_marc {
2138         my $self = shift;
2139         my $client = shift;
2140         my $xml = shift;
2141
2142         $xml = $parser->parse_string($xml) unless (ref $xml);
2143
2144         OpenILS::Application::Ingest->post_init();
2145         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2146         $log->debug("Returning [$fp] as fingerprint", INFO);
2147         return $fp;
2148 }
2149 __PACKAGE__->register_method(  
2150         api_name        => "open-ils.worm.fingerprint.marc",
2151         method          => "fingerprint_marc",
2152         api_level       => 1,
2153         argc            => 1,
2154 );                      
2155
2156
2157 =cut
2158
2159 sub biblio_fingerprint_record {
2160         my $self = shift;
2161         my $client = shift;
2162         my $rec = shift;
2163
2164         OpenILS::Application::Ingest->post_init();
2165
2166         my $marc = OpenILS::Application::Ingest
2167                         ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2168                         ->marc;
2169
2170         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2171         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2172         return $fp;
2173 }
2174 __PACKAGE__->register_method(  
2175         api_name        => "open-ils.worm.fingerprint.record",
2176         method          => "biblio_fingerprint_record",
2177         api_level       => 1,
2178         argc            => 1,
2179 );                      
2180
2181 our $fp_script;
2182 sub biblio_fingerprint {
2183         my $self = shift;
2184         my $client = shift;
2185         my $marc = shift;
2186
2187         OpenILS::Application::Ingest->post_init();
2188
2189         $marc = $parser->parse_string($marc) unless (ref $marc);
2190
2191         my $mods = OpenILS::Application::Ingest::entityize(
2192                 $mods_sheet
2193                         ->transform( $marc )
2194                         ->documentElement
2195                         ->toString,
2196                 'D'
2197         );
2198
2199         $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2200
2201         warn $marc;
2202         $log->internal("Got MARC [$marc]");
2203         $log->internal("Created MODS [$mods]");
2204
2205         if(!$fp_script) {
2206                 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2207                 my $conf = OpenSRF::Utils::SettingsClient->new;
2208
2209                 my $libs        = $conf->config_value(@pfx, 'script_path');
2210                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
2211                 my $script_libs = (ref($libs)) ? $libs : [$libs];
2212
2213                 $log->debug("Loading script $script_file for biblio fingerprinting...");
2214                 
2215                 $fp_script = new OpenILS::Utils::ScriptRunner
2216                         ( file          => $script_file,
2217                           paths         => $script_libs,
2218                           reset_count   => 1000 );
2219         }
2220
2221         $log->debug("Applying environment for biblio fingerprinting...");
2222
2223         my $env = {marc => $marc, mods => $mods};
2224         #my $res = {fingerprint => '', quality => '0'};
2225
2226         $fp_script->insert('environment' => $env);
2227         #$fp_script->insert('result' => $res);
2228
2229         $log->debug("Running script for biblio fingerprinting...");
2230
2231         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return 0);
2232
2233         $log->debug("Script for biblio fingerprinting completed successfully...");
2234
2235         return $res;
2236 }
2237 __PACKAGE__->register_method(  
2238         api_name        => "open-ils.worm.fingerprint.marc",
2239         method          => "biblio_fingerprint",
2240         api_level       => 1,
2241         argc            => 1,
2242 );                      
2243
2244 # --------------------------------------------------------------------------------
2245
2246 1;
2247
2248 __END__
2249 my $in_xact;
2250 my $begin;
2251 my $commit;
2252 my $rollback;
2253 my $lookup;
2254 my $update_entry;
2255 my $mr_lookup;
2256 my $mr_update;
2257 my $mr_create;
2258 my $create_source_map;
2259 my $sm_lookup;
2260 my $rm_old_rd;
2261 my $rm_old_sm;
2262 my $rm_old_fr;
2263 my $rm_old_tr;
2264 my $rm_old_ar;
2265 my $rm_old_sr;
2266 my $rm_old_kr;
2267 my $rm_old_ser;
2268
2269 my $fr_create;
2270 my $rd_create;
2271 my $create = {};
2272
2273 my %descriptor_code = (
2274         item_type => 'substr($ldr,6,1)',
2275         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2276         bib_level => 'substr($ldr,7,1)',
2277         control_type => 'substr($ldr,8,1)',
2278         char_encoding => 'substr($ldr,9,1)',
2279         enc_level => 'substr($ldr,17,1)',
2280         cat_form => 'substr($ldr,18,1)',
2281         pub_status => 'substr($ldr,5,1)',
2282         item_lang => 'substr($oo8,35,3)',
2283         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2284         audience => 'substr($oo8,22,1)',
2285 );
2286
2287 sub wormize {
2288
2289         my $self = shift;
2290         my $client = shift;
2291         my @docids = @_;
2292
2293         my $no_map = 0;
2294         if ($self->api_name =~ /no_map/o) {
2295                 $no_map = 1;
2296         }
2297
2298         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2299                 unless ($in_xact);
2300         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2301                 unless ($begin);
2302         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2303                 unless ($commit);
2304         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2305                 unless ($rollback);
2306         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2307                 unless ($sm_lookup);
2308         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2309                 unless ($mr_lookup);
2310         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2311                 unless ($mr_update);
2312         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2313                 unless ($lookup);
2314         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2315                 unless ($update_entry);
2316         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2317                 unless ($rm_old_sm);
2318         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2319                 unless ($rm_old_rd);
2320         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2321                 unless ($rm_old_fr);
2322         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2323                 unless ($rm_old_tr);
2324         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2325                 unless ($rm_old_ar);
2326         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2327                 unless ($rm_old_sr);
2328         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2329                 unless ($rm_old_kr);
2330         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2331                 unless ($rm_old_ser);
2332         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2333                 unless ($mr_create);
2334         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2335                 unless ($create_source_map);
2336         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2337                 unless ($rd_create);
2338         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2339                 unless ($fr_create);
2340         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2341                 unless ($$create{title});
2342         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2343                 unless ($$create{author});
2344         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2345                 unless ($$create{subject});
2346         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2347                 unless ($$create{keyword});
2348         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2349                 unless ($$create{series});
2350
2351
2352         my ($outer_xact) = $in_xact->run;
2353         try {
2354                 unless ($outer_xact) {
2355                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2356                         my ($r) = $begin->run($client);
2357                         unless (defined $r and $r) {
2358                                 $rollback->run;
2359                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2360                         }
2361                 }
2362         } catch Error with {
2363                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2364         };
2365
2366         my @source_maps;
2367         my @entry_list;
2368         my @mr_list;
2369         my @rd_list;
2370         my @ns_list;
2371         my @mods_data;
2372         my $ret = 0;
2373         for my $entry ( $lookup->run(@docids) ) {
2374                 # step -1: grab the doc from storage
2375                 next unless ($entry);
2376
2377                 if(!$mods_sheet) {
2378                         my $xslt_doc = $parser->parse_file(
2379                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2380                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2381                 }
2382
2383                 my $xml = $entry->marc;
2384                 my $docid = $entry->id;
2385                 my $marcdoc = $parser->parse_string($xml);
2386                 my $modsdoc = $mods_sheet->transform($marcdoc);
2387
2388                 my $mods = $modsdoc->documentElement;
2389                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2390
2391                 $entry->fingerprint( fingerprint_mods( $mods ) );
2392                 push @entry_list, $entry;
2393
2394                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2395
2396                 unless ($no_map) {
2397                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2398                         if (!$mr || !@$mr) {
2399                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2400                                 $mr = new Fieldmapper::metabib::metarecord;
2401                                 $mr->fingerprint( $entry->fingerprint );
2402                                 $mr->master_record( $entry->id );
2403                                 my ($new_mr) = $mr_create->run($mr);
2404                                 $mr->id($new_mr);
2405                                 unless (defined $mr) {
2406                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2407                                 }
2408                         } else {
2409                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2410                                 $mr->mods('');
2411                                 push @mr_list, $mr;
2412                         }
2413
2414                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2415                         $sm->metarecord( $mr->id );
2416                         $sm->source( $entry->id );
2417                         push @source_maps, $sm;
2418                 }
2419
2420                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2421                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2422
2423                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2424                 for my $rd_field ( keys %descriptor_code ) {
2425                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2426                 }
2427                 $rd_obj->record( $docid );
2428                 push @rd_list, $rd_obj;
2429
2430                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2431
2432                 # step 2: build the KOHA rows
2433                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2434                 $_->record( $docid ) for (@tmp_list);
2435                 push @ns_list, @tmp_list;
2436
2437                 $ret++;
2438
2439                 last unless ($self->api_name =~ /batch$/o);
2440         }
2441
2442         $rm_old_rd->run( { record => \@docids } );
2443         $rm_old_fr->run( { record => \@docids } );
2444         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2445         $rm_old_tr->run( { source => \@docids } );
2446         $rm_old_ar->run( { source => \@docids } );
2447         $rm_old_sr->run( { source => \@docids } );
2448         $rm_old_kr->run( { source => \@docids } );
2449         $rm_old_ser->run( { source => \@docids } );
2450
2451         unless ($no_map) {
2452                 my ($sm) = $create_source_map->run(@source_maps);
2453                 unless (defined $sm) {
2454                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2455                 }
2456                 my ($mr) = $mr_update->run(@mr_list);
2457                 unless (defined $mr) {
2458                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2459                 }
2460         }
2461
2462         my ($re) = $update_entry->run(@entry_list);
2463         unless (defined $re) {
2464                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2465         }
2466
2467         my ($rd) = $rd_create->run(@rd_list);
2468         unless (defined $rd) {
2469                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2470         }
2471
2472         my ($fr) = $fr_create->run(@ns_list);
2473         unless (defined $fr) {
2474                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2475         }
2476
2477         # step 5: insert the new metadata
2478         for my $class ( qw/title author subject keyword series/ ) {
2479                 my @md_list = ();
2480                 for my $doc ( @mods_data ) {
2481                         my ($did) = keys %$doc;
2482                         my ($data) = values %$doc;
2483
2484                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2485                         for my $row ( keys %{ $$data{$class} } ) {
2486                                 next unless (exists $$data{$class}{$row});
2487                                 next unless ($$data{$class}{$row}{value});
2488                                 my $fm_obj = $fm_constructor->new;
2489                                 $fm_obj->value( $$data{$class}{$row}{value} );
2490                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2491                                 $fm_obj->source( $did );
2492                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2493
2494                                 push @md_list, $fm_obj;
2495                         }
2496                 }
2497                         
2498                 my ($cr) = $$create{$class}->run(@md_list);
2499                 unless (defined $cr) {
2500                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2501                 }
2502         }
2503
2504         unless ($outer_xact) {
2505                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2506                 my ($c) = $commit->run;
2507                 unless (defined $c and $c) {
2508                         $rollback->run;
2509                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2510                 }
2511         }
2512
2513         return $ret;
2514 }
2515 __PACKAGE__->register_method( 
2516         api_name        => "open-ils.worm.wormize",
2517         method          => "wormize",
2518         api_level       => 1,
2519         argc            => 1,
2520 );
2521 __PACKAGE__->register_method( 
2522         api_name        => "open-ils.worm.wormize.no_map",
2523         method          => "wormize",
2524         api_level       => 1,
2525         argc            => 1,
2526 );
2527 __PACKAGE__->register_method( 
2528         api_name        => "open-ils.worm.wormize.batch",
2529         method          => "wormize",
2530         api_level       => 1,
2531         argc            => 1,
2532 );
2533 __PACKAGE__->register_method( 
2534         api_name        => "open-ils.worm.wormize.no_map.batch",
2535         method          => "wormize",
2536         api_level       => 1,
2537         argc            => 1,
2538 );
2539
2540
2541 my $ain_xact;
2542 my $abegin;
2543 my $acommit;
2544 my $arollback;
2545 my $alookup;
2546 my $aupdate_entry;
2547 my $amr_lookup;
2548 my $amr_update;
2549 my $amr_create;
2550 my $acreate_source_map;
2551 my $asm_lookup;
2552 my $arm_old_rd;
2553 my $arm_old_sm;
2554 my $arm_old_fr;
2555 my $arm_old_tr;
2556 my $arm_old_ar;
2557 my $arm_old_sr;
2558 my $arm_old_kr;
2559 my $arm_old_ser;
2560
2561 my $afr_create;
2562 my $ard_create;
2563 my $acreate = {};
2564
2565 sub authority_wormize {
2566
2567         my $self = shift;
2568         my $client = shift;
2569         my @docids = @_;
2570
2571         my $no_map = 0;
2572         if ($self->api_name =~ /no_map/o) {
2573                 $no_map = 1;
2574         }
2575
2576         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2577                 unless ($in_xact);
2578         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2579                 unless ($begin);
2580         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2581                 unless ($commit);
2582         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2583                 unless ($rollback);
2584         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2585                 unless ($alookup);
2586         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2587                 unless ($aupdate_entry);
2588         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2589                 unless ($arm_old_rd);
2590         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2591                 unless ($arm_old_fr);
2592         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2593                 unless ($ard_create);
2594         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2595                 unless ($afr_create);
2596
2597
2598         my ($outer_xact) = $in_xact->run;
2599         try {
2600                 unless ($outer_xact) {
2601                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2602                         my ($r) = $begin->run($client);
2603                         unless (defined $r and $r) {
2604                                 $rollback->run;
2605                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2606                         }
2607                 }
2608         } catch Error with {
2609                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2610         };
2611
2612         my @source_maps;
2613         my @entry_list;
2614         my @mr_list;
2615         my @rd_list;
2616         my @ns_list;
2617         my @mads_data;
2618         my $ret = 0;
2619         for my $entry ( $lookup->run(@docids) ) {
2620                 # step -1: grab the doc from storage
2621                 next unless ($entry);
2622
2623                 #if(!$mads_sheet) {
2624                 #       my $xslt_doc = $parser->parse_file(
2625                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2626                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2627                 #}
2628
2629                 my $xml = $entry->marc;
2630                 my $docid = $entry->id;
2631                 my $marcdoc = $parser->parse_string($xml);
2632                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2633
2634                 #my $mads = $madsdoc->documentElement;
2635                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2636
2637                 push @entry_list, $entry;
2638
2639                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2640                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2641
2642                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2643                 for my $rd_field ( keys %descriptor_code ) {
2644                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2645                 }
2646                 $rd_obj->record( $docid );
2647                 push @rd_list, $rd_obj;
2648
2649                 # step 2: build the KOHA rows
2650                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2651                 $_->record( $docid ) for (@tmp_list);
2652                 push @ns_list, @tmp_list;
2653
2654                 $ret++;
2655
2656                 last unless ($self->api_name =~ /batch$/o);
2657         }
2658
2659         $arm_old_rd->run( { record => \@docids } );
2660         $arm_old_fr->run( { record => \@docids } );
2661
2662         my ($rd) = $ard_create->run(@rd_list);
2663         unless (defined $rd) {
2664                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2665         }
2666
2667         my ($fr) = $fr_create->run(@ns_list);
2668         unless (defined $fr) {
2669                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2670         }
2671
2672         unless ($outer_xact) {
2673                 $log->debug("Commiting transaction started by Ingest.", INFO);
2674                 my ($c) = $commit->run;
2675                 unless (defined $c and $c) {
2676                         $rollback->run;
2677                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2678                 }
2679         }
2680
2681         return $ret;
2682 }
2683 __PACKAGE__->register_method( 
2684         api_name        => "open-ils.worm.authortiy.wormize",
2685         method          => "wormize",
2686         api_level       => 1,
2687         argc            => 1,
2688 );
2689 __PACKAGE__->register_method( 
2690         api_name        => "open-ils.worm.authority.wormize.batch",
2691         method          => "wormize",
2692         api_level       => 1,
2693         argc            => 1,
2694 );
2695
2696
2697 # --------------------------------------------------------------------------------
2698
2699
2700 sub _marcxml_to_full_rows {
2701
2702         my $marcxml = shift;
2703         my $type = shift || 'Fieldmapper::metabib::full_rec';
2704
2705         my @ns_list;
2706         
2707         my $root = $marcxml->documentElement;
2708
2709         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2710                 next unless $tagline;
2711
2712                 my $ns = new Fieldmapper::metabib::full_rec;
2713
2714                 $ns->tag( 'LDR' );
2715                 my $val = NFD($tagline->textContent);
2716                 $val =~ s/(\pM+)//gso;
2717                 $ns->value( $val );
2718
2719                 push @ns_list, $ns;
2720         }
2721
2722         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2723                 next unless $tagline;
2724
2725                 my $ns = new Fieldmapper::metabib::full_rec;
2726
2727                 $ns->tag( $tagline->getAttribute( "tag" ) );
2728                 my $val = NFD($tagline->textContent);
2729                 $val =~ s/(\pM+)//gso;
2730                 $ns->value( $val );
2731
2732                 push @ns_list, $ns;
2733         }
2734
2735         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2736                 next unless $tagline;
2737
2738                 my $tag = $tagline->getAttribute( "tag" );
2739                 my $ind1 = $tagline->getAttribute( "ind1" );
2740                 my $ind2 = $tagline->getAttribute( "ind2" );
2741
2742                 for my $data ( $tagline->childNodes ) {
2743                         next unless $data;
2744
2745                         my $ns = $type->new;
2746
2747                         $ns->tag( $tag );
2748                         $ns->ind1( $ind1 );
2749                         $ns->ind2( $ind2 );
2750                         $ns->subfield( $data->getAttribute( "code" ) );
2751                         my $val = NFD($data->textContent);
2752                         $val =~ s/(\pM+)//gso;
2753                         $ns->value( lc($val) );
2754
2755                         push @ns_list, $ns;
2756                 }
2757         }
2758         return @ns_list;
2759 }
2760
2761 sub _get_field_value {
2762
2763         my( $root, $xpath ) = @_;
2764
2765         my $string = "";
2766
2767         # grab the set of matching nodes
2768         my @nodes = $root->findnodes( $xpath );
2769         for my $value (@nodes) {
2770
2771                 # grab all children of the node
2772                 my @children = $value->childNodes();
2773                 for my $child (@children) {
2774
2775                         # add the childs content to the growing buffer
2776                         my $content = quotemeta($child->textContent);
2777                         next if ($string =~ /$content/);  # uniquify the values
2778                         $string .= $child->textContent . " ";
2779                 }
2780                 if( ! @children ) {
2781                         $string .= $value->textContent . " ";
2782                 }
2783         }
2784         $string = NFD($string);
2785         $string =~ s/(\pM)//gso;
2786         return lc($string);
2787 }
2788
2789
2790 sub modsdoc_to_values {
2791         my( $self, $mods ) = @_;
2792         my $data = {};
2793         for my $class (keys %$xpathset) {
2794                 $data->{$class} = {};
2795                 for my $type (keys %{$xpathset->{$class}}) {
2796                         $data->{$class}->{$type} = {};
2797                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
2798                 }
2799         }
2800         return $data;
2801 }
2802
2803
2804 1;
2805
2806