]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
pipeline fixups; integer qualities only, please
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
# Registry of the record formats known to the ingest code, keyed by format
# name.  Every entry carries the XML namespace URI for that format under
# 'ns' (empty for the namespace-less RSS flavours).  post_init() later adds
# a compiled stylesheet under 'xslt' to the mods and mods3 entries.
our %supported_formats = (

	# MARC and the MODS flavours derived from it
	marcxml	=> { ns => 'http://www.loc.gov/MARC21/slim' },
	mods	=> { ns => 'http://www.loc.gov/mods/' },
	mods3	=> { ns => 'http://www.loc.gov/mods/v3' },

	# Dublin Core flavours
	srw_dc	=> { ns => 'info:srw/schema/1/dc-schema' },
	oai_dc	=> { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
	rdf_dc	=> { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },

	# Syndication formats
	atom	=> { ns => 'http://www.w3.org/2005/Atom' },
	rss091	=> { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
	rss092	=> { ns => '' },
	rss093	=> { ns => '' },
	rss094	=> { ns => '' },
	rss10	=> { ns => 'http://purl.org/rss/1.0/' },
	rss11	=> { ns => 'http://purl.org/net/rss1.1#' },
	rss2	=> { ns => '' },
);
36
37
# Logger class name; used as a class-method invocant ($log->debug(...)).
my $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT processor, built when the file is loaded and
# shared by every package in this file.
my $parser = XML::LibXML->new;
my $xslt   = XML::LibXSLT->new;

# Declared but never assigned in the code visible here.
my ($mods_sheet, $mads_sheet);

# field_class => field name => { xpath, id, format }; filled by post_init().
my $xpathset = {};

# Lifecycle hooks, intentionally empty: the shared state above is loaded
# on demand by post_init() instead.
sub initialize { }
sub child_init { }
48
# Load the state shared by the ingest methods, once:
#   * the MODS and MODS v3 stylesheets, compiled from the configured xsl
#     directory into $supported_formats{...}{xslt};
#   * the XPath definitions from config.metabib_field (via open-ils.cstore),
#     stored in $xpathset as class => name => { xpath, id, format }.
# Does nothing when $xpathset already has entries.
sub post_init {

	unless (keys %$xpathset) {
		$log->debug("Running post_init", DEBUG);

		my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

		# format key, log message, stylesheet file (relative to $xsldir)
		my @stylesheets = (
			[ mods  => "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
			[ mods3 => "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
		);

		for my $sheet (@stylesheets) {
			my ($format, $message, $file) = @$sheet;

			# Already compiled by an earlier call.
			next if ($supported_formats{$format}{xslt});

			$log->debug($message, DEBUG);
			my $xslt_doc = $parser->parse_file( $xsldir . $file );
			$supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
		}

		# Every metabib field definition: "id != NULL" matches all rows.
		my $session = OpenSRF::AppSession->create('open-ils.cstore');
		my $req = $session
				->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
				->gather(1);

		if (ref $req and @$req) {
			for my $field (@$req) {
				my $entry = ( $xpathset->{ $field->field_class }->{ $field->name } ||= {} );
				$entry->{xpath}  = $field->xpath;
				$entry->{id}     = $field->id;
				$entry->{format} = $field->format;
				$log->debug("Loaded XPath from DB: ".$field->field_class." => ".$field->name." : ".$field->xpath, DEBUG);
			}
		}
	}
}
84
# Normalize a string and replace every non-ASCII character with a numeric
# XML character reference, so the result is plain ASCII.
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalization; anything else (including a
#            missing argument) selects NFC
#
# Returns the converted string.  Characters U+0080..U+FFFD and the
# supplementary planes U+10000..U+10FFFF are replaced with &#xHEX;.
# U+FFFE and U+FFFF are left alone, as before.
sub entityize {
	my $stuff = shift;
	my $form = shift;

	if (defined($form) && $form eq 'D') {
		$stuff = NFD($stuff);
	} else {
		$stuff = NFC($stuff);
	}

	# The earlier range stopped at U+FFFD, so characters beyond the BMP
	# slipped through unescaped.
	$stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/ge;
	return $stuff;
}
98
99 # --------------------------------------------------------------------------------
100 # Biblio ingest
101
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
105
# Read-only ingest of one bib object (anything offering ->marc and ->id).
# Runs the flat-MARC, field-entry, fingerprint and descriptor methods over
# the record's MARCXML and stamps the bib id onto the results.
#
# Returns a hashref: full_rec and field_entries (array refs), fingerprint,
# descriptor.
sub ro_biblio_ingest_single_object {
	my ($self, $client, $bib) = @_;

	my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
	my $document = $parser->parse_string($xml);

	# The first two methods take the parsed document, the last two the string.
	my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
	my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
	my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
	my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

	for my $entry (@mXfe) {
		$entry->source($bib->id);
	}
	for my $row (@mfr) {
		$row->record($bib->id);
	}
	if ($rd) {
		$rd->record($bib->id);
	}

	return {
		full_rec	=> \@mfr,
		field_entries	=> \@mXfe,
		fingerprint	=> $fp,
		descriptor	=> $rd,
	};
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.object.readonly",
	method		=> "ro_biblio_ingest_single_object",
	api_level	=> 1,
	argc		=> 1,
);
131
# Read-only ingest of one MARCXML string.  Same pipeline as the object
# variant, but no record id is known here, so nothing is stamped on the
# results.
#
# Returns a hashref: full_rec and field_entries (array refs), fingerprint,
# descriptor.
sub ro_biblio_ingest_single_xml {
	my ($self, $client, $marcxml) = @_;

	my $xml      = OpenILS::Application::Ingest::entityize($marcxml);
	my $document = $parser->parse_string($xml);

	my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
	my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
	my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
	my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

	return {
		full_rec	=> \@mfr,
		field_entries	=> \@mXfe,
		fingerprint	=> $fp,
		descriptor	=> $rd,
	};
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.xml.readonly",
	method		=> "ro_biblio_ingest_single_xml",
	api_level	=> 1,
	argc		=> 1,
);
152
# Read-only ingest of one bib record, looked up by id through
# open-ils.cstore.
#
#   $rec - id of the biblio.record_entry to ingest
#
# Returns the hashref produced by open-ils.ingest.full.biblio.xml.readonly
# with $rec stamped on its field entries, full_rec rows and (when present)
# descriptor.  Returns undef when the record cannot be retrieved or the
# ingest yields nothing.
sub ro_biblio_ingest_single_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();
	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and @$r);

	my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

	# Nothing came back; do not autovivify an empty result below.
	return undef unless ($res);

	$_->source($rec) for (@{$res->{field_entries}});
	$_->record($rec) for (@{$res->{full_rec}});

	# The descriptor is undef when the descriptor script fails; the object
	# variant guards against that too.
	$res->{descriptor}->record($rec) if ($res->{descriptor});

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.record.readonly",
	method		=> "ro_biblio_ingest_single_record",
	api_level	=> 1,
	argc		=> 1,
);
180
# Streaming read-only bib ingest by record id.  Pulls messages from the
# client one at a time (recv with count => 1, timeout => 5), runs the
# single-record ingest on each id, and responds with each result.  Stops
# when recv yields nothing or a message has undefined content.
sub ro_biblio_ingest_stream_record {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $rec = $message->content;
		last unless (defined $rec);

		$log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($rec);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($rec);
		}

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.record_stream.readonly",
	method		=> "ro_biblio_ingest_stream_record",
	api_level	=> 1,
	stream		=> 1,
);
211
# Streaming read-only bib ingest of raw MARCXML.  Each message received
# from the client (recv with count => 1, timeout => 5) carries one XML
# string; the single-XML ingest result is sent back for each.  Stops when
# recv yields nothing or a message has undefined content.
sub ro_biblio_ingest_stream_xml {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $xml = $message->content;
		last unless (defined $xml);

		$log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.xml_stream.readonly",
	method		=> "ro_biblio_ingest_stream_xml",
	api_level	=> 1,
	stream		=> 1,
);
239
# Streaming bib ingest of whole bib objects.  Each message received from
# the client (recv with count => 1, timeout => 5) carries an object with
# ->marc and ->id; its MARC is ingested, the id is stamped on the field
# entries and full_rec rows, and the result is sent back.  Stops when recv
# yields nothing or a message has undefined content.
sub rw_biblio_ingest_stream_import {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $bib = $message->content;
		last unless (defined $bib);

		$log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($bib->id);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($bib->id);
		}

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.biblio.bib_stream.import",
	method		=> "rw_biblio_ingest_stream_import",
	api_level	=> 1,
	stream		=> 1,
);
270
271
272 # --------------------------------------------------------------------------------
273 # Authority ingest
274
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
278
# Read-only ingest of one authority object (anything offering ->marc and
# ->id).  Only flat-MARC rows are produced for authority records.
#
# Returns a hashref with the same keys as the biblio variant so existing
# callers keep working: full_rec holds the rows (stamped with the record
# id); field_entries is always an empty array ref; fingerprint and
# descriptor are always undef.
sub ro_authority_ingest_single_object {
	my $self = shift;
	my $client = shift;
	my $bib = shift;
	my $xml = OpenILS::Application::Ingest::entityize($bib->marc);

	my $document = $parser->parse_string($xml);

	my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

	$_->record($bib->id) for (@mfr);

	# This used to reference @mXfe, $fp and $rd, none of which are declared
	# in this sub, so they resolved to shared package globals.  Return the
	# same (empty) values explicitly, with a fresh array per call.
	return { full_rec => \@mfr, field_entries => [], fingerprint => undef, descriptor => undef };
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.object.readonly",
	method		=> "ro_authority_ingest_single_object",
	api_level	=> 1,
	argc		=> 1,
);
299
# Read-only ingest of one authority MARCXML string.
#
# Returns a hashref whose only key, full_rec, is an array ref of the rows
# produced by open-ils.ingest.flat_marc.authority.xml.
sub ro_authority_ingest_single_xml {
	my ($self, $client, $marcxml) = @_;

	my $xml      = OpenILS::Application::Ingest::entityize($marcxml);
	my $document = $parser->parse_string($xml);

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
	my @mfr = $flattener->run($document);

	return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.xml.readonly",
	method		=> "ro_authority_ingest_single_xml",
	api_level	=> 1,
	argc		=> 1,
);
317
# Read-only ingest of one authority record, looked up by id through
# open-ils.cstore.
#
#   $rec - id of the authority.record_entry to ingest
#
# Returns the hashref produced by
# open-ils.ingest.full.authority.xml.readonly with $rec stamped on its
# full_rec rows.  Returns undef when the record cannot be retrieved or the
# ingest yields nothing.
sub ro_authority_ingest_single_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();
	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and @$r);

	my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

	# Nothing came back; do not autovivify an empty result below.
	return undef unless ($res);

	$_->record($rec) for (@{$res->{full_rec}});

	# The authority XML ingest returns only full_rec, so there is normally
	# no descriptor; calling ->record on it unconditionally always died.
	$res->{descriptor}->record($rec) if ($res->{descriptor});

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.record.readonly",
	method		=> "ro_authority_ingest_single_record",
	api_level	=> 1,
	argc		=> 1,
);
344
# Streaming read-only authority ingest by record id.  Pulls messages from
# the client one at a time (recv with count => 1, timeout => 5), runs the
# single-record authority ingest on each id, and responds with each
# result.  Stops when recv yields nothing or a message has undefined
# content.
sub ro_authority_ingest_stream_record {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $rec = $message->content;
		last unless (defined $rec);

		$log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($rec);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($rec);
		}

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.record_stream.readonly",
	method		=> "ro_authority_ingest_stream_record",
	api_level	=> 1,
	stream		=> 1,
);
375
# Streaming read-only authority ingest of raw MARCXML.  Each message
# received from the client (recv with count => 1, timeout => 5) carries
# one XML string; the single-XML authority ingest result is sent back for
# each.  Stops when recv yields nothing or a message has undefined content.
sub ro_authority_ingest_stream_xml {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $xml = $message->content;
		last unless (defined $xml);

		$log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.xml_stream.readonly",
	method		=> "ro_authority_ingest_stream_xml",
	api_level	=> 1,
	stream		=> 1,
);
403
# Streaming authority ingest of whole record objects.  Each message
# received from the client (recv with count => 1, timeout => 5) carries an
# object with ->marc and ->id; its MARC is ingested, the id is stamped on
# the results, and the result is sent back.  Stops when recv yields
# nothing or a message has undefined content.
sub rw_authority_ingest_stream_import {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();

	# NOTE(review): this session is created but not used below.
	my $ses = OpenSRF::AppSession->create('open-ils.cstore');

	while (my ($message) = $client->recv( count => 1, timeout => 5 )) {

		my $bib = $message->content;
		last unless (defined $bib);

		$log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
		my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

		for my $entry (@{$res->{field_entries}}) {
			$entry->source($bib->id);
		}
		for my $row (@{$res->{full_rec}}) {
			$row->record($bib->id);
		}

		$client->respond( $res );
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.full.authority.bib_stream.import",
	method		=> "rw_authority_ingest_stream_import",
	api_level	=> 1,
	stream		=> 1,
);
434
435
436 # --------------------------------------------------------------------------------
437 # MARC index extraction
438
439 package OpenILS::Application::Ingest::XPATH;
440 use base qw/OpenILS::Application::Ingest/;
441 use Unicode::Normalize;
442
443 # give this an XML documentElement and an XPATH expression
# Collect the text found by an XPath expression into one string.
#
#   $xml       - element to search (must offer findnodes and setNamespace)
#   $xpath     - XPath expression to evaluate against $xml
#   $ns_uri    - namespace URI to declare on $xml (optional)
#   $ns_prefix - prefix for that namespace (optional; both or neither)
#   $unique    - when true, skip child text already present in the buffer
#
# For every matching node the text of each child node is appended,
# followed by a space; a node without children contributes its own text.
# Returns the NFD-normalized buffer.
sub xpath_to_string {
	my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

	if ($ns_uri && $ns_prefix) {
		$xml->setNamespace( $ns_uri, $ns_prefix, 1 );
	}

	my $buffer = "";

	for my $node ($xml->findnodes( $xpath )) {

		my @kids = $node->childNodes();

		# Childless match: fall back to the node's own text.
		unless (@kids) {
			$buffer .= $node->textContent . " ";
			next;
		}

		for my $kid (@kids) {
			# Substring test against everything gathered so far.
			my $seen = quotemeta($kid->textContent);
			next if ($unique && $buffer =~ /$seen/);
			$buffer .= $kid->textContent . " ";
		}
	}

	return NFD($buffer);
}
474
# Build metabib field entries for the requested index classes.
#
#   $xml     - MARCXML string, or an already parsed document
#   @classes - field classes to extract (keys of $xpathset)
#
# For every field defined under each class, the record is transformed with
# the stylesheet for the field's format (if there is one; cached per
# format for this call), the field's XPath is evaluated, and the resulting
# text is normalized and lowercased.  Each non-empty value is sent to the
# client as a Fieldmapper::metabib::<class>_field_entry object.
sub class_index_string_xml {
	my ($self, $client, $xml, @classes) = @_;

	OpenILS::Application::Ingest->post_init();

	unless (ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	# format name => transformed document
	my %transform_cache;

	for my $class (@classes) {
		my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

		for my $type ( keys %{ $xpathset->{$class} } ) {

			my $def    = $xpathset->{$class}->{$type};
			my $format = $def->{format};
			my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

			# Formats without a stylesheet are searched in the raw record.
			my $document = $xml;
			if ($sf->{xslt}) {
				$transform_cache{$format} ||= $sf->{xslt}->transform($xml);
				$document = $transform_cache{$format};
			}

			# The format name doubles as the namespace prefix.
			my $value = xpath_to_string(
				$document->documentElement, $def->{xpath},
				$sf->{ns}, $format,
				1
			);

			next unless $value;

			$value = NFD($value);
			for ($value) {
				s/\pM+//sgo;			# combining marks
				s/\pC+//sgo;			# \pC (Other) characters
				s/\W+$//sgo;			# trailing non-word characters
				s/(\w)\.+(\w)/$1$2/sgo;		# dots between word characters
			}
			$value = lc($value);

			my $fm = $class_constructor->new;
			$fm->value( $value );
			$fm->field( $def->{id} );
			$client->respond($fm);
		}
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.field_entry.class.xml",
	method		=> "class_index_string_xml",
	api_level	=> 1,
	argc		=> 2,
	stream		=> 1,
);
531
# Build field entries for the requested index classes from a stored
# record, looked up by id through open-ils.cstore.
#
#   $rec     - id of the record to index
#   @classes - one or more field classes to extract
#
# Each entry produced by open-ils.ingest.field_entry.class.xml is stamped
# with $rec as its source and sent to the client.  Returns undef.
#
# NOTE(review): the record is fetched from authority.record_entry while the
# sibling all_index_string_record uses biblio.record_entry; confirm which
# is intended.
sub class_index_string_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	# Take every remaining argument; a bare shift kept only the first
	# class and silently dropped the rest.
	my @classes = @_;

	OpenILS::Application::Ingest->post_init();
	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and @$r);

	for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
		$fm->source($rec);
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.field_entry.class.record",
	method		=> "class_index_string_record",
	api_level	=> 1,
	argc		=> 2,
	stream		=> 1,
);
559
# Build field entries for every known index class from MARCXML.
#
#   $xml - MARCXML string, or an already parsed document
#
# Each entry produced by open-ils.ingest.field_entry.class.xml is sent to
# the client.  Returns undef.
sub all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;

	# $xpathset must be loaded before its keys are read; without this a
	# call made before any other ingest method saw no classes at all.
	OpenILS::Application::Ingest->post_init();

	for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.extract.field_entry.all.xml",
	method		=> "all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
577
# Build field entries for every known index class from a stored bib
# record, looked up by id through open-ils.cstore.  Each entry is stamped
# with the record id as its source and sent to the client.  Returns undef,
# also when the record cannot be retrieved.
sub all_index_string_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and @$r);

	my @entries = $self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset));
	for my $fm (@entries) {
		$fm->source($rec);
		$client->respond($fm);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.extract.field_entry.all.record",
	method		=> "all_index_string_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
604
605 # --------------------------------------------------------------------------------
606 # Flat MARC
607
608 package OpenILS::Application::Ingest::FlatMARC;
609 use base qw/OpenILS::Application::Ingest/;
610 use Unicode::Normalize;
611
612
# Reduce one MARC value to its stored form: decompose it (NFD), remove
# combining marks (\pM) and \pC (Other) characters, and trim trailing
# non-word characters.
sub _normalize_marc_value {
	my $val = NFD(shift);
	$val =~ s/\pM+//sg;
	$val =~ s/\pC+//sg;
	$val =~ s/\W+$//sg;
	return $val;
}

# Flatten a parsed MARCXML document into full_rec rows.
#
#   $marcxml - parsed document (must offer findnodes)
#   $xmltype - Fieldmapper namespace for the rows; defaults to 'metabib'
#
# Produces one Fieldmapper::<xmltype>::full_rec object per leader (tag
# 'LDR'), per controlfield, and per datafield subfield.  Values are
# normalized; only subfield values are additionally lowercased.
#
# Returns the list of rows, which is empty when the document contains no
# record element.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	# First element named "record", whatever its namespace.
	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# Without a record element there is nothing to flatten; calling
	# methods on the missing root used to die here.
	unless ($root) {
		$log->debug("No record element found in $xmltype xml");
		return @ns_list;
	}

	for my $tagline ( $root->getChildrenByTagName("leader") ) {
		next unless $tagline;

		my $ns = $type->new;
		$ns->tag( 'LDR' );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( $root->getChildrenByTagName("controlfield") ) {
		next unless $tagline;

		my $ns = $type->new;
		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _normalize_marc_value($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( $root->getChildrenByTagName("datafield") ) {
		next unless $tagline;

		# Tag and indicators are shared by every subfield of the datafield.
		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( $tagline->getChildrenByTagName('subfield') ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc(_normalize_marc_value($data->textContent)) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
	return @ns_list;
}
686
# Flatten MARCXML into full_rec rows and send each one to the client.
#
#   $xml - MARCXML string, or an already parsed document
#
# Registered under two API names: when the name used for the call contains
# "authority" the rows are authority full_rec objects, otherwise metabib.
# Returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	unless (ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.authority.xml",
	method		=> "flat_marc_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.biblio.xml",
	method		=> "flat_marc_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
718
# Flatten a stored record into full_rec rows, looked up by id through
# open-ils.cstore, and send each row to the client.
#
#   $rec - id of the record to flatten
#
# Registered under two API names: when the name used for the call contains
# "authority" the authority record and flattener are used, otherwise the
# biblio ones.  Returns undef, also when the record has no MARC.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $session = OpenSRF::AppSession->create('open-ils.cstore');
	my $r = $session
			->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
	my @rows = $flattener->run($r->marc);

	for my $row (@rows) {
		$client->respond($row);
		$log->debug(JSON->perl2JSON($row), DEBUG);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.biblio.record_entry",
	method		=> "flat_marc_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.flat_marc.authority.record_entry",
	method		=> "flat_marc_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
757
758 # --------------------------------------------------------------------------------
759 # Fingerprinting
760
761 package OpenILS::Application::Ingest::Biblio::Fingerprint;
762 use base qw/OpenILS::Application::Ingest/;
763 use Unicode::Normalize;
764 use OpenSRF::EX qw/:try/;
765
# Fingerprint a stored bib record, looked up by id through
# open-ils.cstore.
#
#   $rec - id of the biblio.record_entry to fingerprint
#
# Returns the result of open-ils.ingest.fingerprint.xml with its quality
# truncated to an integer.  Returns undef when the record has no MARC or
# no fingerprint was produced.
sub biblio_fingerprint_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

	# A failed fingerprint run yields undef; setting quality on it would
	# autovivify a bogus { quality => 0 } result.
	return undef unless ($fp);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	$fp->{quality} = int($fp->{quality});
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
791
# ScriptRunner for the fingerprint script; built on first use and reused.
our $fp_script;

# Run the configured fingerprint script over a MARCXML string.
#
# The script file and library path(s) are read from the open-ils.ingest
# app_settings ('scripts' => 'biblio_fingerprint' and 'script_path').  The
# entityized MARC is made available to the script as environment.marc.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

		# script_path may be a single value or a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Fail explicitly.  The old "run || (log && return undef)" chain only
	# returned when the logger call itself returned a true value.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.xml",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
829
# ScriptRunner for the descriptor script; built on first use and reused.
our $rd_script;

# Run the configured record-descriptor script over a MARCXML string.
#
# The script file and library path(s) are read from the open-ils.ingest
# app_settings ('scripts' => 'biblio_descriptor' and 'script_path').  The
# entityized MARC is inserted into the script as environment.marc.
#
# Returns whatever the script run returns, or undef (after logging an
# error) when the run yields a false value.
sub biblio_descriptor {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$rd_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');

		# script_path may be a single value or a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio descriptor extraction...");

		$rd_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$log->debug("Setting up environment for descriptor extraction script...");
	$rd_script->insert('environment.marc' => $xml => 1);
	$log->debug("Environment building complete...");

	# Fail explicitly.  The old "run || (log && return undef)" chain only
	# returned when the logger call itself returned a true value.
	my $res = $rd_script->run;
	unless ($res) {
		$log->error( "Descriptor script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio descriptor extraction completed successfully");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.descriptor.xml",
	method		=> "biblio_descriptor",
	api_level	=> 1,
	argc		=> 1,
);
869
870
871 1;
872
873 __END__
874
# in_transaction()
#
# Makes sure post_init() has run, then returns the first result of the
# 'open-ils.storage.transaction.current' storage method.  Callers in this
# file use the result as a boolean.
sub in_transaction {
	OpenILS::Application::Ingest->post_init();

	my $current_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current_xact;
}
879
# begin_transaction( $self, $client )
#
# Starts a storage transaction unless 'open-ils.storage.transaction.current'
# already reports one.  A failed BEGIN triggers a rollback request and a
# PANIC, which is caught and logged right here, so callers only ever see the
# return value: the first result of 'open-ils.storage.transaction.current'
# after the attempt.
sub begin_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $existing_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		unless ($existing_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

			my $began = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
			if (!$began) {
				# undo whatever was half-started before giving up
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
			}
		}
	} otherwise {
		$log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
	};

	my $now_current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $now_current;
}
904
# rollback_transaction( $self, $client )
#
# Requests 'open-ils.storage.transaction.rollback' when a current
# transaction is reported; otherwise only logs that there is none.
# Any Error raised while doing so is re-thrown as an OpenSRF::EX::PANIC.
# Returns 1.
sub rollback_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $open_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$open_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		} else {
			__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
	};

	return 1;
}
924
# commit_transaction( $self, $client )
#
# Requests 'open-ils.storage.transaction.commit' when a current transaction
# is reported; otherwise only logs that there is none.  A false commit
# result triggers a rollback request and a PANIC.  Any Error raised inside
# (including that PANIC) is re-thrown as
# OpenSRF::EX::PANIC "Ingest Couldn't COMMIT transaction!".
# Returns 1.
sub commit_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $open_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$open_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		} else {
			my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
			if (!$committed) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
			}
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
	};

	return 1;
}
950
# storage_req( $self, $method, @params )
#
# Looks up $method via method_lookup(), runs it with @params and returns
# only the first element of the result list (undef when the list is empty).
sub storage_req {
	my ($self, $method, @params) = @_;

	my ($first_result) = __PACKAGE__->method_lookup( $method )->run( @params );
	return $first_result;
}
957
# scrub_authority_record( $self, $client, $rec )
#
# Mass-deletes the authority full_rec and record_descriptor rows whose
# 'record' is $rec, inside the savepoint 'scrub_authority_record'.
# If no transaction is in progress one is begun here, and then committed on
# success or rolled back on failure.
# Returns 1 on success, 0 when any step threw.
sub scrub_authority_record {
	my ($self, $client, $rec) = @_;

	# Remember whether this call opened the transaction and must end it.
	my $own_xact = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$own_xact = 1;
	}

	my $savepoint = 'scrub_authority_record';
	my @purge_methods = (
		'open-ils.storage.direct.authority.full_rec.mass_delete',
		'open-ils.storage.direct.authority.record_descriptor.mass_delete',
	);

	my $ok = 1;
	try {
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', $savepoint );

		for my $purge (@purge_methods) {
			OpenILS::Application::Ingest->storage_req( $purge, { record => $rec } );
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', $savepoint );
	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', $savepoint );
		$ok = 0;
	};

	if ($own_xact) {
		if ($ok) {
			OpenILS::Application::Ingest->commit_transaction;
		} else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}

	return $ok;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.worm.scrub.authority",
	method          => "scrub_authority_record",
	api_level       => 1,
	argc            => 1,
);
993
994
# scrub_metabib_record( $self, $client, $rec )
#
# Removes the derived metabib rows for bib record $rec inside the savepoint
# 'scrub_metabib_record', then repairs metarecords whose master_record was
# $rec: each is re-pointed at the source of its first remaining source map,
# or deleted when no map remains.
#
#   $rec - a record id, or a hash that is first passed to
#          'open-ils.storage.id_list.biblio.record_entry.search_where'.
#
# If no transaction is in progress one is begun here, and then committed on
# success or rolled back on failure.
# Returns 1 on success, 0 when any step threw.
sub scrub_metabib_record {
	my ($self, $client, $rec) = @_;

	if (ref($rec) =~ /HASH/o) {
		$rec = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	# Remember whether this call opened the transaction and must end it.
	my $own_xact = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$own_xact = 1;
	}

	my $savepoint = 'scrub_metabib_record';

	# [ storage method, name of the column that holds the bib id ]
	my @purges = (
		[ 'open-ils.storage.direct.metabib.full_rec.mass_delete'              => 'record' ],
		[ 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete' => 'source' ],
		[ 'open-ils.storage.direct.metabib.record_descriptor.mass_delete'     => 'record' ],
		[ 'open-ils.storage.direct.metabib.title_field_entry.mass_delete'     => 'source' ],
		[ 'open-ils.storage.direct.metabib.author_field_entry.mass_delete'    => 'source' ],
		[ 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete'   => 'source' ],
		[ 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete'   => 'source' ],
		[ 'open-ils.storage.direct.metabib.series_field_entry.mass_delete'    => 'source' ],
	);

	my $ok = 1;
	try {
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', $savepoint );

		for my $purge (@purges) {
			my ($method, $column) = @$purge;
			OpenILS::Application::Ingest->storage_req( $method, { $column => $rec } );
		}

		$log->debug( "Looking for metarecords whose master is $rec", DEBUG);
		my $masters = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec
		);

		for my $mr (@$masters) {
			$log->debug( "Found metarecord whose master is $rec", DEBUG);
			my $others = OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id
			);

			unless (@$others) {
				# no source left for this metarecord: drop it
				warn "Removing metarecord whose master is $rec";
				$log->debug( "Removing metarecord whose master is $rec", DEBUG);
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
				warn "Metarecord removed";
				$log->debug( "Metarecord removed", DEBUG);
				next;
			}

			# promote the first remaining source to master
			my $new_master = $others->[0]->source;
			$log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$new_master, DEBUG);
			$mr->master_record($new_master);
			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord.remote_update',
				{ id => $mr->id },
				{ master_record => $new_master, mods => undef }
			);
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', $savepoint );

	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', $savepoint );
		$ok = 0;
	};

	if ($own_xact) {
		if ($ok) {
			OpenILS::Application::Ingest->commit_transaction;
		} else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}

	return $ok;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.worm.scrub.biblio",
	method          => "scrub_metabib_record",
	api_level       => 1,
	argc            => 1,
);
1068
# wormize_biblio_metarecord( $self, $client, $mrec )
#
# Streaming method: looks up the metarecord source maps for metarecord
# $mrec and calls wormize_biblio_record() for each mapped source.  One
# response hash is sent per source:
#
#   { record => <source id>, metarecord => <metarecord id>,
#     success => <wormize result>, error => <Error object, only on failure> }
#
# $self is handed on unchanged, so the '.nomap' / '.noscrub' parts of the
# api_name this was called under are also seen by wormize_biblio_record().
# Returns undef once every source has been processed.
sub wormize_biblio_metarecord {
	my $self = shift;
	my $client = shift;
	my $mrec = shift;

	my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

	for my $r (@$recs) {
		my $success = 0;
		try {
			$success = wormize_biblio_record($self => $client => $r->source);
			$client->respond(
				{ record     => $r->source,
				  # the map row being processed is $r; there is no $rec here
				  metarecord => $r->metarecord,
				  success    => $success,
				}
			);
		} catch Error with {
			my $e = shift;
			$client->respond(
				{ record     => $r->source,
				  metarecord => $r->metarecord,
				  success    => $success,
				  error      => $e,
				}
			);
		};
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => $_,
	method          => "wormize_biblio_metarecord",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
) for (
	"open-ils.worm.wormize.metarecord",
	"open-ils.worm.wormize.metarecord.nomap",
	"open-ils.worm.wormize.metarecord.noscrub",
	"open-ils.worm.wormize.metarecord.nomap.noscrub",
);
1128
1129
# wormize_biblio_record( $self, $client, $rec )
#
# Rebuilds the derived metabib data for bib record(s) $rec.
#
#   $rec - a record id, or a hash that is first passed to
#          'open-ils.storage.id_list.biblio.record_entry.search_where'.
#
# Steps:
#   1. unless the api_name contains 'noscrub', run open-ils.worm.scrub.biblio
#   2. per record, inside its own 'extract_data<id>' savepoint: refresh the
#      fingerprint/quality, collect full_rec rows, the record descriptor and
#      the field entries, and (unless the api_name contains 'nomap') find or
#      create the metarecord for the fingerprint and queue a source map
#   3. inside the 'wormize_record' savepoint: create the queued rows and
#      re-elect each touched metarecord's master as the mapped bib with the
#      highest quality
#
# If no transaction is in progress one is begun here, and then committed on
# success or rolled back on failure.
# Returns 1 on success; 0 when nothing could be extracted or a step threw.
sub wormize_biblio_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	if ( ref($rec) && ref($rec) =~ /HASH/o ) {
		$rec = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			|| OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;

	# True only while the 'wormize_record' savepoint exists.  The error
	# handler must not roll back to a savepoint that was never set, which
	# is the case for every failure during scrubbing or extraction.
	my $savepoint_open = 0;

	try {
		# clean up the cruft
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec )
				|| OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		my @rec_descriptor = ();
		my %field_entry = (
			title   => [],
			author  => [],
			subject => [],
			keyword => [],
			series  => [],
		);
		my %metarecord = ();
		my @source_map = ();
		for my $r (@$bibs) {
			try {
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

				my $xml = $parser->parse_string($r->marc);

				# update the fingerprint
				my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );

				# Quality is an integer, so compare it numerically.  A
				# record with no stored quality yet always counts as changed.
				my $new_quality = int($fp->{quality});
				my $old_quality = $r->quality;
				my $fp_changed  = $fp->{fingerprint} ne $r->fingerprint
					|| !defined($old_quality)
					|| $new_quality != $old_quality;

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $r->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => $new_quality }
				) if ($fp_changed);

				# the full_rec stuff
				for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
					$fr->record( $r->id );
					push @full_rec, $fr;
				}

				# the rec_descriptor stuff
				my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
				$rd->record( $r->id );
				push @rec_descriptor, $rd;

				# the indexing field entry stuff
				for my $class ( qw/title author subject keyword series/ ) {
					for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
						$fe->source( $r->id );
						push @{$field_entry{$class}}, $fe;
					}
				}

				unless ($self->api_name =~ /nomap/o) {
					my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];

					# first record with this fingerprint: it founds the metarecord
					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $r->id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $r->id );
					push @source_map, $mr_map;

					$metarecord{$mr->id} = $mr;
				}
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
			} otherwise {
				# a bad record only loses its own work; the others carry on
				$log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
			};
		}

		if (@rec_descriptor) {
			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
			$savepoint_open = 1;

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
				@source_map
			) if (@source_map);

			# re-elect the master of every metarecord that gained a source
			for my $mr ( values %metarecord ) {
				my $sources = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
					$mr->id
				);

				my $mapped_bibs = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
					[ map { $_->source } @$sources ]
				);

				my $master = ( sort { $b->quality <=> $a->quality } @$mapped_bibs )[0];

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $master->id, mods => undef }
				);
			}

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.record_descriptor.batch.create',
				@rec_descriptor
			) if (@rec_descriptor);

			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.full_rec.batch.create',
				@full_rec
			) if (@full_rec);

			for my $class ( qw/title author subject keyword series/ ) {
				OpenILS::Application::Ingest->storage_req(
					"open-ils.storage.direct.metabib.${class}_field_entry.batch.create",
					@{ $field_entry{$class} }
				) if (@{ $field_entry{$class} });
			}

			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
			$savepoint_open = 0;
		} else {
			# nothing could be extracted from any record
			$success = 0;
		}

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		$success = 0;
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' )
			if ($savepoint_open);
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return $success;
}
__PACKAGE__->register_method(
	api_name        => $_,
	method          => "wormize_biblio_record",
	api_level       => 1,
	argc            => 1,
) for (
	"open-ils.worm.wormize.biblio",
	"open-ils.worm.wormize.biblio.nomap",
	"open-ils.worm.wormize.biblio.noscrub",
	"open-ils.worm.wormize.biblio.nomap.noscrub",
);
1330
# wormize_authority_record( $self, $client, $rec )
#
# Rebuilds the authority full_rec rows for authority record(s) $rec.
#
# Steps:
#   1. unless the api_name contains 'noscrub', run
#      open-ils.worm.scrub.authority
#   2. parse each record's MARC and collect its flattened full_rec rows
#   3. create those rows inside the 'wormize_authority_record' savepoint
#
# If no transaction is in progress one is begun here, and then committed on
# success or rolled back on failure.
# Returns 1 on success, 0 when any step threw.
sub wormize_authority_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			|| OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;

	# True only while the 'wormize_authority_record' savepoint exists.  The
	# error handler must not roll back to a savepoint that was never set,
	# which is the case for every failure during scrubbing or MARC parsing.
	my $savepoint_open = 0;

	try {
		# clean up the cruft
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec )
				|| OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		my @rec_descriptor = ();
		for my $r (@$bibs) {
			my $xml = $parser->parse_string($r->marc);

			# the full_rec stuff
			for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
				$fr->record( $r->id );
				push @full_rec, $fr;
			}

			# the rec_descriptor stuff -- XXX What does this mean for authority records?
			#my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
			#$rd->record( $r->id );
			#push @rec_descriptor, $rd;

		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
		$savepoint_open = 1;

		#OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
		$savepoint_open = 0;

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		$success = 0;
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' )
			if ($savepoint_open);
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return $success;
}
__PACKAGE__->register_method(
	api_name        => $_,
	method          => "wormize_authority_record",
	api_level       => 1,
	argc            => 1,
) for (
	"open-ils.worm.wormize.authority",
	"open-ils.worm.wormize.authority.noscrub",
);
1399
1400
1401 # --------------------------------------------------------------------------------
1402 # MARC index extraction
1403
1404 package OpenILS::Application::Ingest::XPATH;
1405 use base qw/OpenILS::Application::Ingest/;
1406 use Unicode::Normalize;
1407
# _xpath_to_string( $xml, $xpath, $ns_uri, $ns_prefix, $unique )
#
# Give this a MODS documentElement and an XPATH expression.
#
#   $xml                 - node to search; must support setNamespace() and
#                          findnodes()
#   $xpath               - expression handed to findnodes()
#   $ns_uri, $ns_prefix  - when both are true, the namespace is activated on
#                          $xml before searching
#   $unique              - when true, a child's text is skipped if it already
#                          occurs (as a substring) in the text gathered so far
#
# For every matching node the text of each child node is appended, followed
# by a single space; a node without children contributes its own text
# instead (the $unique filter is not applied to that fallback).
# Returns the accumulated string in Unicode NFD form.
sub _xpath_to_string {
	my $xml = shift;
	my $xpath = shift;
	my $ns_uri = shift;
	my $ns_prefix = shift;
	my $unique = shift;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# walk the set of matching nodes
	for my $value ( $xml->findnodes( $xpath ) ) {

		my @children = $value->childNodes();

		if (!@children) {
			$string .= $value->textContent . " ";
			next;
		}

		for my $child (@children) {
			my $text = $child->textContent;

			# Literal substring test.  A regex built from $text would turn
			# into the empty pattern for empty text, and Perl treats the
			# empty pattern as "reuse the last successful match".
			next if ($unique && index($string, $text) >= 0);

			$string .= $text . " ";
		}
	}
	return NFD($string);
}
1439
# class_all_index_string_xml( $self, $client, $xml, $class )
#
# Streaming method: for every index type defined in $xpathset for $class,
# extracts the matching text from the MODS rendering of $xml, normalizes it
# and responds with a Fieldmapper::metabib::<class>_field_entry whose
# value() is that text and whose field() is the index definition's id.
#
#   $xml   - MARC XML, either as a string (parsed here) or as an already
#            parsed document
#   $class - search class name; selects both the xpath set and the
#            field-entry class
#
# Types that yield no text are skipped.  Returns undef after streaming.
sub class_all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;
	my $class = shift;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	# The MODS transform does not depend on the index type, so run it at
	# most once per call, and not at all when the class has no types.
	my $mods_doc;

	for my $type ( keys %{ $xpathset->{$class} } ) {
		$mods_doc ||= $mods_sheet->transform($xml);

		my $value = _xpath_to_string(
				$mods_doc->documentElement,
				$xpathset->{$class}->{$type}->{xpath},
				"http://www.loc.gov/mods/",
				"mods",
				1
		);

		next unless $value;

		$value = NFD($value);
		$value =~ s/\pM+//sgo;      # strip combining marks
		$value =~ s/\pC+//sgo;      # strip control/format characters
		$value =~ s/\W+$//sgo;      # strip trailing non-word characters

		$value =~ s/(\w)\./$1/sgo;  # drop a period that follows a word character
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $xpathset->{$class}->{$type}->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name        => "open-ils.worm.field_entry.class.xml",
	method          => "class_all_index_string_xml",
	api_level       => 1,
	argc            => 1,
	stream          => 1,
);
1483
# Record-id variant of open-ils.worm.field_entry.class.xml.
#
# Fetches bib record $rec from storage, runs its MARC through the XML
# variant for index class $class, stamps every resulting field entry with
# the record id as its source and streams it to the client.
# Always returns undef.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $by_xml = $self->method_lookup("open-ils.worm.field_entry.class.xml");
	foreach my $entry ( $by_xml->run($bib->marc, $class) ) {
		$entry->source($rec);
		$client->respond($entry);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.field_entry.class.record",
	method    => "class_all_index_string_record",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
1506
1507
# Returns the index string for a single class/type pair.
#
# Params (after $self and $client):
#   $xml   - MARCXML string or already-parsed document
#   $class - index class key into $xpathset
#   $type  - index type key within that class
#
# The document is transformed with the MODS stylesheet and the configured
# xpath is evaluated against the result by _xpath_to_string, whose return
# value is passed straight back.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	if (!ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _xpath_to_string(
		$mods_sheet->transform($xml)->documentElement,
		$xpathset->{$class}->{$type}->{xpath},
		"http://www.loc.gov/mods/",
		"mods",
		1
	);
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.class.type.xml",
	method    => "class_index_string_xml",
	api_level => 1,
	argc      => 1,
);
1525
# Record-id variant of open-ils.worm.class.type.xml.
#
# Retrieves bib record $rec from storage and hands its MARC, together with
# $class and $type, to the XML variant.  The first value that call yields
# is logged and returned.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $by_xml = $self->method_lookup("open-ils.worm.class.type.xml");
	my ($d) = $by_xml->run($bib->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.class.type.record",
	method    => "class_index_string_record",
	api_level => 1,
	argc      => 1,
);
1546
# Evaluates a caller-supplied XPath expression against an XML document and
# returns the string produced by _xpath_to_string.
#
# Params (after $self and $client):
#   $xml     - XML string or already-parsed document
#   $xpath   - XPath expression to evaluate
#   $uri     - namespace URI handed to _xpath_to_string
#   $prefix  - namespace prefix handed to _xpath_to_string
#   $unique  - when true, _xpath_to_string skips values it has already
#              collected
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	my $doc = ref($xml) ? $xml : $parser->parse_string($xml);

	return _xpath_to_string( $doc->documentElement, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.xpath.xml",
	method    => "xml_xpath",
	api_level => 1,
	argc      => 1,
);
1566
# Record-id variant of open-ils.worm.xpath.xml.
#
# Retrieves bib record $rec from storage and forwards its MARC plus the
# $xpath, $uri, $prefix and $unique arguments to the XML variant.  The
# first value that call yields is logged and returned.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $by_xml = $self->method_lookup("open-ils.worm.xpath.xml");
	my ($d) = $by_xml->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.xpath.record",
	method    => "record_xpath",
	api_level => 1,
	argc      => 1,
);
1589
1590
1591 # --------------------------------------------------------------------------------
1592 # MARC Descriptor
1593
1594 package OpenILS::Application::Ingest::Biblio::Leader;
1595 use base qw/OpenILS::Application::Ingest/;
1596 use Unicode::Normalize;
1597
# Maps a record-type group name to a regex fragment matching the single
# leader position 6 code(s) that belong to that group.
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# Builds a compiled regex that matches a one-character type code belonging
# to any of the named groups.
#
#   @_ - one or more keys of %marc_type_groups; unknown names are ignored
#
# Returns a qr// object anchored at both ends.  If none of the names is a
# known group the returned pattern never matches.
sub _type_re {
	# This must be a hash *slice* (@hash{...}).  The scalar form
	# $marc_type_groups{@_} looks up a single bogus key, finds nothing, and
	# yields the pattern /^$/, which no type code can ever match.
	my @patterns = grep { defined } @marc_type_groups{@_};
	return qr/(?!)/ unless @patterns;

	# Group the alternation so both anchors apply to every alternative.
	my $re = '^(?:' . join('|', @patterns) . ')$';
	return qr/$re/;
}
1613
# Dispatch table: record descriptor field name => code ref that produces
# the value for that field.
#
# The subs take no arguments.  They read the package variables $ldr and
# $oo8, which _extract_biblio_descriptors() localizes to the record's
# leader and 008 control field before invoking them.
our %biblio_descriptor_code = (

	# --- single characters taken straight from the leader ---
	pub_status    => sub { return substr($ldr, 5, 1) },
	item_type     => sub { return substr($ldr, 6, 1) },
	bib_level     => sub { return substr($ldr, 7, 1) },
	control_type  => sub { return substr($ldr, 8, 1) },
	char_encoding => sub { return substr($ldr, 9, 1) },
	enc_level     => sub { return substr($ldr, 17, 1) },
	cat_form      => sub { return substr($ldr, 18, 1) },

	# --- values taken straight from the 008 field ---
	audience      => sub { return substr($oo8, 22, 1) },
	item_lang     => sub { return substr($oo8, 35, 3) },

	# --- 008 values whose position or presence depends on leader/06 ---
	item_form     => sub {
		my $rec_type = substr($ldr, 6, 1);
		return substr($oo8, 29, 1) if ($rec_type =~ _type_re( qw/MAP VIS/ ));
		return substr($oo8, 23, 1) if ($rec_type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
		return ' ';
	},
	lit_form      => sub {
		if (substr($ldr, 6, 1) =~ _type_re('BKS')) {
			return substr($oo8, 33, 1);
		}
		return undef;
	},
	type_mat      => sub {
		if (substr($ldr, 6, 1) =~ _type_re('VIS')) {
			return substr($oo8, 33, 1);
		}
		return undef;
	},
);
1636
# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.
#
#   $xml - object supporting findvalue() (a parsed document or node)
#
# The leader, 008 and 007 values are placed in dynamically scoped package
# variables because the subs in %biblio_descriptor_code read $ldr and $oo8
# as globals rather than taking arguments.  Returns the populated object.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
	local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

	my $rd_obj = Fieldmapper::metabib::record_descriptor->new;

	foreach my $rd_field ( keys %biblio_descriptor_code ) {
		my $extractor = $biblio_descriptor_code{$rd_field};
		$rd_obj->$rd_field( $extractor->() );
	}

	return $rd_obj;
}
1651
# Returns the record descriptor object for a MARCXML document.
#
#   $xml - MARCXML string or already-parsed document; strings are parsed
#          with the shared $parser first
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	my $doc = ref($xml) ? $xml : $parser->parse_string($xml);

	return _extract_biblio_descriptors( $doc );
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.biblio_leader.xml",
	method    => "extract_biblio_desc_xml",
	api_level => 1,
	argc      => 1,
);
1667
# Record-id variant of open-ils.worm.biblio_leader.xml.
#
# Retrieves bib record $rec from storage, runs its MARC through the XML
# variant, logs the resulting descriptor as JSON and returns it.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $by_xml = $self->method_lookup("open-ils.worm.biblio_leader.xml");
	my ($d) = $by_xml->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.biblio_leader.record",
	method    => "extract_biblio_desc_record",
	api_level => 1,
	argc      => 1,
);
1686
1687 # --------------------------------------------------------------------------------
1688 # Flat MARC
1689
1690 package OpenILS::Application::Ingest::FlatMARC;
1691 use base qw/OpenILS::Application::Ingest/;
1692 use Unicode::Normalize;
1693
1694
# Normalizes one MARC value for storage in a full_rec row: decompose to
# NFD, drop combining marks and control characters, then trim trailing
# non-word characters.
sub _clean_marc_value {
	my $val = NFD(shift);
	$val =~ s/\pM+//sgo;
	$val =~ s/\pC+//sgo;
	$val =~ s/\W+$//sgo;
	return $val;
}

# Flattens a parsed MARCXML document into a list of full_rec Fieldmapper
# objects.
#
#   $marcxml - parsed document (must support findnodes)
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
#
# One row is produced per leader (tag 'LDR'), per controlfield, and per
# subfield of every datafield.  Subfield rows also carry the parent field's
# tag and indicators, and their values are lowercased; leader and
# controlfield values keep their case.
#
# Returns the list of rows.  Dies if the document contains no element whose
# local name is "record".
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	# Fail with a message that names the real problem instead of the opaque
	# "Can't call method ... on an undefined value" the loops would raise.
	die "No MARC record element found in $xmltype xml\n" unless ($root);

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( _clean_marc_value( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( _clean_marc_value( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc( _clean_marc_value( $data->textContent ) ) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
	return @ns_list;
}
1768
# Streams the flattened full_rec rows for a MARCXML document.
#
#   $xml - MARCXML string or already-parsed document
#
# Rows are built in the 'authority' Fieldmapper namespace when this method
# was invoked under an api_name containing "authority", and in 'metabib'
# otherwise.  Always returns undef; rows go out via $client->respond.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	if (!ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	foreach my $row ( _marcxml_to_full_rows($xml, $type) ) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.authority.xml",
	method    => "flat_marc_xml",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.biblio.xml",
	method    => "flat_marc_xml",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
1798
# Record-id variant of the flat_marc XML methods.
#
# Chooses 'authority' or 'biblio' from this method's api_name, retrieves
# record $rec of that kind from storage, and streams every row that the
# matching open-ils.worm.flat_marc.<kind>.xml method yields for its MARC.
# Always returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
	);

	my $by_xml = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
	foreach my $row ( $by_xml->run($entry->marc) ) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.biblio.record_entry",
	method    => "flat_marc_record",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.authority.record_entry",
	method    => "flat_marc_record",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
1827
1828
1829 # --------------------------------------------------------------------------------
1830 # Fingerprinting
1831
1832 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1833 use base qw/OpenILS::Application::Ingest/;
1834 use Unicode::Normalize;
1835 use OpenSRF::EX qw/:try/;
1836
# Fingerprint extraction rules.
#
# @fp_mods_xpath is a flat list of pairs:
#     match xpath => [ part name => { xpath => [...], fixup => sub }, ... ]
# _fp_mods() walks the pairs in order.  For each part the candidate xpaths
# are tried in order, and the part's fixup then normalizes the package
# variable $text in place (it is localized by _fp_mods).

# Emits the trace line written before and after every fixup step.
my $fp_trace = sub {
	$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
};

# Runs the given steps against $text, tracing before the first step and
# after each one.
my $fp_run_steps = sub {
	$fp_trace->();
	for my $step (@_) {
		$step->();
		$fp_trace->();
	}
};

# Steps shared by title and author fixups.
my @fp_common_steps = (
	sub { $text = NFD($text) },			# decompose
	sub { $text =~ s/\pM+//gso },			# drop combining marks
	sub { $text = lc($text) },
	sub { $text =~ s/\s+/ /sgo },			# collapse whitespace runs
	sub { $text =~ s/^\s*(.+)\s*$/$1/sgo },		# strip leading whitespace
);

my $fp_title_fixup = sub {
	$fp_run_steps->(
		@fp_common_steps,
		sub { $text =~ s/\b(?:the|an?)\b//sgo },	# articles
		sub { $text =~ s/\[.[^\]]+\]//sgo },		# [bracketed] qualifiers
		sub { $text =~ s/\s*[;\/\.]*$//sgo },		# trailing punctuation
	);
};

my $fp_author_fixup = sub {
	$fp_run_steps->(
		@fp_common_steps,
		sub { $text =~ s/,?\s+.*$//sgo },		# keep only the first word
	);
};

# Candidate title locations on the record itself, most specific first.
my @fp_title_xpath = (
	'//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
	'//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);

# Candidate creator-name locations on the record itself.
my @fp_author_xpath = (
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

# Rewrites record-level xpaths so they look inside mods:relatedItem.
my $fp_in_related_item = sub {
	return map {
		my $xpath = $_;
		$xpath =~ s{^//mods:mods/}{//mods:mods/mods:relatedItem/};
		$xpath;
	} @_;
};

my @fp_mods_xpath = (
	'//mods:mods/mods:typeOfResource[text()="text"]' => [
		title	=> {
			xpath	=> [ @fp_title_xpath ],
			fixup	=> $fp_title_fixup,
		},
		author	=> {
			xpath	=> [ @fp_author_xpath ],
			fixup	=> $fp_author_fixup,
		},
	],

	# Related-item values are preferred, then the record's own.
	'//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
		title	=> {
			xpath	=> [ $fp_in_related_item->(@fp_title_xpath), @fp_title_xpath ],
			fixup	=> $fp_title_fixup,
		},
		author	=> {
			xpath	=> [ $fp_in_related_item->(@fp_author_xpath), @fp_author_xpath ],
			fixup	=> $fp_author_fixup,
		},
	],

);

# Last resort: any record with a titleInfo reuses the first rule block.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1949
# Computes the fingerprint string for a MODS element.
#
#   $mods - MODS root element; the "mods" namespace prefix is registered
#           on it before any xpath is evaluated
#
# Walks the (match xpath => rule block) pairs of @fp_mods_xpath in order.
# For the first pair whose match xpath selects at least one node and whose
# parts produce text, the fixed-up part texts are concatenated, stripped
# of every non-word character, logged and returned.  Returns undef when no
# pair yields any text.
sub _fp_mods {
	my ($mods) = @_;
	$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

	my $fp_string = '';

	for (my $pair = 0; $fp_mods_xpath[$pair]; $pair += 2) {
		my ($match_xpath, $block) = @fp_mods_xpath[$pair, $pair + 1];

		my @hits = $mods->findnodes( $match_xpath );
		next unless @hits;

		# $block is itself a flat list of (part name => part spec) pairs
		for (my $slot = 0; $$block[$slot + 1]; $slot += 2) {
			my ($part_name, $part) = @$block[$slot, $slot + 1];

			# The fixup subs operate on the package variable $text
			local $text;
			foreach my $xpath ( @{ $part->{xpath} } ) {
				$text = $mods->findvalue( $xpath );
				last if ($text);
			}

			$log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

			next unless ($text);

			$part->{fixup}->();
			$log->debug("Fingerprint text after fixup : [$text]", DEBUG);
			$fp_string .= $text;
		}

		next unless ($fp_string);

		$fp_string =~ s/\W+//gso;
		$log->debug("Fingerprint is [$fp_string]", INFO);
		return $fp_string;
	}

	return undef;
}
1994
# Recomputes the fingerprint and quality of bib record(s) and, unless
# called through the ".nomap" api_name, re-homes each changed record under
# the metarecord that matches its new fingerprint.
#
#   $rec - id value handed to the storage search for record entries
#
# For every record found whose stored fingerprint or quality differs from
# the freshly computed one:
#   * the record entry is updated with the new fingerprint and quality;
#   * (mapping mode only) its metarecord source maps are deleted, the old
#     metarecord is deleted if no source maps remain for it, and a source
#     map to the metarecord with the new fingerprint is created, creating
#     that metarecord first if none exists.
# The id of every record examined is sent to the client.
#
# If no transaction is open one is begun here, and it is committed on
# success or rolled back if anything inside the try block throws.  A
# PANIC is thrown if the transaction cannot be started.  Returns undef.
sub refingerprint_bibrec {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	# Only manage the transaction if we are the ones who opened it
	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

		# Named $bib rather than $b so sort's package variable is not shadowed
		for my $bib (@$bibs) {
			my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

			if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

				$log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $bib->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => $fp->{quality} }
				);

				if ($self->api_name !~ /nomap/o) {
					my $old_source_map = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
						$bib->id
					);

					# Drop every existing mapping for this record, remembering
					# which metarecord it used to belong to
					my $old_mrid;
					if (ref($old_source_map) and @$old_source_map) {
						for my $m (@$old_source_map) {
							$old_mrid = $m->metarecord;
							OpenILS::Application::Ingest->storage_req(
								'open-ils.storage.direct.metabib.metarecord_source_map.delete',
								$m->id
							);
						}
					}

					# Declared separately from the conditional assignment:
					# "my $x = ... if COND" has undefined behaviour and can
					# leave $x holding the previous loop iteration's value,
					# which here could trigger a bogus metarecord delete.
					my $old_sm;
					if ($old_mrid) {
						$old_sm = OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
							{ metarecord => $old_mrid }
						);
					}

					# The old metarecord has no sources left: remove it
					if (ref($old_sm) and @$old_sm == 0) {
						OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord.delete',
							$old_mrid
						);
					}

					my $mr = OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
							{ fingerprint => $fp->{fingerprint} }
					)->[0];

					# No metarecord carries this fingerprint yet; create one
					# with this record as its master
					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $bib->id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $bib->id );
					OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

				}
			}
			$client->respond($bib->id);
		}

	} otherwise {
		$log->debug('Fingerprinting failed : '.shift(), ERROR);
		$success = 0;
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return undef;
}
__PACKAGE__->register_method(  
	api_name	=> "open-ils.worm.fingerprint.record.update",
	method		=> "refingerprint_bibrec",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);

__PACKAGE__->register_method(  
	api_name	=> "open-ils.worm.fingerprint.record.update.nomap",
	method		=> "refingerprint_bibrec",
	api_level	=> 1,
	argc		=> 1,
);
2097
2098 =comment
2099
2100 sub fingerprint_bibrec {
2101         my $self = shift;
2102         my $client = shift;
2103         my $rec = shift;
2104
2105         OpenILS::Application::Ingest->post_init();
2106         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2107
2108         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2109         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2110         return $fp;
2111
2112 }
2113 __PACKAGE__->register_method(  
2114         api_name        => "open-ils.worm.fingerprint.record",
2115         method          => "fingerprint_bibrec",
2116         api_level       => 0,
2117         argc            => 1,
2118 );                      
2119
2120
2121 sub fingerprint_mods {
2122         my $self = shift;
2123         my $client = shift;
2124         my $xml = shift;
2125
2126         OpenILS::Application::Ingest->post_init();
2127         my $mods = $parser->parse_string($xml)->documentElement;
2128
2129         return _fp_mods( $mods );
2130 }
2131 __PACKAGE__->register_method(  
2132         api_name        => "open-ils.worm.fingerprint.mods",
2133         method          => "fingerprint_mods",
2134         api_level       => 1,
2135         argc            => 1,
2136 );                      
2137
2138 sub fingerprint_marc {
2139         my $self = shift;
2140         my $client = shift;
2141         my $xml = shift;
2142
2143         $xml = $parser->parse_string($xml) unless (ref $xml);
2144
2145         OpenILS::Application::Ingest->post_init();
2146         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2147         $log->debug("Returning [$fp] as fingerprint", INFO);
2148         return $fp;
2149 }
2150 __PACKAGE__->register_method(  
2151         api_name        => "open-ils.worm.fingerprint.marc",
2152         method          => "fingerprint_marc",
2153         api_level       => 1,
2154         argc            => 1,
2155 );                      
2156
2157
2158 =cut
2159
# Fingerprint a stored bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - value handed to open-ils.storage.direct.biblio.record_entry.retrieve
#
# Returns the first value produced by running open-ils.worm.fingerprint.marc
# on the retrieved record's MARC.
sub biblio_fingerprint_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	# Pull the record entry out of storage; only its MARC is needed here.
	my $entry = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);
	my $marc = $entry->marc;

	# Delegate the actual fingerprinting to the MARC-level API method.
	my $fp_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
	my ($fp) = $fp_method->run($marc);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	return $fp;
}
__PACKAGE__->register_method(
	method    => 'biblio_fingerprint_record',
	api_name  => 'open-ils.worm.fingerprint.record',
	api_level => 1,
	argc      => 1,
);
2181
# ScriptRunner for the fingerprint script; built on first use and then reused.
our $fp_script;

# Fingerprint a MARC record by running the configured biblio_fingerprint script.
#
# Arguments (after $self and $client):
#   $marc - MARCXML, either as a string or as an already parsed document
#           (any reference is assumed to be a parsed document)
#
# The record is transformed to MODS with $mods_sheet; both serializations are
# passed through entityize() and handed to the script as the 'environment'
# object ({ marc => ..., mods => ... }).
#
# Returns the value produced by the script run, or 0 when the run yields a
# false value (the failure is logged).
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $marc = shift;

	OpenILS::Application::Ingest->post_init();

	$marc = $parser->parse_string($marc) unless (ref $marc);

	my $mods = OpenILS::Application::Ingest::entityize(
		$mods_sheet
			->transform( $marc )
			->documentElement
			->toString,
		'D'
	);

	$marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

	# The full record is only logged at the 'internal' level; it is no longer
	# also dumped to STDERR for every call.
	$log->internal("Got MARC [$marc]");
	$log->internal("Created MODS [$mods]");

	unless ($fp_script) {
		my @pfx = ( "apps", "open-ils.storage","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be configured as a single value or as a list
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file        => $script_file,
			paths       => $script_libs,
			reset_count => 1000,
		);
	}

	$log->debug("Applying environment for biblio fingerprinting...");

	my $env = {marc => $marc, mods => $mods};
	$fp_script->insert('environment' => $env);

	$log->debug("Running script for biblio fingerprinting...");

	# Bail out explicitly on a false result.  The early return must not
	# depend on what the logger call happens to return.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return 0;
	}

	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(  
	api_name	=> "open-ils.worm.fingerprint.marc",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);                      
2244
2245 # --------------------------------------------------------------------------------
2246
2247 1;
2248
2249 __END__
# File-scoped cache of storage method handles used by wormize().  Each one is
# filled in lazily by the first wormize() call that needs it.
my (
	$in_xact, $begin, $commit, $rollback,          # transaction control
	$lookup, $update_entry,                        # record entries
	$mr_lookup, $mr_update, $mr_create,            # metarecords
	$create_source_map, $sm_lookup,                # metarecord source maps
	$rm_old_rd, $rm_old_sm, $rm_old_fr,            # mass deletes
	$rm_old_tr, $rm_old_ar, $rm_old_sr,
	$rm_old_kr, $rm_old_ser,
	$fr_create, $rd_create,                        # batch creates
);

# Batch-create handles for the per-class field entries, keyed by class name.
my $create = {};

# Record descriptor field => Perl expression producing its value.  The
# expressions are string-eval'ed where lexicals named $ldr (the leader text)
# and $oo8 (the 008 control field) are in scope, so those names must not change.
my %descriptor_code = (
	item_type     => 'substr($ldr,6,1)',
	item_form     => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
	bib_level     => 'substr($ldr,7,1)',
	control_type  => 'substr($ldr,8,1)',
	char_encoding => 'substr($ldr,9,1)',
	enc_level     => 'substr($ldr,17,1)',
	cat_form      => 'substr($ldr,18,1)',
	pub_status    => 'substr($ldr,5,1)',
	item_lang     => 'substr($oo8,35,3)',
	#lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
	audience      => 'substr($oo8,22,1)',
);
2287
# Ingest bibliographic records: fingerprint them, map them to metarecords,
# and rebuild their record descriptors, full_rec rows and per-class field
# entries in storage.
#
# Arguments (after $self and $client):
#   @docids - record ids handed to the biblio record_entry batch.retrieve call
#
# Behaviour keyed off the API name this was invoked under:
#   * a name containing "no_map" skips all metarecord / source-map work
#   * a name not ending in "batch" stops after the first retrieved record
#
# If storage reports no current transaction, one is started here and committed
# at the end.  Returns the number of records processed.  Throws
# OpenSRF::EX::PANIC when a storage call returns an undefined result.
sub wormize {

	my $self = shift;
	my $client = shift;
	my @docids = @_;

	my $no_map = ($self->api_name =~ /no_map/o) ? 1 : 0;

	# Resolve each storage method once; the handles live in file-scoped
	# lexicals and are reused by later calls.
	$in_xact           ||= $self->method_lookup('open-ils.storage.transaction.current');
	$begin             ||= $self->method_lookup('open-ils.storage.transaction.begin');
	$commit            ||= $self->method_lookup('open-ils.storage.transaction.commit');
	$rollback          ||= $self->method_lookup('open-ils.storage.transaction.rollback');
	$sm_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
	$mr_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
	$mr_update         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
	$lookup            ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
	$update_entry      ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
	$rm_old_sm         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
	$rm_old_rd         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.mass_delete');
	$rm_old_fr         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.mass_delete');
	$rm_old_tr         ||= $self->method_lookup('open-ils.storage.direct.metabib.title_field_entry.mass_delete');
	$rm_old_ar         ||= $self->method_lookup('open-ils.storage.direct.metabib.author_field_entry.mass_delete');
	$rm_old_sr         ||= $self->method_lookup('open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
	$rm_old_kr         ||= $self->method_lookup('open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
	$rm_old_ser        ||= $self->method_lookup('open-ils.storage.direct.metabib.series_field_entry.mass_delete');
	$mr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
	$create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
	$rd_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.batch.create');
	$fr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.batch.create');
	for my $class ( qw/title author subject keyword series/ ) {
		$create->{$class} ||= $self->method_lookup("open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
	}

	# Start a transaction only when we are not already inside one.
	my ($outer_xact) = $in_xact->run;
	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			my ($r) = $begin->run($client);
			unless (defined $r and $r) {
				$rollback->run;
				throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
			}
		}
	} catch Error with {
		throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
	};

	my @source_maps;
	my @entry_list;
	my @mr_list;
	my @rd_list;
	my @ns_list;
	my @mods_data;
	my $ret = 0;
	for my $entry ( $lookup->run(@docids) ) {
		# step -1: grab the doc from storage
		next unless ($entry);

		# Compile the MARC -> MODS stylesheet on first use.
		unless ($mods_sheet) {
			my $xslt_doc = $parser->parse_file(
				OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
			$mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
		}

		my $xml = $entry->marc;
		my $docid = $entry->id;
		my $marcdoc = $parser->parse_string($xml);
		my $modsdoc = $mods_sheet->transform($marcdoc);

		my $mods = $modsdoc->documentElement;
		$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

		$entry->fingerprint( fingerprint_mods( $mods ) );
		push @entry_list, $entry;

		$log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

		unless ($no_map) {
			my ($mr) = $mr_lookup->run( $entry->fingerprint );
			if (!$mr || !@$mr) {
				$log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
				$mr = Fieldmapper::metabib::metarecord->new;
				$mr->fingerprint( $entry->fingerprint );
				$mr->master_record( $entry->id );
				my ($new_mr) = $mr_create->run($mr);
				# Check the create result.  $mr itself was just
				# constructed, so testing it could never detect a failure.
				unless (defined $new_mr) {
					throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
				}
				$mr->id($new_mr);
			} else {
				$log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
				$mr->mods('');
				push @mr_list, $mr;
			}

			my $sm = Fieldmapper::metabib::metarecord_source_map->new;
			$sm->metarecord( $mr->id );
			$sm->source( $entry->id );
			push @source_maps, $sm;
		}

		# NOTE: the %descriptor_code expressions are string-eval'ed below and
		# refer to these two lexicals by name ($ldr, $oo8).
		my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
		my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

		my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
		for my $rd_field ( keys %descriptor_code ) {
			$rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
		}
		$rd_obj->record( $docid );
		push @rd_list, $rd_obj;

		push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

		# step 2: build the KOHA rows
		my @tmp_list = _marcxml_to_full_rows( $marcdoc );
		$_->record( $docid ) for (@tmp_list);
		push @ns_list, @tmp_list;

		$ret++;

		last unless ($self->api_name =~ /batch$/o);
	}

	# Clear out the previously generated rows before inserting the new ones.
	# NOTE(review): these deletes cover every id in @docids even though a
	# non-batch call stops after the first record -- confirm non-batch callers
	# only ever pass a single id.
	$rm_old_rd->run( { record => \@docids } );
	$rm_old_fr->run( { record => \@docids } );
	$rm_old_sm->run( { source => \@docids } ) unless ($no_map);
	$rm_old_tr->run( { source => \@docids } );
	$rm_old_ar->run( { source => \@docids } );
	$rm_old_sr->run( { source => \@docids } );
	$rm_old_kr->run( { source => \@docids } );
	$rm_old_ser->run( { source => \@docids } );

	unless ($no_map) {
		my ($sm) = $create_source_map->run(@source_maps);
		unless (defined $sm) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
		}
		my ($mr) = $mr_update->run(@mr_list);
		unless (defined $mr) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
		}
	}

	my ($re) = $update_entry->run(@entry_list);
	unless (defined $re) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
	}

	my ($rd) = $rd_create->run(@rd_list);
	unless (defined $rd) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
	}

	my ($fr) = $fr_create->run(@ns_list);
	unless (defined $fr) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
	}

	# step 5: insert the new metadata
	for my $class ( qw/title author subject keyword series/ ) {
		my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
		my @md_list = ();
		for my $doc ( @mods_data ) {
			# each element is a single-pair hash: { docid => values }
			my ($did) = keys %$doc;
			my ($data) = values %$doc;

			for my $row ( keys %{ $$data{$class} } ) {
				next unless (exists $$data{$class}{$row});
				next unless ($$data{$class}{$row}{value});
				my $fm_obj = $fm_constructor->new;
				$fm_obj->value( $$data{$class}{$row}{value} );
				$fm_obj->field( $$data{$class}{$row}{field_id} );
				$fm_obj->source( $did );
				$log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

				push @md_list, $fm_obj;
			}
		}

		my ($cr) = $create->{$class}->run(@md_list);
		unless (defined $cr) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
		}
	}

	# Only commit a transaction that this call opened.
	unless ($outer_xact) {
		$log->debug("Commiting transaction started by the Ingest.", INFO);
		my ($c) = $commit->run;
		unless (defined $c and $c) {
			$rollback->run;
			throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
		}
	}

	return $ret;
}

# The same handler is published under four names; wormize() inspects the name
# it was called under to choose no_map and batch behaviour.
for my $api_name ( qw/
	open-ils.worm.wormize
	open-ils.worm.wormize.no_map
	open-ils.worm.wormize.batch
	open-ils.worm.wormize.no_map.batch
/ ) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "wormize",
		api_level	=> 1,
		argc		=> 1,
	);
}
2540
2541
# File-scoped slots for the storage method handles of the authority ingest
# (the "a"-prefixed counterparts of the bibliographic set).
my (
	$ain_xact, $abegin, $acommit, $arollback,      # transaction control
	$alookup, $aupdate_entry,                      # record entries
	$amr_lookup, $amr_update, $amr_create,         # metarecords
	$acreate_source_map, $asm_lookup,              # source maps
	$arm_old_rd, $arm_old_sm, $arm_old_fr,         # mass deletes
	$arm_old_tr, $arm_old_ar, $arm_old_sr,
	$arm_old_kr, $arm_old_ser,
	$afr_create, $ard_create,                      # batch creates
);

# Starts out as an empty hash reference.
my $acreate = {};
2565
# Ingest authority records: rebuild their record descriptors and full_rec
# rows in storage.
#
# Arguments (after $self and $client):
#   @docids - record ids handed to the authority record_entry batch.retrieve call
#
# When invoked under an API name that does not end in "batch", processing
# stops after the first retrieved record.  If storage reports no current
# transaction, one is started here and committed at the end.
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC when a
# storage call returns an undefined result.
sub authority_wormize {

	my $self = shift;
	my $client = shift;
	my @docids = @_;

	# Resolve each storage method once; the handles live in the file-scoped
	# "a"-prefixed lexicals and are reused by later calls.
	$ain_xact      ||= $self->method_lookup('open-ils.storage.transaction.current');
	$abegin        ||= $self->method_lookup('open-ils.storage.transaction.begin');
	$acommit       ||= $self->method_lookup('open-ils.storage.transaction.commit');
	$arollback     ||= $self->method_lookup('open-ils.storage.transaction.rollback');
	$alookup       ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
	$aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
	$arm_old_rd    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.mass_delete');
	$arm_old_fr    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.mass_delete');
	$ard_create    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.batch.create');
	$afr_create    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.batch.create');

	# Start a transaction only when we are not already inside one.
	my ($outer_xact) = $ain_xact->run;
	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			my ($r) = $abegin->run($client);
			unless (defined $r and $r) {
				$arollback->run;
				throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
			}
		}
	} catch Error with {
		throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
	};

	my @rd_list;
	my @ns_list;
	my $ret = 0;
	# Read through the authority retrieve handle, not the bibliographic one.
	for my $entry ( $alookup->run(@docids) ) {
		# step -1: grab the doc from storage
		next unless ($entry);

		#if(!$mads_sheet) {
		#	my $xslt_doc = $parser->parse_file(
		#		OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
		#	$mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
		#}

		my $xml = $entry->marc;
		my $docid = $entry->id;
		my $marcdoc = $parser->parse_string($xml);
		#my $madsdoc = $mads_sheet->transform($marcdoc);

		#my $mads = $madsdoc->documentElement;
		#$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

		# NOTE: the %descriptor_code expressions are string-eval'ed below and
		# refer to these two lexicals by name ($ldr, $oo8).
		my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
		my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

		my $rd_obj = Fieldmapper::authority::record_descriptor->new;
		for my $rd_field ( keys %descriptor_code ) {
			$rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
		}
		$rd_obj->record( $docid );
		push @rd_list, $rd_obj;

		# step 2: build the KOHA rows
		my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
		$_->record( $docid ) for (@tmp_list);
		push @ns_list, @tmp_list;

		$ret++;

		last unless ($self->api_name =~ /batch$/o);
	}

	# Clear out the previously generated rows before inserting the new ones.
	$arm_old_rd->run( { record => \@docids } );
	$arm_old_fr->run( { record => \@docids } );

	my ($rd) = $ard_create->run(@rd_list);
	unless (defined $rd) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
	}

	# Write through the authority full_rec handle, not the bibliographic one.
	my ($fr) = $afr_create->run(@ns_list);
	unless (defined $fr) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
	}

	# Only commit a transaction that this call opened.
	unless ($outer_xact) {
		$log->debug("Commiting transaction started by Ingest.", INFO);
		my ($c) = $acommit->run;
		unless (defined $c and $c) {
			$arollback->run;
			throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
		}
	}

	return $ret;
}
__PACKAGE__->register_method( 
	api_name	=> "open-ils.worm.authority.wormize",
	method		=> "authority_wormize",
	api_level	=> 1,
	argc		=> 1,
);
__PACKAGE__->register_method( 
	api_name	=> "open-ils.worm.authority.wormize.batch",
	method		=> "authority_wormize",
	api_level	=> 1,
	argc		=> 1,
);
# The original, misspelled API name is kept as an alias so that any caller
# still using it keeps working.
__PACKAGE__->register_method( 
	api_name	=> "open-ils.worm.authortiy.wormize",
	method		=> "authority_wormize",
	api_level	=> 1,
	argc		=> 1,
);
2696
2697
2698 # --------------------------------------------------------------------------------
2699
2700
# Flatten a parsed MARCXML document into a list of full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document; its documentElement is searched for leader,
#              controlfield and datafield children
#   $type    - class used to construct every row
#              (default: 'Fieldmapper::metabib::full_rec')
#
# Produces one row per leader (tag 'LDR'), one per controlfield, and one per
# element child of each datafield (carrying tag, ind1, ind2 and the child's
# "code" attribute as subfield).  Values are NFD-decomposed with combining
# marks removed; datafield values are additionally lower-cased.
#
# Returns the list of row objects.  The record id is not set here.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $type = shift || 'Fieldmapper::metabib::full_rec';

	# Decompose the text and drop all combining marks.
	my $strip_marks = sub {
		my ($text) = @_;
		my $val = NFD($text);
		$val =~ s/(\pM+)//gso;
		return $val;
	};

	my @ns_list;

	my $root = $marcxml->documentElement;

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		# Honour the requested row class here as well, so non-default
		# callers do not get metabib rows for the leader.
		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( $strip_marks->($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( $strip_marks->($tagline->textContent) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( $tagline->childNodes ) {
			next unless $data;
			# Only element children carry a "code" attribute; skip text
			# nodes (e.g. indentation whitespace) and the like.
			next unless ($data->nodeType == 1);	# 1 == XML_ELEMENT_NODE

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc($strip_marks->($data->textContent)) );

			push @ns_list, $ns;
		}
	}
	return @ns_list;
}
2761
# Collect the text found under $xpath (evaluated against $root) into one
# space-separated string.
#
# For a matched node with children, each child's text is appended unless
# that text already occurs somewhere in the string built so far.  A matched
# node without children contributes its own text.  The result is
# NFD-decomposed, stripped of combining marks and lower-cased.
sub _get_field_value {

	my( $root, $xpath ) = @_;

	my $buffer = "";

	for my $node ( $root->findnodes( $xpath ) ) {

		my @kids = $node->childNodes();

		# childless match: take the node's own text and move on
		unless (@kids) {
			$buffer .= $node->textContent . " ";
			next;
		}

		for my $kid (@kids) {
			# uniquify: skip text that is already part of the buffer
			my $pattern = quotemeta($kid->textContent);
			$buffer .= $kid->textContent . " " unless ($buffer =~ /$pattern/);
		}
	}

	$buffer = NFD($buffer);
	$buffer =~ s/(\pM)//gso;
	return lc($buffer);
}
2789
2790
# Build the per-class / per-type skeleton described by $xpathset.
#
# Returns a hash reference of the form
#   { class => { type => { field_id => <id from $xpathset> } } }
#
# NOTE(review): $mods is accepted but never read, and no 'value' key is set
# here -- confirm whether value extraction is meant to happen in this sub.
sub modsdoc_to_values {
	my( $self, $mods ) = @_;

	my %values_for;
	for my $class (keys %$xpathset) {
		my $types = $xpathset->{$class};
		$values_for{$class} = {
			map { ( $_ => { field_id => $types->{$_}->{id} } ) } keys %$types
		};
	}
	return \%values_for;
}
2803
2804
2805 1;
2806
2807