]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
protecting against insert/update race condition
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21 our %supported_formats = (
22         mods3   => {ns => 'http://www.loc.gov/mods/v3'},
23         mods    => {ns => 'http://www.loc.gov/mods/'},
24         marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25         srw_dc  => {ns => 'info:srw/schema/1/dc-schema'},
26         oai_dc  => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27         rdf_dc  => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28         atom    => {ns => 'http://www.w3.org/2005/Atom'},
29         rss091  => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
30         rss092  => {ns => ''},
31         rss093  => {ns => ''},
32         rss094  => {ns => ''},
33         rss10   => {ns => 'http://purl.org/rss/1.0/'},
34         rss11   => {ns => 'http://purl.org/net/rss1.1#'},
35         rss2    => {ns => ''},
36 );
37
38
39 my $log = 'OpenSRF::Utils::Logger';
40
41 my  $parser = XML::LibXML->new();
42 my  $xslt = XML::LibXSLT->new();
43
44 my  $mods_sheet;
45 my  $mads_sheet;
46 my  $xpathset = {};
47 sub initialize {}
48 sub child_init {}
49
50 sub post_init {
51
52         unless (keys %$xpathset) {
53                 $log->debug("Running post_init", DEBUG);
54
55                 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
56
57                 unless ($supported_formats{mods}{xslt}) {
58                         $log->debug("Loading MODS XSLT", DEBUG);
59                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60                         $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
61                 }
62
63                 unless ($supported_formats{mods3}{xslt}) {
64                         $log->debug("Loading MODS v3 XSLT", DEBUG);
65                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66                         $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
67                 }
68
69
70                 my $req = OpenSRF::AppSession
71                                 ->create('open-ils.cstore')
72                                 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
73                                 ->gather(1);
74
75                 if (ref $req and @$req) {
76                         for my $f (@$req) {
77                                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78                                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79                                 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80                                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
81                         }
82                 }
83         }
84 }
85
86 sub entityize {
87         my $stuff = shift;
88         my $form = shift;
89
90         if ($form eq 'D') {
91                 $stuff = NFD($stuff);
92         } else {
93                 $stuff = NFC($stuff);
94         }
95
96         $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
97         return $stuff;
98 }
99
100 # --------------------------------------------------------------------------------
101 # Biblio ingest
102
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
106
107 sub rw_biblio_ingest_single_object {
108         my $self = shift;
109         my $client = shift;
110         my $bib = shift;
111
112         my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113         return undef unless ($blob);
114
115         $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116         $bib->quality( $blob->{fingerprint}->{quality} );
117
118         my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
119
120         my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
121
122         # update full_rec stuff ...
123         my $tmp = $cstore->request(
124                 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125                 { record => $bib->id }
126         )->gather(1);
127
128         $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129         $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
130
131         # update rec_descriptor stuff ...
132         $tmp = $cstore->request(
133                 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134                 { record => $bib->id }
135         )->gather(1);
136
137         $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138         $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
139
140         # deal with classed fields...
141         for my $class ( qw/title author subject keyword series/ ) {
142                 $tmp = $cstore->request(
143                         "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144                         { source => $bib->id }
145                 )->gather(1);
146
147                 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
148         }
149         for my $obj ( @{ $blob->{field_entries} } ) {
150                 my $class = $obj->class_name;
151                 $class =~ s/^Fieldmapper:://o;
152                 $class =~ s/::/./go;
153                 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
154         }
155
156         # update MR map ...
157
158         $tmp = $cstore->request(
159                 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
160                 { source => $bib->id }
161         )->gather(1);
162
163         $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);
164
165
166         # Get the matchin MR, if any.
167         my $mr = $cstore->request(
168                 'open-ils.cstore.direct.metabib.metarecord.search',
169                 { fingerprint => $bib->fingerprint }
170         )->gather(1);
171
172         if (!$mr) {
173                 $mr = new Fieldmapper::metabib::metarecord;
174                 $mr->fingerprint( $bib->fingerprint );
175                 $mr->master_record( $bib->id );
176                 $mr->id(
177                         $cstore->request(
178                                 "open-ils.cstore.direct.metabib.metarecord.create",
179                                 $mr => { quiet => 'true' }
180                         )->gather(1)
181                 );
182         } else {
183                 my $mrm = $cstore->request(
184                         'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
185                         { metarecord => $mr->id }
186                 )->gather(1);
187
188                 my $best = $cstore->request(
189                         "open-ils.cstore.direct.biblio.record_entry.search",
190                         { id => [ map { $_->source } @$mrm ] },
191                         { 'select'      => { bre => [ qw/id quality/ ] },
192                           order_by      => { bre => "quality desc" },
193                           limit         => 1,
194                         }
195                 )->gather(1);
196
197                 if ($best->quality > $bib->quality) {
198                         $mr->master_record($best->id);
199                 } else {
200                         $mr->master_record($bib->id);
201                 }
202
203                 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
204         }
205
206         my $mrm = new Fieldmapper::metabib::metarecord_source_map;
207         $mrm->source($bib->id);
208         $mrm->metarecord($mr->id);
209
210         $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
211         $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
212
213         $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
214
215         return $bib->id;
216 }
217 __PACKAGE__->register_method(  
218         api_name        => "open-ils.ingest.full.biblio.object",
219         method          => "rw_biblio_ingest_single_object",
220         api_level       => 1,
221         argc            => 1,
222 );                      
223
224 sub rw_biblio_ingest_single_record {
225         my $self = shift;
226         my $client = shift;
227         my $rec = shift;
228
229         OpenILS::Application::Ingest->post_init();
230         my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
231         $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
232
233         my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
234
235         $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
236         $cstore->disconnect;
237
238         return undef unless ($r and @$r);
239
240         return $self->method_lookup("open-ils.ingest.full.biblio.object")->run($r);
241 }
242 __PACKAGE__->register_method(  
243         api_name        => "open-ils.ingest.full.biblio.record",
244         method          => "rw_biblio_ingest_single_record",
245         api_level       => 1,
246         argc            => 1,
247 );                      
248
249 sub ro_biblio_ingest_single_object {
250         my $self = shift;
251         my $client = shift;
252         my $bib = shift;
253         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
254
255         my $document = $parser->parse_string($xml);
256
257         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
258         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
259         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
260         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
261
262         $_->source($bib->id) for (@mXfe);
263         $_->record($bib->id) for (@mfr);
264         $rd->record($bib->id) if ($rd);
265
266         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
267 }
268 __PACKAGE__->register_method(  
269         api_name        => "open-ils.ingest.full.biblio.object.readonly",
270         method          => "ro_biblio_ingest_single_object",
271         api_level       => 1,
272         argc            => 1,
273 );                      
274
275 sub ro_biblio_ingest_single_xml {
276         my $self = shift;
277         my $client = shift;
278         my $xml = OpenILS::Application::Ingest::entityize(shift);
279
280         my $document = $parser->parse_string($xml);
281
282         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
283         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
284         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
285         my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
286
287         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
288 }
289 __PACKAGE__->register_method(  
290         api_name        => "open-ils.ingest.full.biblio.xml.readonly",
291         method          => "ro_biblio_ingest_single_xml",
292         api_level       => 1,
293         argc            => 1,
294 );                      
295
296 sub ro_biblio_ingest_single_record {
297         my $self = shift;
298         my $client = shift;
299         my $rec = shift;
300
301         OpenILS::Application::Ingest->post_init();
302         my $r = OpenSRF::AppSession
303                         ->create('open-ils.cstore')
304                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
305                         ->gather(1);
306
307         return undef unless ($r and @$r);
308
309         my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
310
311         $_->source($rec) for (@{$res->{field_entries}});
312         $_->record($rec) for (@{$res->{full_rec}});
313         $res->{descriptor}->record($rec);
314
315         return $res;
316 }
317 __PACKAGE__->register_method(  
318         api_name        => "open-ils.ingest.full.biblio.record.readonly",
319         method          => "ro_biblio_ingest_single_record",
320         api_level       => 1,
321         argc            => 1,
322 );                      
323
324 sub ro_biblio_ingest_stream_record {
325         my $self = shift;
326         my $client = shift;
327
328         OpenILS::Application::Ingest->post_init();
329
330         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
331
332         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
333         
334                 my $rec = $resp->content;
335                 last unless (defined $rec);
336
337                 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
338                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
339
340                 $_->source($rec) for (@{$res->{field_entries}});
341                 $_->record($rec) for (@{$res->{full_rec}});
342
343                 $client->respond( $res );
344         }
345
346         return undef;
347 }
348 __PACKAGE__->register_method(  
349         api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
350         method          => "ro_biblio_ingest_stream_record",
351         api_level       => 1,
352         stream          => 1,
353 );                      
354
355 sub ro_biblio_ingest_stream_xml {
356         my $self = shift;
357         my $client = shift;
358
359         OpenILS::Application::Ingest->post_init();
360
361         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
362
363         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
364         
365                 my $xml = $resp->content;
366                 last unless (defined $xml);
367
368                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
369                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
370
371                 $client->respond( $res );
372         }
373
374         return undef;
375 }
376 __PACKAGE__->register_method(  
377         api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
378         method          => "ro_biblio_ingest_stream_xml",
379         api_level       => 1,
380         stream          => 1,
381 );                      
382
383 sub rw_biblio_ingest_stream_import {
384         my $self = shift;
385         my $client = shift;
386
387         OpenILS::Application::Ingest->post_init();
388
389         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
390
391         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
392         
393                 my $bib = $resp->content;
394                 last unless (defined $bib);
395
396                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
397                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
398
399                 $_->source($bib->id) for (@{$res->{field_entries}});
400                 $_->record($bib->id) for (@{$res->{full_rec}});
401
402                 $client->respond( $res );
403         }
404
405         return undef;
406 }
407 __PACKAGE__->register_method(  
408         api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
409         method          => "rw_biblio_ingest_stream_import",
410         api_level       => 1,
411         stream          => 1,
412 );                      
413
414
415 # --------------------------------------------------------------------------------
416 # Authority ingest
417
418 package OpenILS::Application::Ingest::Authority;
419 use base qw/OpenILS::Application::Ingest/;
420 use Unicode::Normalize;
421
422 sub ro_authority_ingest_single_object {
423         my $self = shift;
424         my $client = shift;
425         my $bib = shift;
426         my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
427
428         my $document = $parser->parse_string($xml);
429
430         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
431
432         $_->record($bib->id) for (@mfr);
433
434         return { full_rec => \@mfr };
435 }
436 __PACKAGE__->register_method(  
437         api_name        => "open-ils.ingest.full.authority.object.readonly",
438         method          => "ro_authority_ingest_single_object",
439         api_level       => 1,
440         argc            => 1,
441 );                      
442
443 sub ro_authority_ingest_single_xml {
444         my $self = shift;
445         my $client = shift;
446         my $xml = OpenILS::Application::Ingest::entityize(shift);
447
448         my $document = $parser->parse_string($xml);
449
450         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
451
452         return { full_rec => \@mfr };
453 }
454 __PACKAGE__->register_method(  
455         api_name        => "open-ils.ingest.full.authority.xml.readonly",
456         method          => "ro_authority_ingest_single_xml",
457         api_level       => 1,
458         argc            => 1,
459 );                      
460
461 sub ro_authority_ingest_single_record {
462         my $self = shift;
463         my $client = shift;
464         my $rec = shift;
465
466         OpenILS::Application::Ingest->post_init();
467         my $r = OpenSRF::AppSession
468                         ->create('open-ils.cstore')
469                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
470                         ->gather(1);
471
472         return undef unless ($r and @$r);
473
474         my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
475
476         $_->record($rec) for (@{$res->{full_rec}});
477         $res->{descriptor}->record($rec);
478
479         return $res;
480 }
481 __PACKAGE__->register_method(  
482         api_name        => "open-ils.ingest.full.authority.record.readonly",
483         method          => "ro_authority_ingest_single_record",
484         api_level       => 1,
485         argc            => 1,
486 );                      
487
488 sub ro_authority_ingest_stream_record {
489         my $self = shift;
490         my $client = shift;
491
492         OpenILS::Application::Ingest->post_init();
493
494         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
495
496         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
497         
498                 my $rec = $resp->content;
499                 last unless (defined $rec);
500
501                 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
502                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
503
504                 $_->record($rec) for (@{$res->{full_rec}});
505
506                 $client->respond( $res );
507         }
508
509         return undef;
510 }
511 __PACKAGE__->register_method(  
512         api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
513         method          => "ro_authority_ingest_stream_record",
514         api_level       => 1,
515         stream          => 1,
516 );                      
517
518 sub ro_authority_ingest_stream_xml {
519         my $self = shift;
520         my $client = shift;
521
522         OpenILS::Application::Ingest->post_init();
523
524         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
525
526         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
527         
528                 my $xml = $resp->content;
529                 last unless (defined $xml);
530
531                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
532                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
533
534                 $client->respond( $res );
535         }
536
537         return undef;
538 }
539 __PACKAGE__->register_method(  
540         api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
541         method          => "ro_authority_ingest_stream_xml",
542         api_level       => 1,
543         stream          => 1,
544 );                      
545
546 sub rw_authority_ingest_stream_import {
547         my $self = shift;
548         my $client = shift;
549
550         OpenILS::Application::Ingest->post_init();
551
552         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
553
554         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
555         
556                 my $bib = $resp->content;
557                 last unless (defined $bib);
558
559                 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
560                 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
561
562                 $_->record($bib->id) for (@{$res->{full_rec}});
563
564                 $client->respond( $res );
565         }
566
567         return undef;
568 }
569 __PACKAGE__->register_method(  
570         api_name        => "open-ils.ingest.full.authority.bib_stream.import",
571         method          => "rw_authority_ingest_stream_import",
572         api_level       => 1,
573         stream          => 1,
574 );                      
575
576
577 # --------------------------------------------------------------------------------
578 # MARC index extraction
579
580 package OpenILS::Application::Ingest::XPATH;
581 use base qw/OpenILS::Application::Ingest/;
582 use Unicode::Normalize;
583
584 # give this an XML documentElement and an XPATH expression
585 sub xpath_to_string {
586         my $xml = shift;
587         my $xpath = shift;
588         my $ns_uri = shift;
589         my $ns_prefix = shift;
590         my $unique = shift;
591
592         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
593
594         my $string = "";
595
596         # grab the set of matching nodes
597         my @nodes = $xml->findnodes( $xpath );
598         for my $value (@nodes) {
599
600                 # grab all children of the node
601                 my @children = $value->childNodes();
602                 for my $child (@children) {
603
604                         # add the childs content to the growing buffer
605                         my $content = quotemeta($child->textContent);
606                         next if ($unique && $string =~ /$content/);  # uniquify the values
607                         $string .= $child->textContent . " ";
608                 }
609                 if( ! @children ) {
610                         $string .= $value->textContent . " ";
611                 }
612         }
613         return NFD($string);
614 }
615
616 sub class_index_string_xml {
617         my $self = shift;
618         my $client = shift;
619         my $xml = shift;
620         my @classes = @_;
621
622         OpenILS::Application::Ingest->post_init();
623         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
624
625         my %transform_cache;
626         
627         for my $class (@classes) {
628                 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
629                 for my $type ( keys %{ $xpathset->{$class} } ) {
630
631                         my $def = $xpathset->{$class}->{$type};
632                         my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
633
634                         my $document = $xml;
635
636                         if ($sf->{xslt}) {
637                                 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
638                                 $transform_cache{$def->{format}} = $document;
639                         }
640
641                         my $value =  xpath_to_string(
642                                         $document->documentElement      => $def->{xpath},
643                                         $sf->{ns}                       => $def->{format},
644                                         1
645                         );
646
647                         next unless $value;
648
649                         $value = NFD($value);
650                         $value =~ s/\pM+//sgo;
651                         $value =~ s/\pC+//sgo;
652                         $value =~ s/\W+$//sgo;
653
654                         $value =~ s/(\w)\.+(\w)/$1$2/sgo;
655                         $value = lc($value);
656
657                         my $fm = $class_constructor->new;
658                         $fm->value( $value );
659                         $fm->field( $xpathset->{$class}->{$type}->{id} );
660                         $client->respond($fm);
661                 }
662         }
663         return undef;
664 }
665 __PACKAGE__->register_method(  
666         api_name        => "open-ils.ingest.field_entry.class.xml",
667         method          => "class_index_string_xml",
668         api_level       => 1,
669         argc            => 2,
670         stream          => 1,
671 );                      
672
673 sub class_index_string_record {
674         my $self = shift;
675         my $client = shift;
676         my $rec = shift;
677         my @classes = shift;
678
679         OpenILS::Application::Ingest->post_init();
680         my $r = OpenSRF::AppSession
681                         ->create('open-ils.cstore')
682                         ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
683                         ->gather(1);
684
685         return undef unless ($r and @$r);
686
687         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
688                 $fm->source($rec);
689                 $client->respond($fm);
690         }
691         return undef;
692 }
693 __PACKAGE__->register_method(  
694         api_name        => "open-ils.ingest.field_entry.class.record",
695         method          => "class_index_string_record",
696         api_level       => 1,
697         argc            => 2,
698         stream          => 1,
699 );                      
700
701 sub all_index_string_xml {
702         my $self = shift;
703         my $client = shift;
704         my $xml = shift;
705
706         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
707                 $client->respond($fm);
708         }
709         return undef;
710 }
711 __PACKAGE__->register_method(  
712         api_name        => "open-ils.ingest.extract.field_entry.all.xml",
713         method          => "all_index_string_xml",
714         api_level       => 1,
715         argc            => 1,
716         stream          => 1,
717 );                      
718
719 sub all_index_string_record {
720         my $self = shift;
721         my $client = shift;
722         my $rec = shift;
723
724         OpenILS::Application::Ingest->post_init();
725         my $r = OpenSRF::AppSession
726                         ->create('open-ils.cstore')
727                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
728                         ->gather(1);
729
730         return undef unless ($r and @$r);
731
732         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
733                 $fm->source($rec);
734                 $client->respond($fm);
735         }
736         return undef;
737 }
738 __PACKAGE__->register_method(  
739         api_name        => "open-ils.ingest.extract.field_entry.all.record",
740         method          => "all_index_string_record",
741         api_level       => 1,
742         argc            => 1,
743         stream          => 1,
744 );                      
745
746 # --------------------------------------------------------------------------------
747 # Flat MARC
748
749 package OpenILS::Application::Ingest::FlatMARC;
750 use base qw/OpenILS::Application::Ingest/;
751 use Unicode::Normalize;
752
753
754 sub _marcxml_to_full_rows {
755
756         my $marcxml = shift;
757         my $xmltype = shift || 'metabib';
758
759         my $type = "Fieldmapper::${xmltype}::full_rec";
760
761         my @ns_list;
762         
763         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
764
765         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
766                 next unless $tagline;
767
768                 my $ns = $type->new;
769
770                 $ns->tag( 'LDR' );
771                 my $val = $tagline->textContent;
772                 $val = NFD($val);
773                 $val =~ s/\pM+//sgo;
774                 $val =~ s/\pC+//sgo;
775                 $val =~ s/\W+$//sgo;
776                 $ns->value( $val );
777
778                 push @ns_list, $ns;
779         }
780
781         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
782                 next unless $tagline;
783
784                 my $ns = $type->new;
785
786                 $ns->tag( $tagline->getAttribute( "tag" ) );
787                 my $val = $tagline->textContent;
788                 $val = NFD($val);
789                 $val =~ s/\pM+//sgo;
790                 $val =~ s/\pC+//sgo;
791                 $val =~ s/\W+$//sgo;
792                 $ns->value( $val );
793
794                 push @ns_list, $ns;
795         }
796
797         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
798                 next unless $tagline;
799
800                 my $tag = $tagline->getAttribute( "tag" );
801                 my $ind1 = $tagline->getAttribute( "ind1" );
802                 my $ind2 = $tagline->getAttribute( "ind2" );
803
804                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
805                         next unless $data;
806
807                         my $ns = $type->new;
808
809                         $ns->tag( $tag );
810                         $ns->ind1( $ind1 );
811                         $ns->ind2( $ind2 );
812                         $ns->subfield( $data->getAttribute( "code" ) );
813                         my $val = $data->textContent;
814                         $val = NFD($val);
815                         $val =~ s/\pM+//sgo;
816                         $val =~ s/\pC+//sgo;
817                         $val =~ s/\W+$//sgo;
818                         $ns->value( lc($val) );
819
820                         push @ns_list, $ns;
821                 }
822         }
823
824         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
825         return @ns_list;
826 }
827
828 sub flat_marc_xml {
829         my $self = shift;
830         my $client = shift;
831         my $xml = shift;
832
833         $log->debug("processing [$xml]");
834
835         $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
836
837         my $type = 'metabib';
838         $type = 'authority' if ($self->api_name =~ /authority/o);
839
840         OpenILS::Application::Ingest->post_init();
841
842         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
843         return undef;
844 }
845 __PACKAGE__->register_method(  
846         api_name        => "open-ils.ingest.flat_marc.authority.xml",
847         method          => "flat_marc_xml",
848         api_level       => 1,
849         argc            => 1,
850         stream          => 1,
851 );                      
852 __PACKAGE__->register_method(  
853         api_name        => "open-ils.ingest.flat_marc.biblio.xml",
854         method          => "flat_marc_xml",
855         api_level       => 1,
856         argc            => 1,
857         stream          => 1,
858 );                      
859
860 sub flat_marc_record {
861         my $self = shift;
862         my $client = shift;
863         my $rec = shift;
864
865         my $type = 'biblio';
866         $type = 'authority' if ($self->api_name =~ /authority/o);
867
868         OpenILS::Application::Ingest->post_init();
869         my $r = OpenSRF::AppSession
870                         ->create('open-ils.cstore')
871                         ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
872                         ->gather(1);
873
874
875         return undef unless ($r and $r->marc);
876
877         my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
878         for my $row (@rows) {
879                 $client->respond($row);
880                 $log->debug(JSON->perl2JSON($row), DEBUG);
881         }
882         return undef;
883 }
884 __PACKAGE__->register_method(  
885         api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
886         method          => "flat_marc_record",
887         api_level       => 1,
888         argc            => 1,
889         stream          => 1,
890 );                      
891 __PACKAGE__->register_method(  
892         api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
893         method          => "flat_marc_record",
894         api_level       => 1,
895         argc            => 1,
896         stream          => 1,
897 );                      
898
899 # --------------------------------------------------------------------------------
900 # Fingerprinting
901
902 package OpenILS::Application::Ingest::Biblio::Fingerprint;
903 use base qw/OpenILS::Application::Ingest/;
904 use Unicode::Normalize;
905 use OpenSRF::EX qw/:try/;
906
907 sub biblio_fingerprint_record {
908         my $self = shift;
909         my $client = shift;
910         my $rec = shift;
911
912         OpenILS::Application::Ingest->post_init();
913
914         my $r = OpenSRF::AppSession
915                         ->create('open-ils.cstore')
916                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
917                         ->gather(1);
918
919         return undef unless ($r and $r->marc);
920
921         my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
922         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
923         $fp->{quality} = int($fp->{quality});
924         return $fp;
925 }
926 __PACKAGE__->register_method(  
927         api_name        => "open-ils.ingest.fingerprint.record",
928         method          => "biblio_fingerprint_record",
929         api_level       => 1,
930         argc            => 1,
931 );                      
932
933 our $fp_script;
934 sub biblio_fingerprint {
935         my $self = shift;
936         my $client = shift;
937         my $xml = OpenILS::Application::Ingest::entityize(shift);
938
939         $log->internal("Got MARC [$xml]");
940
941         if(!$fp_script) {
942                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
943                 my $conf = OpenSRF::Utils::SettingsClient->new;
944
945                 my $libs        = $conf->config_value(@pfx, 'script_path');
946                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
947                 my $script_libs = (ref($libs)) ? $libs : [$libs];
948
949                 $log->debug("Loading script $script_file for biblio fingerprinting...");
950                 
951                 $fp_script = new OpenILS::Utils::ScriptRunner
952                         ( file          => $script_file,
953                           paths         => $script_libs,
954                           reset_count   => 100 );
955         }
956
957         $fp_script->insert('environment' => {marc => $xml} => 1);
958
959         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return undef);
960         $log->debug("Script for biblio fingerprinting completed successfully...");
961
962         return $res;
963 }
964 __PACKAGE__->register_method(  
965         api_name        => "open-ils.ingest.fingerprint.xml",
966         method          => "biblio_fingerprint",
967         api_level       => 1,
968         argc            => 1,
969 );                      
970
971 our $rd_script;
972 sub biblio_descriptor {
973         my $self = shift;
974         my $client = shift;
975         my $xml = OpenILS::Application::Ingest::entityize(shift);
976
977         $log->internal("Got MARC [$xml]");
978
979         if(!$rd_script) {
980                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
981                 my $conf = OpenSRF::Utils::SettingsClient->new;
982
983                 my $libs        = $conf->config_value(@pfx, 'script_path');
984                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
985                 my $script_libs = (ref($libs)) ? $libs : [$libs];
986
987                 $log->debug("Loading script $script_file for biblio descriptor extraction...");
988                 
989                 $rd_script = new OpenILS::Utils::ScriptRunner
990                         ( file          => $script_file,
991                           paths         => $script_libs,
992                           reset_count   => 100 );
993         }
994
995         $log->debug("Setting up environment for descriptor extraction script...");
996         $rd_script->insert('environment.marc' => $xml => 1);
997         $log->debug("Environment building complete...");
998
999         my $res = $rd_script->run || ($log->error( "Descriptor script died!  $@" ) && return undef);
1000         $log->debug("Script for biblio descriptor extraction completed successfully");
1001
1002         return $res;
1003 }
1004 __PACKAGE__->register_method(  
1005         api_name        => "open-ils.ingest.descriptor.xml",
1006         method          => "biblio_descriptor",
1007         api_level       => 1,
1008         argc            => 1,
1009 );                      
1010
1011
1012 1;
1013
1014 __END__
1015
1016 sub in_transaction {
1017         OpenILS::Application::Ingest->post_init();
1018         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1019 }
1020
1021 sub begin_transaction {
1022         my $self = shift;
1023         my $client = shift;
1024         
1025         OpenILS::Application::Ingest->post_init();
1026         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1027         
1028         try {
1029                 if (!$outer_xact) {
1030                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1031                         #__PACKAGE__->st_sess->connect;
1032                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
1033                         unless (defined $r and $r) {
1034                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1035                                 #__PACKAGE__->st_sess->disconnect;
1036                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1037                         }
1038                 }
1039         } otherwise {
1040                 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1041         };
1042
1043         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1044 }
1045
1046 sub rollback_transaction {
1047         my $self = shift;
1048         my $client = shift;
1049
1050         OpenILS::Application::Ingest->post_init();
1051         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1052
1053         try {
1054                 if ($outer_xact) {
1055                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1056                 } else {
1057                         $log->debug("Ingest isn't inside a transaction.", INFO);
1058                 }
1059         } catch Error with {
1060                 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
1061         };
1062
1063         return 1;
1064 }
1065
1066 sub commit_transaction {
1067         my $self = shift;
1068         my $client = shift;
1069
1070         OpenILS::Application::Ingest->post_init();
1071         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1072
1073         try {
1074                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1075                 if ($outer_xact) {
1076                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
1077                         unless (defined $r and $r) {
1078                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1079                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1080                         }
1081                         #__PACKAGE__->st_sess->disconnect;
1082                 } else {
1083                         $log->debug("Ingest isn't inside a transaction.", INFO);
1084                 }
1085         } catch Error with {
1086                 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
1087         };
1088
1089         return 1;
1090 }
1091
1092 sub storage_req {
1093         my $self = shift;
1094         my $method = shift;
1095         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1096         return shift( @res );
1097 }
1098
1099 sub scrub_authority_record {
1100         my $self = shift;
1101         my $client = shift;
1102         my $rec = shift;
1103
1104         my $commit = 0;
1105         if (!OpenILS::Application::Ingest->in_transaction) {
1106                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1107                 $commit = 1;
1108         }
1109
1110         my $success = 1;
1111         try {
1112                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1113
1114                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1115                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1116
1117                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
1118         } otherwise {
1119                 $log->debug('Scrubbing failed : '.shift(), ERROR);
1120                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
1121                 $success = 0;
1122         };
1123
1124         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1125         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1126         return $success;
1127 }
1128 __PACKAGE__->register_method(  
1129         api_name        => "open-ils.worm.scrub.authority",
1130         method          => "scrub_authority_record",
1131         api_level       => 1,
1132         argc            => 1,
1133 );                      
1134
1135
1136 sub scrub_metabib_record {
1137         my $self = shift;
1138         my $client = shift;
1139         my $rec = shift;
1140
1141         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1142                 $rec = OpenILS::Application::Ingest->storage_req(
1143                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1144                 );
1145         }
1146
1147         my $commit = 0;
1148         if (!OpenILS::Application::Ingest->in_transaction) {
1149                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1150                 $commit = 1;
1151         }
1152
1153         my $success = 1;
1154         try {
1155                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
1156                 
1157                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1158                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1159                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1160                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1161                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1162                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1163                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1164                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1165
1166                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1167                 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1168
1169                 for my $mr (@$masters) {
1170                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
1171                         my $others = OpenILS::Application::Ingest->storage_req(
1172                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
1173
1174                         if (@$others) {
1175                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1176                                 $mr->master_record($others->[0]->source);
1177                                 OpenILS::Application::Ingest->storage_req(
1178                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1179                                         { id => $mr->id },
1180                                         { master_record => $others->[0]->source, mods => undef }
1181                                 );
1182                         } else {
1183                                 warn "Removing metarecord whose master is $rec";
1184                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1185                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1186                                 warn "Metarecord removed";
1187                                 $log->debug( "Metarecord removed", DEBUG);
1188                         }
1189                 }
1190
1191                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
1192
1193         } otherwise {
1194                 $log->debug('Scrubbing failed : '.shift(), ERROR);
1195                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1196                 $success = 0;
1197         };
1198
1199         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1200         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1201         return $success;
1202 }
1203 __PACKAGE__->register_method(  
1204         api_name        => "open-ils.worm.scrub.biblio",
1205         method          => "scrub_metabib_record",
1206         api_level       => 1,
1207         argc            => 1,
1208 );                      
1209
1210 sub wormize_biblio_metarecord {
1211         my $self = shift;
1212         my $client = shift;
1213         my $mrec = shift;
1214
1215         my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1216
1217         my $count = 0;
1218         for my $r (@$recs) {
1219                 my $success = 0;
1220                 try {
1221                         $success = wormize_biblio_record($self => $client => $r->source);
1222                         $client->respond(
1223                                 { record  => $r->source,
1224                                   metarecord => $rec->metarecord,
1225                                   success => $success,
1226                                 }
1227                         );
1228                 } catch Error with {
1229                         my $e = shift;
1230                         $client->respond(
1231                                 { record  => $r->source,
1232                                   metarecord => $rec->metarecord,
1233                                   success => $success,
1234                                   error   => $e,
1235                                 }
1236                         );
1237                 };
1238         }
1239         return undef;
1240 }
1241 __PACKAGE__->register_method(
1242         api_name        => "open-ils.worm.wormize.metarecord",
1243         method          => "wormize_biblio_metarecord",
1244         api_level       => 1,
1245         argc            => 1,
1246         stream          => 1,
1247 );
1248 __PACKAGE__->register_method(
1249         api_name        => "open-ils.worm.wormize.metarecord.nomap",
1250         method          => "wormize_biblio_metarecord",
1251         api_level       => 1,
1252         argc            => 1,
1253         stream          => 1,
1254 );
1255 __PACKAGE__->register_method(
1256         api_name        => "open-ils.worm.wormize.metarecord.noscrub",
1257         method          => "wormize_biblio_metarecord",
1258         api_level       => 1,
1259         argc            => 1,
1260         stream          => 1,
1261 );
1262 __PACKAGE__->register_method(
1263         api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1264         method          => "wormize_biblio_metarecord",
1265         api_level       => 1,
1266         argc            => 1,
1267         stream          => 1,
1268 );
1269
1270
1271 sub wormize_biblio_record {
1272         my $self = shift;
1273         my $client = shift;
1274         my $rec = shift;
1275
1276         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1277                 $rec = OpenILS::Application::Ingest->storage_req(
1278                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1279                 );
1280         }
1281
1282
1283         my $commit = 0;
1284         if (!OpenILS::Application::Ingest->in_transaction) {
1285                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1286                 $commit = 1;
1287         }
1288
1289         my $success = 1;
1290         try {
1291                 # clean up the cruft
1292                 unless ($self->api_name =~ /noscrub/o) {
1293                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1294                 }
1295
1296                 # now redo 'em
1297                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1298
1299                 my @full_rec = ();
1300                 my @rec_descriptor = ();
1301                 my %field_entry = (
1302                         title   => [],
1303                         author  => [],
1304                         subject => [],
1305                         keyword => [],
1306                         series  => [],
1307                 );
1308                 my %metarecord = ();
1309                 my @source_map = ();
1310                 for my $r (@$bibs) {
1311                         try {
1312                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1313
1314                                 my $xml = $parser->parse_string($r->marc);
1315
1316                                 #update the fingerprint
1317                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1318                                 OpenILS::Application::Ingest->storage_req(
1319                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1320                                         { id => $r->id },
1321                                         { fingerprint => $fp->{fingerprint},
1322                                           quality     => int($fp->{quality}) }
1323                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1324
1325                                 # the full_rec stuff
1326                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1327                                         $fr->record( $r->id );
1328                                         push @full_rec, $fr;
1329                                 }
1330
1331                                 # the rec_descriptor stuff
1332                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1333                                 $rd->record( $r->id );
1334                                 push @rec_descriptor, $rd;
1335                         
1336                                 # the indexing field entry stuff
1337                                 for my $class ( qw/title author subject keyword series/ ) {
1338                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1339                                                 $fe->source( $r->id );
1340                                                 push @{$field_entry{$class}}, $fe;
1341                                         }
1342                                 }
1343
1344                                 unless ($self->api_name =~ /nomap/o) {
1345                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
1346                                 
1347                                         unless ($mr) {
1348                                                 $mr = Fieldmapper::metabib::metarecord->new;
1349                                                 $mr->fingerprint( $fp->{fingerprint} );
1350                                                 $mr->master_record( $r->id );
1351                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1352                                         }
1353
1354                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1355                                         $mr_map->metarecord( $mr->id );
1356                                         $mr_map->source( $r->id );
1357                                         push @source_map, $mr_map;
1358
1359                                         $metarecord{$mr->id} = $mr;
1360                                 }
1361                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1362                         } otherwise {
1363                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1364                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1365                         };
1366                 }
1367                 
1368
1369                 if (@rec_descriptor) {
1370                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1371
1372                         OpenILS::Application::Ingest->storage_req(
1373                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1374                                 @source_map
1375                         ) if (@source_map);
1376
1377                         for my $mr ( values %metarecord ) {
1378                                 my $sources = OpenILS::Application::Ingest->storage_req(
1379                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1380                                         $mr->id
1381                                 );
1382
1383                                 my $bibs = OpenILS::Application::Ingest->storage_req(
1384                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1385                                         [ map { $_->source } @$sources ]
1386                                 );
1387
1388                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1389
1390                                 OpenILS::Application::Ingest->storage_req(
1391                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1392                                         { id => $mr->id },
1393                                         { master_record => $master->id, mods => undef }
1394                                 );
1395                         }
1396
1397                         OpenILS::Application::Ingest->storage_req(
1398                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1399                                 @rec_descriptor
1400                         ) if (@rec_descriptor);
1401
1402                         OpenILS::Application::Ingest->storage_req(
1403                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
1404                                 @full_rec
1405                         ) if (@full_rec);
1406
1407                         OpenILS::Application::Ingest->storage_req(
1408                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1409                                 @{ $field_entry{title} }
1410                         ) if (@{ $field_entry{title} });
1411
1412                         OpenILS::Application::Ingest->storage_req(
1413                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1414                                 @{ $field_entry{author} }
1415                         ) if (@{ $field_entry{author} });
1416                         
1417                         OpenILS::Application::Ingest->storage_req(
1418                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1419                                 @{ $field_entry{subject} }
1420                         ) if (@{ $field_entry{subject} });
1421
1422                         OpenILS::Application::Ingest->storage_req(
1423                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1424                                 @{ $field_entry{keyword} }
1425                         ) if (@{ $field_entry{keyword} });
1426
1427                         OpenILS::Application::Ingest->storage_req(
1428                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1429                                 @{ $field_entry{series} }
1430                         ) if (@{ $field_entry{series} });
1431
1432                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1433                 } else {
1434                         $success = 0;
1435                 }
1436
1437         } otherwise {
1438                 $log->debug('Wormization failed : '.shift(), ERROR);
1439                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1440                 $success = 0;
1441         };
1442
1443         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1444         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1445         return $success;
1446 }
1447 __PACKAGE__->register_method(
1448         api_name        => "open-ils.worm.wormize.biblio",
1449         method          => "wormize_biblio_record",
1450         api_level       => 1,
1451         argc            => 1,
1452 );
1453 __PACKAGE__->register_method(
1454         api_name        => "open-ils.worm.wormize.biblio.nomap",
1455         method          => "wormize_biblio_record",
1456         api_level       => 1,
1457         argc            => 1,
1458 );
1459 __PACKAGE__->register_method(
1460         api_name        => "open-ils.worm.wormize.biblio.noscrub",
1461         method          => "wormize_biblio_record",
1462         api_level       => 1,
1463         argc            => 1,
1464 );
1465 __PACKAGE__->register_method(
1466         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
1467         method          => "wormize_biblio_record",
1468         api_level       => 1,
1469         argc            => 1,
1470 );
1471
1472 sub wormize_authority_record {
1473         my $self = shift;
1474         my $client = shift;
1475         my $rec = shift;
1476
1477         my $commit = 0;
1478         if (!OpenILS::Application::Ingest->in_transaction) {
1479                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1480                 $commit = 1;
1481         }
1482
1483         my $success = 1;
1484         try {
1485                 # clean up the cruft
1486                 unless ($self->api_name =~ /noscrub/o) {
1487                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1488                 }
1489
1490                 # now redo 'em
1491                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1492
1493                 my @full_rec = ();
1494                 my @rec_descriptor = ();
1495                 for my $r (@$bibs) {
1496                         my $xml = $parser->parse_string($r->marc);
1497
1498                         # the full_rec stuff
1499                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1500                                 $fr->record( $r->id );
1501                                 push @full_rec, $fr;
1502                         }
1503
1504                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
1505                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1506                         #$rd->record( $r->id );
1507                         #push @rec_descriptor, $rd;
1508                         
1509                 }
1510
1511                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1512
1513                 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1514                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1515
1516                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1517
1518         } otherwise {
1519                 $log->debug('Wormization failed : '.shift(), ERROR);
1520                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
1521                 $success = 0;
1522         };
1523
1524         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1525         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1526         return $success;
1527 }
1528 __PACKAGE__->register_method(
1529         api_name        => "open-ils.worm.wormize.authority",
1530         method          => "wormize_authority_record",
1531         api_level       => 1,
1532         argc            => 1,
1533 );
1534 __PACKAGE__->register_method(
1535         api_name        => "open-ils.worm.wormize.authority.noscrub",
1536         method          => "wormize_authority_record",
1537         api_level       => 1,
1538         argc            => 1,
1539 );
1540
1541
1542 # --------------------------------------------------------------------------------
1543 # MARC index extraction
1544
1545 package OpenILS::Application::Ingest::XPATH;
1546 use base qw/OpenILS::Application::Ingest/;
1547 use Unicode::Normalize;
1548
1549 # give this a MODS documentElement and an XPATH expression
1550 sub _xpath_to_string {
1551         my $xml = shift;
1552         my $xpath = shift;
1553         my $ns_uri = shift;
1554         my $ns_prefix = shift;
1555         my $unique = shift;
1556
1557         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1558
1559         my $string = "";
1560
1561         # grab the set of matching nodes
1562         my @nodes = $xml->findnodes( $xpath );
1563         for my $value (@nodes) {
1564
1565                 # grab all children of the node
1566                 my @children = $value->childNodes();
1567                 for my $child (@children) {
1568
1569                         # add the childs content to the growing buffer
1570                         my $content = quotemeta($child->textContent);
1571                         next if ($unique && $string =~ /$content/);  # uniquify the values
1572                         $string .= $child->textContent . " ";
1573                 }
1574                 if( ! @children ) {
1575                         $string .= $value->textContent . " ";
1576                 }
1577         }
1578         return NFD($string);
1579 }
1580
1581 sub class_all_index_string_xml {
1582         my $self = shift;
1583         my $client = shift;
1584         my $xml = shift;
1585         my $class = shift;
1586
1587         OpenILS::Application::Ingest->post_init();
1588         $xml = $parser->parse_string($xml) unless (ref $xml);
1589         
1590         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1591         for my $type ( keys %{ $xpathset->{$class} } ) {
1592                 my $value =  _xpath_to_string(
1593                                 $mods_sheet->transform($xml)->documentElement,
1594                                 $xpathset->{$class}->{$type}->{xpath},
1595                                 "http://www.loc.gov/mods/",
1596                                 "mods",
1597                                 1
1598                 );
1599
1600                 next unless $value;
1601
1602                 $value = NFD($value);
1603                 $value =~ s/\pM+//sgo;
1604                 $value =~ s/\pC+//sgo;
1605                 $value =~ s/\W+$//sgo;
1606
1607                 $value =~ s/(\w)\./$1/sgo;
1608                 $value = lc($value);
1609
1610                 my $fm = $class_constructor->new;
1611                 $fm->value( $value );
1612                 $fm->field( $xpathset->{$class}->{$type}->{id} );
1613                 $client->respond($fm);
1614         }
1615         return undef;
1616 }
1617 __PACKAGE__->register_method(  
1618         api_name        => "open-ils.worm.field_entry.class.xml",
1619         method          => "class_all_index_string_xml",
1620         api_level       => 1,
1621         argc            => 1,
1622         stream          => 1,
1623 );                      
1624
1625 sub class_all_index_string_record {
1626         my $self = shift;
1627         my $client = shift;
1628         my $rec = shift;
1629         my $class = shift;
1630
1631         OpenILS::Application::Ingest->post_init();
1632         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1633
1634         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1635                 $fm->source($rec);
1636                 $client->respond($fm);
1637         }
1638         return undef;
1639 }
1640 __PACKAGE__->register_method(  
1641         api_name        => "open-ils.worm.field_entry.class.record",
1642         method          => "class_all_index_string_record",
1643         api_level       => 1,
1644         argc            => 1,
1645         stream          => 1,
1646 );                      
1647
1648
1649 sub class_index_string_xml {
1650         my $self = shift;
1651         my $client = shift;
1652         my $xml = shift;
1653         my $class = shift;
1654         my $type = shift;
1655
1656         OpenILS::Application::Ingest->post_init();
1657         $xml = $parser->parse_string($xml) unless (ref $xml);
1658         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1659 }
1660 __PACKAGE__->register_method(  
1661         api_name        => "open-ils.worm.class.type.xml",
1662         method          => "class_index_string_xml",
1663         api_level       => 1,
1664         argc            => 1,
1665 );                      
1666
1667 sub class_index_string_record {
1668         my $self = shift;
1669         my $client = shift;
1670         my $rec = shift;
1671         my $class = shift;
1672         my $type = shift;
1673
1674         OpenILS::Application::Ingest->post_init();
1675         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1676
1677         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1678         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1679         return $d;
1680 }
1681 __PACKAGE__->register_method(  
1682         api_name        => "open-ils.worm.class.type.record",
1683         method          => "class_index_string_record",
1684         api_level       => 1,
1685         argc            => 1,
1686 );                      
1687
1688 sub xml_xpath {
1689         my $self = shift;
1690         my $client = shift;
1691         my $xml = shift;
1692         my $xpath = shift;
1693         my $uri = shift;
1694         my $prefix = shift;
1695         my $unique = shift;
1696
1697         OpenILS::Application::Ingest->post_init();
1698         $xml = $parser->parse_string($xml) unless (ref $xml);
1699         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1700 }
1701 __PACKAGE__->register_method(  
1702         api_name        => "open-ils.worm.xpath.xml",
1703         method          => "xml_xpath",
1704         api_level       => 1,
1705         argc            => 1,
1706 );                      
1707
1708 sub record_xpath {
1709         my $self = shift;
1710         my $client = shift;
1711         my $rec = shift;
1712         my $xpath = shift;
1713         my $uri = shift;
1714         my $prefix = shift;
1715         my $unique = shift;
1716
1717         OpenILS::Application::Ingest->post_init();
1718         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1719
1720         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1721         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1722         return $d;
1723 }
1724 __PACKAGE__->register_method(  
1725         api_name        => "open-ils.worm.xpath.record",
1726         method          => "record_xpath",
1727         api_level       => 1,
1728         argc            => 1,
1729 );                      
1730
1731
1732 # --------------------------------------------------------------------------------
1733 # MARC Descriptor
1734
1735 package OpenILS::Application::Ingest::Biblio::Leader;
1736 use base qw/OpenILS::Application::Ingest/;
1737 use Unicode::Normalize;
1738
1739 our %marc_type_groups = (
1740         BKS => q/[at]{1}/,
1741         SER => q/[a]{1}/,
1742         VIS => q/[gkro]{1}/,
1743         MIX => q/[p]{1}/,
1744         MAP => q/[ef]{1}/,
1745         SCO => q/[cd]{1}/,
1746         REC => q/[ij]{1}/,
1747         COM => q/[m]{1}/,
1748 );
1749
1750 sub _type_re {
1751         my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
1752         return qr/$re/;
1753 }
1754
1755 our %biblio_descriptor_code = (
1756         item_type => sub { substr($ldr,6,1); },
1757         item_form =>
1758                 sub {
1759                         if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1760                                 return substr($oo8,29,1);
1761                         } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1762                                 return substr($oo8,23,1);
1763                         }
1764                         return ' ';
1765                 },
1766         bib_level => sub { substr($ldr,7,1); },
1767         control_type => sub { substr($ldr,8,1); },
1768         char_encoding => sub { substr($ldr,9,1); },
1769         enc_level => sub { substr($ldr,17,1); },
1770         cat_form => sub { substr($ldr,18,1); },
1771         pub_status => sub { substr($ldr,5,1); },
1772         item_lang => sub { substr($oo8,35,3); },
1773         lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1774         type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1775         audience => sub { substr($oo8,22,1); },
1776 );
1777
1778 sub _extract_biblio_descriptors {
1779         my $xml = shift;
1780
1781         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1782         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1783         local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1784
1785         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1786         for my $rd_field ( keys %biblio_descriptor_code ) {
1787                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1788         }
1789
1790         return $rd_obj;
1791 }
1792
1793 sub extract_biblio_desc_xml {
1794         my $self = shift;
1795         my $client = shift;
1796         my $xml = shift;
1797
1798         $xml = $parser->parse_string($xml) unless (ref $xml);
1799
1800         return _extract_biblio_descriptors( $xml );
1801 }
1802 __PACKAGE__->register_method(  
1803         api_name        => "open-ils.worm.biblio_leader.xml",
1804         method          => "extract_biblio_desc_xml",
1805         api_level       => 1,
1806         argc            => 1,
1807 );                      
1808
1809 sub extract_biblio_desc_record {
1810         my $self = shift;
1811         my $client = shift;
1812         my $rec = shift;
1813
1814         OpenILS::Application::Ingest->post_init();
1815         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1816
1817         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1818         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1819         return $d;
1820 }
1821 __PACKAGE__->register_method(  
1822         api_name        => "open-ils.worm.biblio_leader.record",
1823         method          => "extract_biblio_desc_record",
1824         api_level       => 1,
1825         argc            => 1,
1826 );                      
1827
1828 # --------------------------------------------------------------------------------
1829 # Flat MARC
1830
1831 package OpenILS::Application::Ingest::FlatMARC;
1832 use base qw/OpenILS::Application::Ingest/;
1833 use Unicode::Normalize;
1834
1835
1836 sub _marcxml_to_full_rows {
1837
1838         my $marcxml = shift;
1839         my $xmltype = shift || 'metabib';
1840
1841         my $type = "Fieldmapper::${xmltype}::full_rec";
1842
1843         my @ns_list;
1844         
1845         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1846
1847         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1848                 next unless $tagline;
1849
1850                 my $ns = $type->new;
1851
1852                 $ns->tag( 'LDR' );
1853                 my $val = $tagline->textContent;
1854                 $val = NFD($val);
1855                 $val =~ s/\pM+//sgo;
1856                 $val =~ s/\pC+//sgo;
1857                 $val =~ s/\W+$//sgo;
1858                 $ns->value( $val );
1859
1860                 push @ns_list, $ns;
1861         }
1862
1863         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1864                 next unless $tagline;
1865
1866                 my $ns = $type->new;
1867
1868                 $ns->tag( $tagline->getAttribute( "tag" ) );
1869                 my $val = $tagline->textContent;
1870                 $val = NFD($val);
1871                 $val =~ s/\pM+//sgo;
1872                 $val =~ s/\pC+//sgo;
1873                 $val =~ s/\W+$//sgo;
1874                 $ns->value( $val );
1875
1876                 push @ns_list, $ns;
1877         }
1878
1879         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1880                 next unless $tagline;
1881
1882                 my $tag = $tagline->getAttribute( "tag" );
1883                 my $ind1 = $tagline->getAttribute( "ind1" );
1884                 my $ind2 = $tagline->getAttribute( "ind2" );
1885
1886                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1887                         next unless $data;
1888
1889                         my $ns = $type->new;
1890
1891                         $ns->tag( $tag );
1892                         $ns->ind1( $ind1 );
1893                         $ns->ind2( $ind2 );
1894                         $ns->subfield( $data->getAttribute( "code" ) );
1895                         my $val = $data->textContent;
1896                         $val = NFD($val);
1897                         $val =~ s/\pM+//sgo;
1898                         $val =~ s/\pC+//sgo;
1899                         $val =~ s/\W+$//sgo;
1900                         $ns->value( lc($val) );
1901
1902                         push @ns_list, $ns;
1903                 }
1904         }
1905
1906         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1907         return @ns_list;
1908 }
1909
1910 sub flat_marc_xml {
1911         my $self = shift;
1912         my $client = shift;
1913         my $xml = shift;
1914
1915         $xml = $parser->parse_string($xml) unless (ref $xml);
1916
1917         my $type = 'metabib';
1918         $type = 'authority' if ($self->api_name =~ /authority/o);
1919
1920         OpenILS::Application::Ingest->post_init();
1921
1922         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1923         return undef;
1924 }
1925 __PACKAGE__->register_method(  
1926         api_name        => "open-ils.worm.flat_marc.authority.xml",
1927         method          => "flat_marc_xml",
1928         api_level       => 1,
1929         argc            => 1,
1930         stream          => 1,
1931 );                      
1932 __PACKAGE__->register_method(  
1933         api_name        => "open-ils.worm.flat_marc.biblio.xml",
1934         method          => "flat_marc_xml",
1935         api_level       => 1,
1936         argc            => 1,
1937         stream          => 1,
1938 );                      
1939
1940 sub flat_marc_record {
1941         my $self = shift;
1942         my $client = shift;
1943         my $rec = shift;
1944
1945         my $type = 'biblio';
1946         $type = 'authority' if ($self->api_name =~ /authority/o);
1947
1948         OpenILS::Application::Ingest->post_init();
1949         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1950
1951         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1952         return undef;
1953 }
1954 __PACKAGE__->register_method(  
1955         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
1956         method          => "flat_marc_record",
1957         api_level       => 1,
1958         argc            => 1,
1959         stream          => 1,
1960 );                      
1961 __PACKAGE__->register_method(  
1962         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
1963         method          => "flat_marc_record",
1964         api_level       => 1,
1965         argc            => 1,
1966         stream          => 1,
1967 );                      
1968
1969
1970 # --------------------------------------------------------------------------------
1971 # Fingerprinting
1972
1973 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1974 use base qw/OpenILS::Application::Ingest/;
1975 use Unicode::Normalize;
1976 use OpenSRF::EX qw/:try/;
1977
1978 my @fp_mods_xpath = (
1979         '//mods:mods/mods:typeOfResource[text()="text"]' => [
1980                         title   => {
1981                                         xpath   => [
1982                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1983                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1984                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1985                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1986                                         ],
1987                                         fixup   => sub {
1988                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1989                                                         $text = NFD($text);
1990                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1991                                                         $text =~ s/\pM+//gso;
1992                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1993                                                         $text = lc($text);
1994                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1995                                                         $text =~ s/\s+/ /sgo;
1996                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1997                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1998                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1999                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2000                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2001                                                         $text =~ s/\[.[^\]]+\]//sgo;
2002                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2003                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2004                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2005                                                 },
2006                         },
2007                         author  => {
2008                                         xpath   => [
2009                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2010                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2011                                         ],
2012                                         fixup   => sub {
2013                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2014                                                         $text = NFD($text);
2015                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2016                                                         $text =~ s/\pM+//gso;
2017                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2018                                                         $text = lc($text);
2019                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2020                                                         $text =~ s/\s+/ /sgo;
2021                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2022                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2023                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2024                                                         $text =~ s/,?\s+.*$//sgo;
2025                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2026                                                 },
2027                         },
2028         ],
2029
2030         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
2031                         title   => {
2032                                         xpath   => [
2033                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2034                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2035                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2036                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2037                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2038                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2039                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2040                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2041                                         ],
2042                                         fixup   => sub {
2043                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2044                                                         $text = NFD($text);
2045                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2046                                                         $text =~ s/\pM+//gso;
2047                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2048                                                         $text = lc($text);
2049                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2050                                                         $text =~ s/\s+/ /sgo;
2051                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2052                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2053                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2054                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2055                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2056                                                         $text =~ s/\[.[^\]]+\]//sgo;
2057                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2058                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2059                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2060                                                 },
2061                         },
2062                         author  => {
2063                                         xpath   => [
2064                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2065                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2066                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2067                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2068                                         ],
2069                                         fixup   => sub {
2070                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2071                                                         $text = NFD($text);
2072                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2073                                                         $text =~ s/\pM+//gso;
2074                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2075                                                         $text = lc($text);
2076                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2077                                                         $text =~ s/\s+/ /sgo;
2078                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2079                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2080                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2081                                                         $text =~ s/,?\s+.*$//sgo;
2082                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2083                                                 },
2084                         },
2085         ],
2086
2087 );
2088
2089 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
2090
2091 sub _fp_mods {
2092         my $mods = shift;
2093         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2094
2095         my $fp_string = '';
2096
2097         my $match_index = 0;
2098         my $block_index = 1;
2099         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2100                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
2101
2102                         my $block_name_index = 0;
2103                         my $block_value_index = 1;
2104                         my $block = $fp_mods_xpath[$block_index];
2105                         while ( my $part = $$block[$block_value_index] ) {
2106                                 local $text;
2107                                 for my $xpath ( @{ $part->{xpath} } ) {
2108                                         $text = $mods->findvalue( $xpath );
2109                                         last if ($text);
2110                                 }
2111
2112                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2113
2114                                 if ($text) {
2115                                         $$part{fixup}->();
2116                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2117                                         $fp_string .= $text;
2118                                 }
2119
2120                                 $block_name_index += 2;
2121                                 $block_value_index += 2;
2122                         }
2123                 }
2124                 if ($fp_string) {
2125                         $fp_string =~ s/\W+//gso;
2126                         $log->debug("Fingerprint is [$fp_string]", INFO);;
2127                         return $fp_string;
2128                 }
2129
2130                 $match_index += 2;
2131                 $block_index += 2;
2132         }
2133         return undef;
2134 }
2135
2136 sub refingerprint_bibrec {
2137         my $self = shift;
2138         my $client = shift;
2139         my $rec = shift;
2140
2141         my $commit = 0;
2142         if (!OpenILS::Application::Ingest->in_transaction) {
2143                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2144                 $commit = 1;
2145         }
2146
2147         my $success = 1;
2148         try {
2149                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2150                 for my $b (@$bibs) {
2151                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
2152
2153                         if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2154
2155                                 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2156
2157                                 OpenILS::Application::Ingest->storage_req(
2158                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
2159                                         { id => $b->id },
2160                                         { fingerprint => $fp->{fingerprint},
2161                                           quality     => $fp->{quality} }
2162                                 );
2163
2164                                 if ($self->api_name !~ /nomap/o) {
2165                                         my $old_source_map = OpenILS::Application::Ingest->storage_req(
2166                                                 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2167                                                 $b->id
2168                                         );
2169
2170                                         my $old_mrid;
2171                                         if (ref($old_source_map) and @$old_source_map) {
2172                                                 for my $m (@$old_source_map) {
2173                                                         $old_mrid = $m->metarecord;
2174                                                         OpenILS::Application::Ingest->storage_req(
2175                                                                 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
2176                                                                 $m->id
2177                                                         );
2178                                                 }
2179                                         }
2180
2181                                         my $old_sm = OpenILS::Application::Ingest->storage_req(
2182                                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2183                                                         { metarecord => $old_mrid }
2184                                         ) if ($old_mrid);
2185
2186                                         if (ref($old_sm) and @$old_sm == 0) {
2187                                                 OpenILS::Application::Ingest->storage_req(
2188                                                         'open-ils.storage.direct.metabib.metarecord.delete',
2189                                                         $old_mrid
2190                                                 );
2191                                         }
2192
2193                                         my $mr = OpenILS::Application::Ingest->storage_req(
2194                                                         'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2195                                                         { fingerprint => $fp->{fingerprint} }
2196                                         )->[0];
2197                                 
2198                                         unless ($mr) {
2199                                                 $mr = Fieldmapper::metabib::metarecord->new;
2200                                                 $mr->fingerprint( $fp->{fingerprint} );
2201                                                 $mr->master_record( $b->id );
2202                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2203                                         }
2204
2205                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2206                                         $mr_map->metarecord( $mr->id );
2207                                         $mr_map->source( $b->id );
2208                                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2209
2210                                 }
2211                         }
2212                         $client->respond($b->id);
2213                 }
2214
2215         } otherwise {
2216                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
2217                 $success = 0;
2218         };
2219
2220         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2221         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
2222         return undef;
2223 }
2224 __PACKAGE__->register_method(  
2225         api_name        => "open-ils.worm.fingerprint.record.update",
2226         method          => "refingerprint_bibrec",
2227         api_level       => 1,
2228         argc            => 1,
2229         stream          => 1,
2230 );                      
2231
2232 __PACKAGE__->register_method(  
2233         api_name        => "open-ils.worm.fingerprint.record.update.nomap",
2234         method          => "refingerprint_bibrec",
2235         api_level       => 1,
2236         argc            => 1,
2237 );                      
2238
2239 =comment
2240
2241 sub fingerprint_bibrec {
2242         my $self = shift;
2243         my $client = shift;
2244         my $rec = shift;
2245
2246         OpenILS::Application::Ingest->post_init();
2247         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2248
2249         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2250         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2251         return $fp;
2252
2253 }
2254 __PACKAGE__->register_method(  
2255         api_name        => "open-ils.worm.fingerprint.record",
2256         method          => "fingerprint_bibrec",
2257         api_level       => 0,
2258         argc            => 1,
2259 );                      
2260
2261
2262 sub fingerprint_mods {
2263         my $self = shift;
2264         my $client = shift;
2265         my $xml = shift;
2266
2267         OpenILS::Application::Ingest->post_init();
2268         my $mods = $parser->parse_string($xml)->documentElement;
2269
2270         return _fp_mods( $mods );
2271 }
2272 __PACKAGE__->register_method(  
2273         api_name        => "open-ils.worm.fingerprint.mods",
2274         method          => "fingerprint_mods",
2275         api_level       => 1,
2276         argc            => 1,
2277 );                      
2278
2279 sub fingerprint_marc {
2280         my $self = shift;
2281         my $client = shift;
2282         my $xml = shift;
2283
2284         $xml = $parser->parse_string($xml) unless (ref $xml);
2285
2286         OpenILS::Application::Ingest->post_init();
2287         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2288         $log->debug("Returning [$fp] as fingerprint", INFO);
2289         return $fp;
2290 }
2291 __PACKAGE__->register_method(  
2292         api_name        => "open-ils.worm.fingerprint.marc",
2293         method          => "fingerprint_marc",
2294         api_level       => 1,
2295         argc            => 1,
2296 );                      
2297
2298
2299 =cut
2300
2301 sub biblio_fingerprint_record {
2302         my $self = shift;
2303         my $client = shift;
2304         my $rec = shift;
2305
2306         OpenILS::Application::Ingest->post_init();
2307
2308         my $marc = OpenILS::Application::Ingest
2309                         ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2310                         ->marc;
2311
2312         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2313         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2314         return $fp;
2315 }
2316 __PACKAGE__->register_method(  
2317         api_name        => "open-ils.worm.fingerprint.record",
2318         method          => "biblio_fingerprint_record",
2319         api_level       => 1,
2320         argc            => 1,
2321 );                      
2322
2323 our $fp_script;
2324 sub biblio_fingerprint {
2325         my $self = shift;
2326         my $client = shift;
2327         my $marc = shift;
2328
2329         OpenILS::Application::Ingest->post_init();
2330
2331         $marc = $parser->parse_string($marc) unless (ref $marc);
2332
2333         my $mods = OpenILS::Application::Ingest::entityize(
2334                 $mods_sheet
2335                         ->transform( $marc )
2336                         ->documentElement
2337                         ->toString,
2338                 'D'
2339         );
2340
2341         $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2342
2343         warn $marc;
2344         $log->internal("Got MARC [$marc]");
2345         $log->internal("Created MODS [$mods]");
2346
2347         if(!$fp_script) {
2348                 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2349                 my $conf = OpenSRF::Utils::SettingsClient->new;
2350
2351                 my $libs        = $conf->config_value(@pfx, 'script_path');
2352                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
2353                 my $script_libs = (ref($libs)) ? $libs : [$libs];
2354
2355                 $log->debug("Loading script $script_file for biblio fingerprinting...");
2356                 
2357                 $fp_script = new OpenILS::Utils::ScriptRunner
2358                         ( file          => $script_file,
2359                           paths         => $script_libs,
2360                           reset_count   => 1000 );
2361         }
2362
2363         $log->debug("Applying environment for biblio fingerprinting...");
2364
2365         my $env = {marc => $marc, mods => $mods};
2366         #my $res = {fingerprint => '', quality => '0'};
2367
2368         $fp_script->insert('environment' => $env);
2369         #$fp_script->insert('result' => $res);
2370
2371         $log->debug("Running script for biblio fingerprinting...");
2372
2373         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return 0);
2374
2375         $log->debug("Script for biblio fingerprinting completed successfully...");
2376
2377         return $res;
2378 }
2379 __PACKAGE__->register_method(  
2380         api_name        => "open-ils.worm.fingerprint.marc",
2381         method          => "biblio_fingerprint",
2382         api_level       => 1,
2383         argc            => 1,
2384 );                      
2385
2386 # --------------------------------------------------------------------------------
2387
2388 1;
2389
2390 __END__
2391 my $in_xact;
2392 my $begin;
2393 my $commit;
2394 my $rollback;
2395 my $lookup;
2396 my $update_entry;
2397 my $mr_lookup;
2398 my $mr_update;
2399 my $mr_create;
2400 my $create_source_map;
2401 my $sm_lookup;
2402 my $rm_old_rd;
2403 my $rm_old_sm;
2404 my $rm_old_fr;
2405 my $rm_old_tr;
2406 my $rm_old_ar;
2407 my $rm_old_sr;
2408 my $rm_old_kr;
2409 my $rm_old_ser;
2410
2411 my $fr_create;
2412 my $rd_create;
2413 my $create = {};
2414
2415 my %descriptor_code = (
2416         item_type => 'substr($ldr,6,1)',
2417         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2418         bib_level => 'substr($ldr,7,1)',
2419         control_type => 'substr($ldr,8,1)',
2420         char_encoding => 'substr($ldr,9,1)',
2421         enc_level => 'substr($ldr,17,1)',
2422         cat_form => 'substr($ldr,18,1)',
2423         pub_status => 'substr($ldr,5,1)',
2424         item_lang => 'substr($oo8,35,3)',
2425         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2426         audience => 'substr($oo8,22,1)',
2427 );
2428
2429 sub wormize {
2430
2431         my $self = shift;
2432         my $client = shift;
2433         my @docids = @_;
2434
2435         my $no_map = 0;
2436         if ($self->api_name =~ /no_map/o) {
2437                 $no_map = 1;
2438         }
2439
2440         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2441                 unless ($in_xact);
2442         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2443                 unless ($begin);
2444         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2445                 unless ($commit);
2446         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2447                 unless ($rollback);
2448         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2449                 unless ($sm_lookup);
2450         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2451                 unless ($mr_lookup);
2452         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2453                 unless ($mr_update);
2454         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2455                 unless ($lookup);
2456         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2457                 unless ($update_entry);
2458         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2459                 unless ($rm_old_sm);
2460         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2461                 unless ($rm_old_rd);
2462         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2463                 unless ($rm_old_fr);
2464         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2465                 unless ($rm_old_tr);
2466         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2467                 unless ($rm_old_ar);
2468         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2469                 unless ($rm_old_sr);
2470         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2471                 unless ($rm_old_kr);
2472         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2473                 unless ($rm_old_ser);
2474         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2475                 unless ($mr_create);
2476         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2477                 unless ($create_source_map);
2478         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2479                 unless ($rd_create);
2480         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2481                 unless ($fr_create);
2482         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2483                 unless ($$create{title});
2484         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2485                 unless ($$create{author});
2486         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2487                 unless ($$create{subject});
2488         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2489                 unless ($$create{keyword});
2490         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2491                 unless ($$create{series});
2492
2493
2494         my ($outer_xact) = $in_xact->run;
2495         try {
2496                 unless ($outer_xact) {
2497                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2498                         my ($r) = $begin->run($client);
2499                         unless (defined $r and $r) {
2500                                 $rollback->run;
2501                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2502                         }
2503                 }
2504         } catch Error with {
2505                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2506         };
2507
2508         my @source_maps;
2509         my @entry_list;
2510         my @mr_list;
2511         my @rd_list;
2512         my @ns_list;
2513         my @mods_data;
2514         my $ret = 0;
2515         for my $entry ( $lookup->run(@docids) ) {
2516                 # step -1: grab the doc from storage
2517                 next unless ($entry);
2518
2519                 if(!$mods_sheet) {
2520                         my $xslt_doc = $parser->parse_file(
2521                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2522                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2523                 }
2524
2525                 my $xml = $entry->marc;
2526                 my $docid = $entry->id;
2527                 my $marcdoc = $parser->parse_string($xml);
2528                 my $modsdoc = $mods_sheet->transform($marcdoc);
2529
2530                 my $mods = $modsdoc->documentElement;
2531                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2532
2533                 $entry->fingerprint( fingerprint_mods( $mods ) );
2534                 push @entry_list, $entry;
2535
2536                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2537
2538                 unless ($no_map) {
2539                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2540                         if (!$mr || !@$mr) {
2541                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2542                                 $mr = new Fieldmapper::metabib::metarecord;
2543                                 $mr->fingerprint( $entry->fingerprint );
2544                                 $mr->master_record( $entry->id );
2545                                 my ($new_mr) = $mr_create->run($mr);
2546                                 $mr->id($new_mr);
2547                                 unless (defined $mr) {
2548                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2549                                 }
2550                         } else {
2551                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2552                                 $mr->mods('');
2553                                 push @mr_list, $mr;
2554                         }
2555
2556                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2557                         $sm->metarecord( $mr->id );
2558                         $sm->source( $entry->id );
2559                         push @source_maps, $sm;
2560                 }
2561
2562                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2563                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2564
2565                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2566                 for my $rd_field ( keys %descriptor_code ) {
2567                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2568                 }
2569                 $rd_obj->record( $docid );
2570                 push @rd_list, $rd_obj;
2571
2572                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2573
2574                 # step 2: build the KOHA rows
2575                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2576                 $_->record( $docid ) for (@tmp_list);
2577                 push @ns_list, @tmp_list;
2578
2579                 $ret++;
2580
2581                 last unless ($self->api_name =~ /batch$/o);
2582         }
2583
2584         $rm_old_rd->run( { record => \@docids } );
2585         $rm_old_fr->run( { record => \@docids } );
2586         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2587         $rm_old_tr->run( { source => \@docids } );
2588         $rm_old_ar->run( { source => \@docids } );
2589         $rm_old_sr->run( { source => \@docids } );
2590         $rm_old_kr->run( { source => \@docids } );
2591         $rm_old_ser->run( { source => \@docids } );
2592
2593         unless ($no_map) {
2594                 my ($sm) = $create_source_map->run(@source_maps);
2595                 unless (defined $sm) {
2596                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2597                 }
2598                 my ($mr) = $mr_update->run(@mr_list);
2599                 unless (defined $mr) {
2600                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2601                 }
2602         }
2603
2604         my ($re) = $update_entry->run(@entry_list);
2605         unless (defined $re) {
2606                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2607         }
2608
2609         my ($rd) = $rd_create->run(@rd_list);
2610         unless (defined $rd) {
2611                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2612         }
2613
2614         my ($fr) = $fr_create->run(@ns_list);
2615         unless (defined $fr) {
2616                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2617         }
2618
2619         # step 5: insert the new metadata
2620         for my $class ( qw/title author subject keyword series/ ) {
2621                 my @md_list = ();
2622                 for my $doc ( @mods_data ) {
2623                         my ($did) = keys %$doc;
2624                         my ($data) = values %$doc;
2625
2626                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2627                         for my $row ( keys %{ $$data{$class} } ) {
2628                                 next unless (exists $$data{$class}{$row});
2629                                 next unless ($$data{$class}{$row}{value});
2630                                 my $fm_obj = $fm_constructor->new;
2631                                 $fm_obj->value( $$data{$class}{$row}{value} );
2632                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2633                                 $fm_obj->source( $did );
2634                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2635
2636                                 push @md_list, $fm_obj;
2637                         }
2638                 }
2639                         
2640                 my ($cr) = $$create{$class}->run(@md_list);
2641                 unless (defined $cr) {
2642                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2643                 }
2644         }
2645
2646         unless ($outer_xact) {
2647                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2648                 my ($c) = $commit->run;
2649                 unless (defined $c and $c) {
2650                         $rollback->run;
2651                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2652                 }
2653         }
2654
2655         return $ret;
2656 }
2657 __PACKAGE__->register_method( 
2658         api_name        => "open-ils.worm.wormize",
2659         method          => "wormize",
2660         api_level       => 1,
2661         argc            => 1,
2662 );
2663 __PACKAGE__->register_method( 
2664         api_name        => "open-ils.worm.wormize.no_map",
2665         method          => "wormize",
2666         api_level       => 1,
2667         argc            => 1,
2668 );
2669 __PACKAGE__->register_method( 
2670         api_name        => "open-ils.worm.wormize.batch",
2671         method          => "wormize",
2672         api_level       => 1,
2673         argc            => 1,
2674 );
2675 __PACKAGE__->register_method( 
2676         api_name        => "open-ils.worm.wormize.no_map.batch",
2677         method          => "wormize",
2678         api_level       => 1,
2679         argc            => 1,
2680 );
2681
2682
2683 my $ain_xact;
2684 my $abegin;
2685 my $acommit;
2686 my $arollback;
2687 my $alookup;
2688 my $aupdate_entry;
2689 my $amr_lookup;
2690 my $amr_update;
2691 my $amr_create;
2692 my $acreate_source_map;
2693 my $asm_lookup;
2694 my $arm_old_rd;
2695 my $arm_old_sm;
2696 my $arm_old_fr;
2697 my $arm_old_tr;
2698 my $arm_old_ar;
2699 my $arm_old_sr;
2700 my $arm_old_kr;
2701 my $arm_old_ser;
2702
2703 my $afr_create;
2704 my $ard_create;
2705 my $acreate = {};
2706
2707 sub authority_wormize {
2708
2709         my $self = shift;
2710         my $client = shift;
2711         my @docids = @_;
2712
2713         my $no_map = 0;
2714         if ($self->api_name =~ /no_map/o) {
2715                 $no_map = 1;
2716         }
2717
2718         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2719                 unless ($in_xact);
2720         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2721                 unless ($begin);
2722         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2723                 unless ($commit);
2724         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2725                 unless ($rollback);
2726         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2727                 unless ($alookup);
2728         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2729                 unless ($aupdate_entry);
2730         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2731                 unless ($arm_old_rd);
2732         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2733                 unless ($arm_old_fr);
2734         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2735                 unless ($ard_create);
2736         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2737                 unless ($afr_create);
2738
2739
2740         my ($outer_xact) = $in_xact->run;
2741         try {
2742                 unless ($outer_xact) {
2743                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2744                         my ($r) = $begin->run($client);
2745                         unless (defined $r and $r) {
2746                                 $rollback->run;
2747                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2748                         }
2749                 }
2750         } catch Error with {
2751                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2752         };
2753
2754         my @source_maps;
2755         my @entry_list;
2756         my @mr_list;
2757         my @rd_list;
2758         my @ns_list;
2759         my @mads_data;
2760         my $ret = 0;
2761         for my $entry ( $lookup->run(@docids) ) {
2762                 # step -1: grab the doc from storage
2763                 next unless ($entry);
2764
2765                 #if(!$mads_sheet) {
2766                 #       my $xslt_doc = $parser->parse_file(
2767                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2768                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2769                 #}
2770
2771                 my $xml = $entry->marc;
2772                 my $docid = $entry->id;
2773                 my $marcdoc = $parser->parse_string($xml);
2774                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2775
2776                 #my $mads = $madsdoc->documentElement;
2777                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2778
2779                 push @entry_list, $entry;
2780
2781                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2782                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2783
2784                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2785                 for my $rd_field ( keys %descriptor_code ) {
2786                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2787                 }
2788                 $rd_obj->record( $docid );
2789                 push @rd_list, $rd_obj;
2790
2791                 # step 2: build the KOHA rows
2792                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2793                 $_->record( $docid ) for (@tmp_list);
2794                 push @ns_list, @tmp_list;
2795
2796                 $ret++;
2797
2798                 last unless ($self->api_name =~ /batch$/o);
2799         }
2800
2801         $arm_old_rd->run( { record => \@docids } );
2802         $arm_old_fr->run( { record => \@docids } );
2803
2804         my ($rd) = $ard_create->run(@rd_list);
2805         unless (defined $rd) {
2806                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2807         }
2808
2809         my ($fr) = $fr_create->run(@ns_list);
2810         unless (defined $fr) {
2811                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2812         }
2813
2814         unless ($outer_xact) {
2815                 $log->debug("Commiting transaction started by Ingest.", INFO);
2816                 my ($c) = $commit->run;
2817                 unless (defined $c and $c) {
2818                         $rollback->run;
2819                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2820                 }
2821         }
2822
2823         return $ret;
2824 }
2825 __PACKAGE__->register_method( 
2826         api_name        => "open-ils.worm.authortiy.wormize",
2827         method          => "wormize",
2828         api_level       => 1,
2829         argc            => 1,
2830 );
2831 __PACKAGE__->register_method( 
2832         api_name        => "open-ils.worm.authority.wormize.batch",
2833         method          => "wormize",
2834         api_level       => 1,
2835         argc            => 1,
2836 );
2837
2838
2839 # --------------------------------------------------------------------------------
2840
2841
2842 sub _marcxml_to_full_rows {
2843
2844         my $marcxml = shift;
2845         my $type = shift || 'Fieldmapper::metabib::full_rec';
2846
2847         my @ns_list;
2848         
2849         my $root = $marcxml->documentElement;
2850
2851         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2852                 next unless $tagline;
2853
2854                 my $ns = new Fieldmapper::metabib::full_rec;
2855
2856                 $ns->tag( 'LDR' );
2857                 my $val = NFD($tagline->textContent);
2858                 $val =~ s/(\pM+)//gso;
2859                 $ns->value( $val );
2860
2861                 push @ns_list, $ns;
2862         }
2863
2864         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2865                 next unless $tagline;
2866
2867                 my $ns = new Fieldmapper::metabib::full_rec;
2868
2869                 $ns->tag( $tagline->getAttribute( "tag" ) );
2870                 my $val = NFD($tagline->textContent);
2871                 $val =~ s/(\pM+)//gso;
2872                 $ns->value( $val );
2873
2874                 push @ns_list, $ns;
2875         }
2876
2877         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2878                 next unless $tagline;
2879
2880                 my $tag = $tagline->getAttribute( "tag" );
2881                 my $ind1 = $tagline->getAttribute( "ind1" );
2882                 my $ind2 = $tagline->getAttribute( "ind2" );
2883
2884                 for my $data ( $tagline->childNodes ) {
2885                         next unless $data;
2886
2887                         my $ns = $type->new;
2888
2889                         $ns->tag( $tag );
2890                         $ns->ind1( $ind1 );
2891                         $ns->ind2( $ind2 );
2892                         $ns->subfield( $data->getAttribute( "code" ) );
2893                         my $val = NFD($data->textContent);
2894                         $val =~ s/(\pM+)//gso;
2895                         $ns->value( lc($val) );
2896
2897                         push @ns_list, $ns;
2898                 }
2899         }
2900         return @ns_list;
2901 }
2902
2903 sub _get_field_value {
2904
2905         my( $root, $xpath ) = @_;
2906
2907         my $string = "";
2908
2909         # grab the set of matching nodes
2910         my @nodes = $root->findnodes( $xpath );
2911         for my $value (@nodes) {
2912
2913                 # grab all children of the node
2914                 my @children = $value->childNodes();
2915                 for my $child (@children) {
2916
2917                         # add the childs content to the growing buffer
2918                         my $content = quotemeta($child->textContent);
2919                         next if ($string =~ /$content/);  # uniquify the values
2920                         $string .= $child->textContent . " ";
2921                 }
2922                 if( ! @children ) {
2923                         $string .= $value->textContent . " ";
2924                 }
2925         }
2926         $string = NFD($string);
2927         $string =~ s/(\pM)//gso;
2928         return lc($string);
2929 }
2930
2931
2932 sub modsdoc_to_values {
2933         my( $self, $mods ) = @_;
2934         my $data = {};
2935         for my $class (keys %$xpathset) {
2936                 $data->{$class} = {};
2937                 for my $type (keys %{$xpathset->{$class}}) {
2938                         $data->{$class}->{$type} = {};
2939                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
2940                 }
2941         }
2942         return $data;
2943 }
2944
2945
2946 1;
2947
2948