]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
fixing minor MR mapping bug
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Output formats known to the ingest code, keyed by format name.  Every
# entry starts out as a hash holding only the XML namespace URI ('ns');
# post_init() later attaches a compiled stylesheet under 'xslt' for the
# mods and mods3 entries.
our %supported_formats = map { ( $_->[0] => { ns => $_->[1] } ) } (
    [ mods3   => 'http://www.loc.gov/mods/v3' ],
    [ mods    => 'http://www.loc.gov/mods/' ],
    [ marcxml => 'http://www.loc.gov/MARC21/slim' ],
    [ srw_dc  => 'info:srw/schema/1/dc-schema' ],
    [ oai_dc  => 'http://www.openarchives.org/OAI/2.0/oai_dc/' ],
    [ rdf_dc  => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ],
    [ atom    => 'http://www.w3.org/2005/Atom' ],
    [ rss091  => 'http://my.netscape.com/rdf/simple/0.9/' ],
    [ rss092  => '' ],
    [ rss093  => '' ],
    [ rss094  => '' ],
    [ rss10   => 'http://purl.org/rss/1.0/' ],
    [ rss11   => 'http://purl.org/net/rss1.1#' ],
    [ rss2    => '' ],
);
37
38
# Logger is used as a class: methods are called on the package name.
my $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT processor shared by every package in this file.
my $parser = XML::LibXML->new;
my $xslt   = XML::LibXSLT->new;

# Declared for stylesheet handles; not assigned anywhere in the code visible here.
my ($mods_sheet, $mads_sheet);

# Index definitions filled in by post_init():
#   $xpathset->{field_class}{field_name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Intentionally empty application hooks.
sub initialize {}
sub child_init {}
49
# Load the configuration the ingest methods depend on, once per process:
#   * compile the MARC->MODS and MARC->MODS3 stylesheets found in the
#     configured xsl directory and store them in %supported_formats;
#   * fetch every config.metabib_field row through open-ils.cstore and
#     record its xpath, id and format in $xpathset.
# The work is skipped whenever $xpathset already has at least one key.
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format name, stylesheet path below $xsldir, log message ]
    my @sheets = (
        [ mods  => "/MARC21slim2MODS.xsl",  "Loading MODS XSLT" ],
        [ mods3 => "/MARC21slim2MODS3.xsl", "Loading MODS v3 XSLT" ],
    );

    for my $sheet (@sheets) {
        my ($format, $path, $message) = @$sheet;

        # already compiled by an earlier (possibly partial) run
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $path );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        ->gather(1);

    # nothing usable came back; $xpathset stays empty so the next call retries
    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my $slot = ( $xpathset->{ $f->field_class }->{ $f->name } ||= {} );
        $slot->{xpath}  = $f->xpath;
        $slot->{id}     = $f->id;
        $slot->{format} = $f->format;
        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }
}
85
# Normalize a string and replace every non-ASCII character with a
# hexadecimal numeric character reference (&#xNNNN;).
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalization; anything else (including no
#            value at all) selects NFC
#
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    # callers in this file pass only one argument; treat that as "not D"
    $form = '' unless defined $form;

    if ($form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD plus the supplementary planes (U+10000..U+10FFFF).
    # The earlier range stopped at U+FFFD, which left supplementary-plane
    # characters in the output as raw non-ASCII.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
99
100 # --------------------------------------------------------------------------------
101 # Biblio ingest
102
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
106
# Re-ingest one bibliographic record object and store the results.
#
#   $bib - a bib record object; its id, marc, fingerprint and quality
#          accessors are used here
#
# The derived data comes from open-ils.ingest.full.biblio.object.readonly.
# Inside one open-ils.cstore transaction this method then:
#   * replaces the record's metabib.full_rec rows;
#   * replaces its metabib.record_descriptor row;
#   * replaces its title/author/subject/keyword/series field entries;
#   * replaces its metarecord source map row, creating the metarecord for
#     the fingerprint when none exists, otherwise re-picking the
#     metarecord's master record by quality;
#   * updates the bib record itself (fingerprint and quality were set above).
#
# Returns the bib id, or undef when the ingest produced nothing or the
# commit request returned a false value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry -> metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);


    # Get the matching MR, if any.
    my $mr = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord.search',
        { fingerprint => $bib->fingerprint }
    )->gather(1);

    if (!$mr) {
        # first record with this fingerprint: it becomes the master
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );
    } else {
        # this record's own map row was deleted above, so these are the
        # other records currently attached to the metarecord
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'      => { bre => [ qw/id quality/ ] },
                order_by        => { bre => "quality desc" },
                limit           => 1,
                }
            )->gather(1);

            # $best is undefined when the search returns no row; fall
            # back to this record instead of dying on the method call
            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # Release the connected session on both outcomes; it was previously
    # left open whether or not the commit succeeded.
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
227
# Fetch the bib record with id $rec from open-ils.cstore and hand the
# object to open-ils.ingest.full.biblio.object.  The fetch is wrapped in
# a transaction that is rolled back straight afterwards.
# Returns whatever the object-level method returns, or undef when the
# record could not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$r or !@$r) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return $ingest->run($r);
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record",
    method          => "rw_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
252
# Compute, without storing anything, all derived ingest data for a bib
# record object: flattened MARC rows, classed field entries, the
# fingerprint and the record descriptor.  Rows, entries and the
# descriptor (when one came back) are stamped with the bib's id.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint
# and descriptor.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # the first two methods get the parsed document, the last two the XML text
    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@field_entries) {
        $entry->source($bib->id);
    }
    for my $row (@full_rows) {
        $row->record($bib->id);
    }
    if ($descriptor) {
        $descriptor->record($bib->id);
    }

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object.readonly",
    method          => "ro_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
278
# Same computation as the object-level read-only ingest, but starting
# from a MARCXML string; no record id is stamped on the results.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint
# and descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $marc) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($marc);
    my $document = $parser->parse_string($xml);

    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml.readonly",
    method          => "ro_biblio_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
299
# Read-only ingest of the bib record with id $rec: retrieve the record
# from open-ils.cstore, run open-ils.ingest.full.biblio.xml.readonly on
# its MARC, and stamp the record id on the field entries, the full_rec
# rows and the descriptor.
# Returns the result hash ref, or undef when the record was not found.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor can be missing; the object-level method guards the
    # same call, so do it here too rather than die on undef.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
327
# Streaming read-only ingest keyed by record id.  Reads one message at a
# time from the client (5 second timeout per read); each message's
# content is a bib record id.  The result of
# open-ils.ingest.full.biblio.record.readonly for that id is sent back
# as one response.  Reading stops at the first message whose content is
# undefined, or when recv returns nothing.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined($rec) or last;

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($rec);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
    method          => "ro_biblio_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
358
# Streaming read-only ingest keyed by MARCXML.  Each message read from
# the client (5 second timeout per read) carries a MARCXML string; the
# result of open-ils.ingest.full.biblio.xml.readonly for it is sent back
# as one response.  Reading stops at the first message whose content is
# undefined, or when recv returns nothing.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined($xml) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method          => "ro_biblio_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
386
# Streaming ingest for import.  Each message read from the client
# (5 second timeout per read) carries a bib record object; its MARC is
# run through open-ils.ingest.full.biblio.xml.readonly, the resulting
# field entries and full_rec rows are stamped with the object's id, and
# the result is sent back as one response.  Nothing is stored by the
# code in this sub.  Reading stops at the first message whose content is
# undefined, or when recv returns nothing.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined($bib) or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($bib->id);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
    method          => "rw_biblio_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
417
418
419 # --------------------------------------------------------------------------------
420 # Authority ingest
421
422 package OpenILS::Application::Ingest::Authority;
423 use base qw/OpenILS::Application::Ingest/;
424 use Unicode::Normalize;
425
# Compute, without storing anything, the flattened MARC rows for an
# authority record object and stamp each row with the object's id.
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @full_rows = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    for my $row (@full_rows) {
        $row->record($bib->id);
    }

    return { full_rec => \@full_rows };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.object.readonly",
    method          => "ro_authority_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
446
# Compute the flattened MARC rows for an authority record supplied as a
# MARCXML string.  No record id is stamped on the rows.
# Returns a hash ref with the single key full_rec.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $marc) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($marc);
    my $document = $parser->parse_string($xml);

    my $flatten   = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @full_rows = $flatten->run($document);

    return { full_rec => \@full_rows };
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml.readonly",
    method          => "ro_authority_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
464
# Read-only ingest of the authority record with id $rec: retrieve the
# record from open-ils.cstore, run
# open-ils.ingest.full.authority.xml.readonly on its MARC, and stamp the
# record id on the resulting full_rec rows.
# Returns the result hash ref, or undef when the record was not found.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority xml method in this file returns only full_rec, so an
    # unguarded ->record call on the descriptor died on undef every time.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
491
# Streaming read-only authority ingest keyed by record id.  Each message
# read from the client (5 second timeout per read) carries an authority
# record id; the result of
# open-ils.ingest.full.authority.record.readonly for that id is sent
# back as one response.  Reading stops at the first message whose
# content is undefined, or when recv returns nothing.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined($rec) or last;

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
    method          => "ro_authority_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
521
# Streaming read-only authority ingest keyed by MARCXML.  Each message
# read from the client (5 second timeout per read) carries a MARCXML
# string; the result of open-ils.ingest.full.authority.xml.readonly for
# it is sent back as one response.  Reading stops at the first message
# whose content is undefined, or when recv returns nothing.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined($xml) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
    method          => "ro_authority_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
549
# Streaming authority ingest for import.  Each message read from the
# client (5 second timeout per read) carries a record object; its MARC
# is run through open-ils.ingest.full.authority.xml.readonly, the
# resulting full_rec rows are stamped with the object's id, and the
# result is sent back as one response.  Nothing is stored by the code in
# this sub.  Reading stops at the first message whose content is
# undefined, or when recv returns nothing.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # session object is created here but not used below
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined($bib) or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.bib_stream.import",
    method          => "rw_authority_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
579
580
581 # --------------------------------------------------------------------------------
582 # MARC index extraction
583
584 package OpenILS::Application::Ingest::XPATH;
585 use base qw/OpenILS::Application::Ingest/;
586 use Unicode::Normalize;
587
# Collect the text found by an XPath expression into one space-separated
# string.
#
#   $xml       - an XML documentElement (anything offering setNamespace
#                and findnodes)
#   $xpath     - the XPath expression to evaluate against $xml
#   $ns_uri    - namespace URI; registered on $xml together with
#   $ns_prefix - this prefix, but only when both are true values
#   $unique    - when true, a child's text is skipped if it already
#                occurs anywhere in the string built so far
#
# For every matching node the text of each child node is appended,
# followed by a space; a matching node with no children contributes its
# own text instead.  Returns the NFD-normalized result.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    for my $value ( $xml->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();

        if (!@children) {
            $string .= $value->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $content = $child->textContent;

            # Uniquify with a literal substring test.  The earlier regex
            # form (/$content/ after quotemeta) became an empty pattern
            # for empty text, and Perl treats an empty pattern as
            # "reuse the last successful pattern".
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
    }
    return NFD($string);
}
619
# Build index field entries for the requested field classes.
#
#   $xml     - MARCXML text, or an already parsed document (any reference
#              is used as-is)
#   @classes - field class names; each must be a key of $xpathset to
#              produce anything
#
# For every definition of every requested class the source document is
# transformed with the definition's format stylesheet when one is
# loaded (one transform per format per call), the definition's XPath is
# evaluated, and the extracted text is normalized: NFD, combining marks
# and control/other characters removed, trailing non-word characters
# trimmed, dots between word characters dropped, lower-cased.  Each
# non-empty value is sent to the client as a
# Fieldmapper::metabib::<class>_field_entry object with value and field
# set.  Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document
    my %transform_cache;

    for my $class (@classes) {
        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            # formats without a stylesheet are evaluated against the source
            my $document = $xml;
            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            $value or next;

            $value = NFD($value);
            for ($value) {
                s/\pM+//sgo;
                s/\pC+//sgo;
                s/\W+$//sgo;

                s/(\w)\.+(\w)/$1$2/sgo;
            }
            $value = lc($value);

            my $fm = $class_constructor->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.xml",
    method          => "class_index_string_xml",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
676
# Build index field entries for the requested field classes of one
# stored record.
#
#   $rec     - record id to retrieve
#   @classes - one or more field class names
#
# The record's MARC is passed to open-ils.ingest.field_entry.class.xml;
# each entry that comes back gets $rec as its source and is sent to the
# client.  Returns undef, also when the record was not found.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, as the xml-level method does; a
    # single shift silently dropped all classes after the first.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves from authority.record_entry although the
    # entries built are metabib field entries and the sibling
    # all_index_string_record reads biblio.record_entry - confirm the table.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
704
# Build index field entries for every configured field class.
#
#   $xml - MARCXML text or a parsed document; passed through unchanged to
#          open-ils.ingest.field_entry.class.xml
#
# Each entry that comes back is sent to the client.  Always returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list is read from $xpathset right here, so it has to be
    # loaded first (the record-level sibling already does this).  Without
    # it, a fresh process passed an empty class list and got no entries.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
722
# Build index field entries for every configured field class of the bib
# record with id $rec.  The record's MARC is passed to
# open-ils.ingest.field_entry.class.xml together with every key of
# $xpathset; each entry that comes back gets $rec as its source and is
# sent to the client.  Returns undef, also when the record was not found.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$r or !@$r) {
        return undef;
    }

    my @entries = $self
        ->method_lookup("open-ils.ingest.field_entry.class.xml")
        ->run($r->marc, keys(%$xpathset));

    for my $fm (@entries) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.record",
    method          => "all_index_string_record",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
749
750 # --------------------------------------------------------------------------------
751 # Flat MARC
752
753 package OpenILS::Application::Ingest::FlatMARC;
754 use base qw/OpenILS::Application::Ingest/;
755 use Unicode::Normalize;
756
757
# Normalize one MARC value for storage: NFD, then remove combining
# marks, remove control/other characters, and trim trailing non-word
# characters.
sub _normalize_full_rec_value {
    my $val = NFD(shift);
    $val =~ s/\pM+//sg;
    $val =~ s/\pC+//sg;
    $val =~ s/\W+$//sg;
    return $val;
}

# Flatten a parsed MARCXML document into full_rec row objects.
#
#   $marcxml - parsed document (anything offering findnodes)
#   $xmltype - Fieldmapper namespace for the row class; defaults to
#              'metabib', giving Fieldmapper::metabib::full_rec
#
# Rows are produced in this order for the first element whose local
# name is "record":
#   * one row per leader, tagged 'LDR';
#   * one row per controlfield, tagged with its tag attribute;
#   * one row per subfield of every datafield, carrying tag, ind1, ind2
#     and the subfield code.
# Every value is normalized; only subfield values are also lower-cased.
# Returns the list of rows, or an empty list when the document has no
# record element.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    # Without a record element there is nothing to flatten; return no
    # rows instead of dying on a method call against undef.
    unless ($root) {
        $log->debug("No record element found in $xmltype xml");
        return @ns_list;
    }

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( 'LDR' );
        $ns->value( _normalize_full_rec_value($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _normalize_full_rec_value($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
            next unless $data;

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );

            # only subfield values are lower-cased
            $ns->value( lc( _normalize_full_rec_value($data->textContent) ) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
831
# Stream the flattened full_rec rows for a MARCXML record back to the client.
#
# $xml may be a raw XML string (entityized and parsed here) or an already
# parsed document.  Rows are built as authority rows when the invoked API name
# contains "authority", and as metabib rows otherwise.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$log->debug("processing [$xml]");

	# only strings need parsing; references are taken to be parsed documents
	if (!ref $xml) {
		$xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
	}

	my $type = ($self->api_name =~ /authority/) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}

# Register the same handler under the authority and biblio API names.
for my $flavor (qw/authority biblio/) {
	__PACKAGE__->register_method(
		api_name	=> "open-ils.ingest.flat_marc.$flavor.xml",
		method		=> "flat_marc_xml",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
863
# Fetch a record entry by id through open-ils.cstore and stream its flattened
# MARC rows to the client.  The record class is 'authority' when the invoked
# API name contains "authority", and 'biblio' otherwise.  Returns undef
# without responding when the record is missing or has no MARC.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
			->gather(1);

	return undef unless ($entry and $entry->marc);

	# hand the MARC to the matching flat_marc.*.xml method and relay each row
	my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
	foreach my $row ($flattener->run($entry->marc)) {
		$client->respond($row);
		$log->debug(JSON->perl2JSON($row), DEBUG);
	}
	return undef;
}

# Register the same handler under the biblio and authority API names.
for my $flavor (qw/biblio authority/) {
	__PACKAGE__->register_method(
		api_name	=> "open-ils.ingest.flat_marc.$flavor.record_entry",
		method		=> "flat_marc_record",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
902
903 # --------------------------------------------------------------------------------
904 # Fingerprinting
905
906 package OpenILS::Application::Ingest::Biblio::Fingerprint;
907 use base qw/OpenILS::Application::Ingest/;
908 use Unicode::Normalize;
909 use OpenSRF::EX qw/:try/;
910
# Fingerprint a bibliographic record identified by id.
#
# Retrieves the record entry through open-ils.cstore, runs its MARC through
# open-ils.ingest.fingerprint.xml and returns the resulting hash with its
# 'quality' value truncated to an integer.  Returns undef when the record is
# missing, has no MARC, or no fingerprint result came back.
sub biblio_fingerprint_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	OpenILS::Application::Ingest->post_init();

	my $r = OpenSRF::AppSession
			->create('open-ils.cstore')
			->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
			->gather(1);

	return undef unless ($r and $r->marc);

	my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

	# The fingerprint method returns undef on failure.  Stop here so the
	# assignment below cannot autovivify a hash and hand back a bogus
	# { quality => 0 } result.
	unless (ref $fp) {
		$log->error("No fingerprint produced for record $rec");
		return undef;
	}

	# NOTE(review): the :level constants are imported into the parent package
	# only -- confirm that INFO resolves as intended in this package.
	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	$fp->{quality} = int($fp->{quality});
	return $fp;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.record",
	method		=> "biblio_fingerprint_record",
	api_level	=> 1,
	argc		=> 1,
);
936
# Script runner for fingerprinting; built on first use and kept afterwards.
our $fp_script;

# Run the configured biblio fingerprint script against a MARCXML string.
#
# The XML is entityized and inserted into the script as environment => {marc}.
# Returns whatever the script run yields, or undef (after logging an error)
# when the run returns a false value.
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be a single value or a list; always pass a list
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$fp_script->insert('environment' => {marc => $xml} => 1);

	# Branch explicitly on failure so the early return does not depend on
	# the logger call returning a true value.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.fingerprint.xml",
	method		=> "biblio_fingerprint",
	api_level	=> 1,
	argc		=> 1,
);
974
# Script runner for descriptor extraction; built on first use and kept afterwards.
our $rd_script;

# Run the configured biblio descriptor script against a MARCXML string.
#
# The XML is entityized and inserted into the script as 'environment.marc'.
# Returns whatever the script run yields, or undef (after logging an error)
# when the run returns a false value.
sub biblio_descriptor {
	my $self = shift;
	my $client = shift;
	my $xml = OpenILS::Application::Ingest::entityize(shift);

	$log->internal("Got MARC [$xml]");

	if(!$rd_script) {
		my @pfx = ( "apps", "open-ils.ingest","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
		# script_path may be a single value or a list; always pass a list
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio descriptor extraction...");

		$rd_script = OpenILS::Utils::ScriptRunner->new(
			file		=> $script_file,
			paths		=> $script_libs,
			reset_count	=> 100,
		);
	}

	$log->debug("Setting up environment for descriptor extraction script...");
	$rd_script->insert('environment.marc' => $xml => 1);
	$log->debug("Environment building complete...");

	# Branch explicitly on failure so the early return does not depend on
	# the logger call returning a true value.
	my $res = $rd_script->run;
	unless ($res) {
		$log->error( "Descriptor script died!  $@" );
		return undef;
	}
	$log->debug("Script for biblio descriptor extraction completed successfully");

	return $res;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.ingest.descriptor.xml",
	method		=> "biblio_descriptor",
	api_level	=> 1,
	argc		=> 1,
);
1014
1015
1016 1;
1017
1018 __END__
1019
# Return the result of open-ils.storage.transaction.current, i.e. whatever the
# storage layer reports as the current transaction.
sub in_transaction {
	OpenILS::Application::Ingest->post_init();

	my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current;
}
1024
# Start a storage transaction unless one is already current.
#
# A failed BEGIN is rolled back and raised as a PANIC inside the try block;
# the otherwise handler only logs it, so callers must inspect the return
# value, which is the storage layer's current transaction after the attempt.
sub begin_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			#__PACKAGE__->st_sess->connect;
			my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
			if (!$started) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				#__PACKAGE__->st_sess->disconnect;
				OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
			}
		}
	} otherwise {
		$log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
	};

	my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
	return $current;
}
1049
# Roll back the current storage transaction, if there is one.
#
# Any Error raised while rolling back is re-thrown as a PANIC.  Returns 1.
sub rollback_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		}
		else {
			__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
	};

	return 1;
}
1069
# Commit the current storage transaction, if there is one.
#
# A commit that reports failure is rolled back and raised as a PANIC; any
# Error raised inside the try block is re-thrown as a PANIC with the
# "Ingest Couldn't COMMIT transaction!" message.  Returns 1.
sub commit_transaction {
	my ($self, $client) = @_;

	OpenILS::Application::Ingest->post_init();
	my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

	try {
		#if (__PACKAGE__->st_sess->connected && $outer_xact) {
		if (!$outer_xact) {
			$log->debug("Ingest isn't inside a transaction.", INFO);
		}
		else {
			my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
			if (!$committed) {
				__PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
				OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
			}
			#__PACKAGE__->st_sess->disconnect;
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
	};

	return 1;
}
1095
# Look up the named method, run it with the remaining arguments, and return
# only the first value it produced (undef when it produced nothing).
sub storage_req {
	my ($self, $method, @args) = @_;

	my ($first) = __PACKAGE__->method_lookup( $method )->run( @args );
	return $first;
}
1102
# Delete the derived full_rec and record_descriptor rows of an authority record.
#
# The deletes run inside the 'scrub_authority_record' savepoint.  When no
# transaction is current, one is started here and committed or rolled back
# according to the outcome.  Returns 1 on success and 0 on failure.
sub scrub_authority_record {
	my ($self, $client, $rec) = @_;

	# remember whether the transaction is ours to finish
	my $commit = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

		for my $table (qw/full_rec record_descriptor/) {
			OpenILS::Application::Ingest->storage_req(
				"open-ils.storage.direct.authority.$table.mass_delete",
				{ record => $rec }
			);
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			OpenILS::Application::Ingest->commit_transaction;
		}
		else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}
	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.scrub.authority",
	method		=> "scrub_authority_record",
	api_level	=> 1,
	argc		=> 1,
);
1138
1139
# Delete the derived metabib rows of a bibliographic record and repair any
# metarecord that named it as master.
#
# $rec is a record id, or a hash search spec that is first resolved through
# the id_list search.  A metarecord whose master was $rec is re-pointed at the
# first remaining mapped source, or deleted when it has none left.  The work
# runs inside the 'scrub_metabib_record' savepoint; a transaction started here
# is committed or rolled back here.  Returns 1 on success and 0 on failure.
sub scrub_metabib_record {
	my ($self, $client, $rec) = @_;

	if ( ref($rec) && ref($rec) =~ /HASH/ ) {
		$rec = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	# remember whether the transaction is ours to finish
	my $commit = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	# derived tables to purge, in order, with the column that holds the record id
	my @purge = (
		[ full_rec		=> 'record' ],
		[ metarecord_source_map	=> 'source' ],
		[ record_descriptor	=> 'record' ],
		[ title_field_entry	=> 'source' ],
		[ author_field_entry	=> 'source' ],
		[ subject_field_entry	=> 'source' ],
		[ keyword_field_entry	=> 'source' ],
		[ series_field_entry	=> 'source' ],
	);

	my $success = 1;
	try {
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

		for my $target (@purge) {
			my ($table, $column) = @$target;
			OpenILS::Application::Ingest->storage_req(
				"open-ils.storage.direct.metabib.$table.mass_delete",
				{ $column => $rec }
			);
		}

		$log->debug( "Looking for metarecords whose master is $rec", DEBUG);
		my $masters = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec
		);

		for my $mr (@$masters) {
			$log->debug( "Found metarecord whose master is $rec", DEBUG);
			my $others = OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id
			);

			if (!@$others) {
				# no sources left: the metarecord itself goes away
				warn "Removing metarecord whose master is $rec";
				$log->debug( "Removing metarecord whose master is $rec", DEBUG);
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
				warn "Metarecord removed";
				$log->debug( "Metarecord removed", DEBUG);
				next;
			}

			# promote the first remaining source to master and clear the mods
			my $new_master = $others->[0]->source;
			$log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$new_master, DEBUG);
			$mr->master_record($new_master);
			OpenILS::Application::Ingest->storage_req(
				'open-ils.storage.direct.metabib.metarecord.remote_update',
				{ id => $mr->id },
				{ master_record => $new_master, mods => undef }
			);
		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

	} otherwise {
		my $err = shift;
		$log->debug('Scrubbing failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			OpenILS::Application::Ingest->commit_transaction;
		}
		else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}
	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.scrub.biblio",
	method		=> "scrub_metabib_record",
	api_level	=> 1,
	argc		=> 1,
);
1213
# Re-ingest every bibliographic record mapped to a metarecord.
#
# Looks up the source-map rows for $mrec and calls wormize_biblio_record on
# each source in turn, streaming one status hash per record:
#   { record, metarecord, success }  plus  error  when an Error was caught.
# $self is passed through, so the .nomap / .noscrub variants of the invoked
# API name also apply to the per-record calls.
sub wormize_biblio_metarecord {
	my $self = shift;
	my $client = shift;
	my $mrec = shift;

	my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

	for my $r (@$recs) {
		my $success = 0;
		try {
			$success = wormize_biblio_record($self => $client => $r->source);
			# report against the map row being processed ($r); the original
			# code read an undeclared $rec here
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				}
			);
		} catch Error with {
			my $e = shift;
			$client->respond(
				{ record  => $r->source,
				  metarecord => $r->metarecord,
				  success => $success,
				  error   => $e,
				}
			);
		};
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.nomap",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.noscrub",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.metarecord.nomap.noscrub",
	method		=> "wormize_biblio_metarecord",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1273
1274
# Rebuild the derived metabib data for one or more bibliographic records.
#
# $rec is a record id, or a hash search spec that is first resolved through
# the id_list search.  Unless the invoked API name contains "noscrub" the
# existing derived rows are scrubbed first.  Each record is then parsed and
# its fingerprint, full_rec rows, record descriptor and field entries are
# collected inside a per-record 'extract_data<id>' savepoint; unless the API
# name contains "nomap" it is also mapped to a metarecord, which is created
# when none matches the fingerprint.  Everything collected is written inside
# the 'wormize_record' savepoint, and each touched metarecord gets the
# highest-quality mapped record as its master.  A transaction started here is
# committed or rolled back here.  Returns 1 on success; 0 when no descriptor
# was extracted or an error was caught.
sub wormize_biblio_record {
	my ($self, $client, $rec) = @_;

	if ( ref($rec) && ref($rec) =~ /HASH/ ) {
		$rec = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
		);
	}

	# remember whether the transaction is ours to finish
	my $commit = 0;
	unless (OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client)
			or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my @classes = qw/title author subject keyword series/;

	my $success = 1;
	try {
		# clean up the cruft
		if ($self->api_name !~ /noscrub/) {
			$self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec )
				or OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $entries = OpenILS::Application::Ingest->storage_req(
			'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec
		);

		my (@full_rec, @rec_descriptor, @source_map, %metarecord);
		my %field_entry = map { $_ => [] } @classes;

		for my $bib (@$entries) {
			my $bib_id = $bib->id;
			my $savepoint = 'extract_data'.$bib_id;

			try {
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', $savepoint );

				my $xml = $parser->parse_string($bib->marc);

				# update the stored fingerprint/quality only when either changed
				my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
				if ($fp->{fingerprint} ne $bib->fingerprint || int($fp->{quality}) ne $bib->quality) {
					OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.biblio.record_entry.remote_update',
						{ id => $bib_id },
						{ fingerprint => $fp->{fingerprint},
						  quality     => int($fp->{quality}) }
					);
				}

				# the full_rec stuff
				for my $row ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
					$row->record( $bib_id );
					push @full_rec, $row;
				}

				# the rec_descriptor stuff
				my ($descriptor) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
				$descriptor->record( $bib_id );
				push @rec_descriptor, $descriptor;

				# the indexing field entry stuff
				for my $class (@classes) {
					for my $entry ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
						$entry->source( $bib_id );
						push @{ $field_entry{$class} }, $entry;
					}
				}

				if ($self->api_name !~ /nomap/) {
					my $mr = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
						$fp->{fingerprint}
					)->[0];

					# no metarecord carries this fingerprint yet: create one
					if (!$mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $bib_id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mapping = Fieldmapper::metabib::metarecord_source_map->new;
					$mapping->metarecord( $mr->id );
					$mapping->source( $bib_id );
					push @source_map, $mapping;

					$metarecord{$mr->id} = $mr;
				}

				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', $savepoint );
			} otherwise {
				my $err = shift;
				$log->debug('Data extraction failed for record '.$bib_id.': '.$err, ERROR);
				OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', $savepoint );
			};
		}

		if (!@rec_descriptor) {
			# nothing was extracted for any record
			$success = 0;
		}
		else {
			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );

			if (@source_map) {
				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
					@source_map
				);
			}

			# pick the highest-quality mapped record as each metarecord's master
			for my $mr ( values %metarecord ) {
				my $sources = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
					$mr->id
				);

				my $candidates = OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
					[ map { $_->source } @$sources ]
				);

				my ($master) = sort { $b->quality <=> $a->quality } @$candidates;

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.metarecord.remote_update',
					{ id => $mr->id },
					{ master_record => $master->id, mods => undef }
				);
			}

			if (@rec_descriptor) {
				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.record_descriptor.batch.create',
					@rec_descriptor
				);
			}

			if (@full_rec) {
				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.metabib.full_rec.batch.create',
					@full_rec
				);
			}

			for my $class (@classes) {
				my $batch = $field_entry{$class};
				next unless @$batch;

				OpenILS::Application::Ingest->storage_req(
					"open-ils.storage.direct.metabib.${class}_field_entry.batch.create",
					@$batch
				);
			}

			OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
		}

	} otherwise {
		my $err = shift;
		$log->debug('Wormization failed : '.$err, ERROR);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
		$success = 0;
	};

	if ($commit) {
		if ($success) {
			OpenILS::Application::Ingest->commit_transaction;
		}
		else {
			OpenILS::Application::Ingest->rollback_transaction;
		}
	}
	return $success;
}

# Register the handler under its plain name and the nomap / noscrub variants.
for my $variant ('', '.nomap', '.noscrub', '.nomap.noscrub') {
	__PACKAGE__->register_method(
		api_name	=> "open-ils.worm.wormize.biblio$variant",
		method		=> "wormize_biblio_record",
		api_level	=> 1,
		argc		=> 1,
	);
}
1475
# Regenerate the flattened full_rec rows for the authority record(s) matching
# $rec.
#
# Unless the API name this method was invoked under contains "noscrub", $rec
# is first handed to open-ils.worm.scrub.authority.  Every matching
# authority.record_entry is then parsed, flattened through
# open-ils.worm.flat_marc.authority.xml, and the resulting rows are
# batch-created inside the 'wormize_authority_record' savepoint.
#
# A transaction is begun here (and committed or rolled back at the end) only
# when one is not already in progress.
#
# Returns 1 on success, 0 when anything inside the try block threw.
sub wormize_authority_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    my $commit = 0;
    if (!OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;

    # Tracks whether the savepoint exists yet.  Scrubbing, fetching and
    # parsing all happen before it is set, and a failure there must not
    # trigger a rollback to a savepoint that was never created.
    my $savepoint_set = 0;

    try {
        # clean up the cruft
        unless ($self->api_name =~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

        my @full_rec = ();
        my @rec_descriptor = ();    # only referenced by the disabled descriptor code below
        for my $r (@$bibs) {
            my $xml = $parser->parse_string($r->marc);

            # the full_rec stuff
            for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
                $fr->record( $r->id );
                push @full_rec, $fr;
            }

            # the rec_descriptor stuff -- XXX What does this mean for authority records?
            #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
            #$rd->record( $r->id );
            #push @rec_descriptor, $rd;
        }

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
        $savepoint_set = 1;

        #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

    } otherwise {
        # shift() here pulls the caught exception off the handler's @_
        $log->debug('Wormization failed : '.shift(), ERROR);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' )
            if ($savepoint_set);
        $success = 0;
    };

    OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
    OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
    return $success;
}
for my $api_name ("open-ils.worm.wormize.authority", "open-ils.worm.wormize.authority.noscrub") {
    __PACKAGE__->register_method(
        api_name  => $api_name,
        method    => "wormize_authority_record",
        api_level => 1,
        argc      => 1,
    );
}
1544
1545
1546 # --------------------------------------------------------------------------------
1547 # MARC index extraction
1548
1549 package OpenILS::Application::Ingest::XPATH;
1550 use base qw/OpenILS::Application::Ingest/;
1551 use Unicode::Normalize;
1552
# Gather the text found at $xpath beneath $xml into a single string and
# return it NFD-normalized.  Each piece of text is followed by one space.
#
#   $xml       - node to search (e.g. a MODS documentElement); must support
#                setNamespace() and findnodes()
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - namespace URI; registered on $xml via setNamespace() only
#   $ns_prefix   when both the URI and the prefix are true
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere (as a substring) in the string built so far
#
# For every matched node the text of each child node is appended; a matched
# node with no children contributes its own text instead.
sub _xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    for my $value ( $xml->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {
            my $content = $child->textContent;

            # Uniquify with a literal substring test.  A regex built from the
            # text would be an *empty* pattern for empty text, and Perl then
            # silently reuses the last successfully matched pattern instead.
            next if ($unique && index($string, $content) != -1);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
1584
# Stream one Fieldmapper::metabib::<class>_field_entry object for every index
# type configured in $xpathset for $class that yields a non-empty value.
#
#   $xml   - MARCXML, either as a string (parsed here) or an already-parsed
#            reference
#   $class - index class name; selects both the $xpathset entries and the
#            Fieldmapper class to instantiate
#
# Each value is NFD-normalized, stripped of \pM and \pC characters and of
# trailing non-word characters, has any period that directly follows a word
# character removed, and is lowercased.  Results are sent with
# $client->respond; the method itself returns undef.
sub class_all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

    my @types = keys %{ $xpathset->{$class} };
    return undef unless (@types);

    # The MODS transform does not depend on the index type, so run it once
    # rather than once per type.
    my $mods_doc = $mods_sheet->transform($xml);
    my $mods_root = $mods_doc->documentElement;

    for my $type (@types) {
        my $value = _xpath_to_string(
            $mods_root,
            $xpathset->{$class}->{$type}->{xpath},
            "http://www.loc.gov/mods/",
            "mods",
            1
        );

        next unless $value;

        $value = NFD($value);
        $value =~ s/\pM+//sgo;
        $value =~ s/\pC+//sgo;
        $value =~ s/\W+$//sgo;

        $value =~ s/(\w)\./$1/sgo;
        $value = lc($value);

        my $fm = $class_constructor->new;
        $fm->value( $value );
        $fm->field( $xpathset->{$class}->{$type}->{id} );
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.field_entry.class.xml",
    method      => "class_all_index_string_xml",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
1628
# Record-id flavour of open-ils.worm.field_entry.class.xml: retrieves
# biblio.record_entry $rec, runs its MARC through that method for $class,
# stamps every resulting entry's source with $rec and streams it to the
# client.  Returns undef.
sub class_all_index_string_record {
    my ($self, $client, $rec, $class) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $extractor = $self->method_lookup("open-ils.worm.field_entry.class.xml");
    for my $entry ( $extractor->run($bib->marc, $class) ) {
        $entry->source($rec);
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.field_entry.class.record",
    method      => "class_all_index_string_record",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
1651
1652
# Return the index string for a single $class/$type pair: the MARCXML in $xml
# (parsed here when given as a string) is transformed with the MODS
# stylesheet and the xpath configured in $xpathset for that pair is
# evaluated against the result, with uniquifying enabled.
sub class_index_string_xml {
    my ($self, $client, $xml, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    my $mods_root = $mods_sheet->transform($xml)->documentElement;
    my $xpath     = $xpathset->{$class}->{$type}->{xpath};

    return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.class.type.xml",
    method      => "class_index_string_xml",
    api_level   => 1,
    argc        => 1,
);
1670
# Record-id flavour of open-ils.worm.class.type.xml: retrieves
# biblio.record_entry $rec, passes its MARC plus $class and $type to that
# method, logs the first value it produced and returns it.
sub class_index_string_record {
    my ($self, $client, $rec, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $extractor = $self->method_lookup("open-ils.worm.class.type.xml");
    my ($d) = $extractor->run($bib->marc, $class => $type);

    $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.class.type.record",
    method      => "class_index_string_record",
    api_level   => 1,
    argc        => 1,
);
1691
# Evaluate an arbitrary $xpath against the document element of $xml (parsed
# here when given as a string) and return the collected text.  $uri, $prefix
# and $unique are handed straight to _xpath_to_string.
sub xml_xpath {
    my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    my $root = $xml->documentElement;
    return _xpath_to_string( $root, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.xpath.xml",
    method      => "xml_xpath",
    api_level   => 1,
    argc        => 1,
);
1711
# Record-id flavour of open-ils.worm.xpath.xml: retrieves biblio.record_entry
# $rec, runs $xpath (with $uri, $prefix and $unique) against its MARC via
# that method, logs the first value it produced and returns it.
sub record_xpath {
    my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $evaluator = $self->method_lookup("open-ils.worm.xpath.xml");
    my ($d) = $evaluator->run($bib->marc, $xpath, $uri, $prefix, $unique );

    $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.xpath.record",
    method      => "record_xpath",
    api_level   => 1,
    argc        => 1,
);
1734
1735
1736 # --------------------------------------------------------------------------------
1737 # MARC Descriptor
1738
1739 package OpenILS::Application::Ingest::Biblio::Leader;
1740 use base qw/OpenILS::Application::Ingest/;
1741 use Unicode::Normalize;
1742
# Character classes keyed by record type group name.  The descriptor code in
# this package matches them against the single character at leader offset 6.
our %marc_type_groups = (
    BKS => q/[at]{1}/,
    SER => q/[a]{1}/,
    VIS => q/[gkro]{1}/,
    MIX => q/[p]{1}/,
    MAP => q/[ef]{1}/,
    SCO => q/[cd]{1}/,
    REC => q/[ij]{1}/,
    COM => q/[m]{1}/,
);

# Build a compiled regex that matches a string consisting of exactly one
# character belonging to any of the named groups.
#
#   @_ - one or more keys of %marc_type_groups; unknown names are ignored
#
# Returns a qr// object.
sub _type_re {
    # A hash *slice* is required here: $marc_type_groups{@_} would use the
    # number of arguments as the key and always come back undef.
    my @classes = grep { defined } @marc_type_groups{@_};

    # Group the alternation so that both anchors apply to every alternative.
    my $re = '^(?:' . join('|', @classes) . ')$';
    return qr/$re/;
}
1758
# Extractors for the record descriptor, keyed by descriptor field name.
# Every extractor takes no arguments and reads the package variables $ldr
# and $oo8, which the caller is expected to have set beforehand.
our %biblio_descriptor_code = (
    item_type     => sub { return substr($ldr, 6, 1); },
    item_form     => sub {
        # which $oo8 offset is used depends on the type code at $ldr offset 6
        my $rec_type = substr($ldr, 6, 1);
        return substr($oo8, 29, 1) if ($rec_type =~ _type_re( qw/MAP VIS/ ));
        return substr($oo8, 23, 1) if ($rec_type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
        return ' ';
    },
    bib_level     => sub { return substr($ldr, 7, 1); },
    control_type  => sub { return substr($ldr, 8, 1); },
    char_encoding => sub { return substr($ldr, 9, 1); },
    enc_level     => sub { return substr($ldr, 17, 1); },
    cat_form      => sub { return substr($ldr, 18, 1); },
    pub_status    => sub { return substr($ldr, 5, 1); },
    item_lang     => sub { return substr($oo8, 35, 3); },
    lit_form      => sub {
        return substr($oo8, 33, 1) if (substr($ldr, 6, 1) =~ _type_re('BKS'));
        return undef;
    },
    type_mat      => sub {
        return substr($oo8, 33, 1) if (substr($ldr, 6, 1) =~ _type_re('VIS'));
        return undef;
    },
    audience      => sub { return substr($oo8, 22, 1); },
);
1781
# Build a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.  The leader and the 008 / 007 controlfield values are placed in
# the localized package variables $ldr, $oo8 and $oo7, then every extractor
# in %biblio_descriptor_code is run and its result stored through the
# accessor of the same name.
sub _extract_biblio_descriptors {
    my ($xml) = @_;

    local $ldr = $xml->findvalue('//*[local-name()="leader"]');
    local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
    local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

    my $descriptor = Fieldmapper::metabib::record_descriptor->new;
    for my $field ( keys %biblio_descriptor_code ) {
        my $extract = $biblio_descriptor_code{$field};
        $descriptor->$field( $extract->() );
    }

    return $descriptor;
}
1796
# API wrapper around _extract_biblio_descriptors: accepts MARCXML either as a
# string (parsed here) or as an already-parsed reference and returns the
# resulting record descriptor.
sub extract_biblio_desc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.biblio_leader.xml",
    method      => "extract_biblio_desc_xml",
    api_level   => 1,
    argc        => 1,
);
1812
# Record-id flavour of open-ils.worm.biblio_leader.xml: retrieves
# biblio.record_entry $rec, runs its MARC through that method, logs the
# first result as JSON and returns it.
sub extract_biblio_desc_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $describer = $self->method_lookup("open-ils.worm.biblio_leader.xml");
    my ($d) = $describer->run($bib->marc);

    $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.worm.biblio_leader.record",
    method      => "extract_biblio_desc_record",
    api_level   => 1,
    argc        => 1,
);
1831
1832 # --------------------------------------------------------------------------------
1833 # Flat MARC
1834
1835 package OpenILS::Application::Ingest::FlatMARC;
1836 use base qw/OpenILS::Application::Ingest/;
1837 use Unicode::Normalize;
1838
1839
# Scrub one piece of MARC text for storage: NFD-normalize it, then remove
# \pM characters, \pC characters and any trailing run of non-word characters.
sub _flat_marc_scrub {
    my ($raw) = @_;

    my $val = NFD($raw);
    $val =~ s/\pM+//sgo;
    $val =~ s/\pC+//sgo;
    $val =~ s/\W+$//sgo;

    return $val;
}

# Flatten a parsed MARCXML document into a list of
# Fieldmapper::<xmltype>::full_rec objects ($xmltype defaults to 'metabib').
#
# Rows are produced from the first element whose local name is "record":
#   - one row per leader, tagged 'LDR'
#   - one row per controlfield, tagged with its tag attribute
#   - one row per subfield of every datafield, carrying the datafield's tag,
#     ind1 and ind2 plus the subfield code; only these values are lowercased
# All values go through _flat_marc_scrub.  Returns the list of rows.
sub _marcxml_to_full_rows {
    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $type->new;
        $row->tag( 'LDR' );
        $row->value( _flat_marc_scrub($leader->textContent) );

        push @ns_list, $row;
    }

    for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $type->new;
        $row->tag( $control->getAttribute( "tag" ) );
        $row->value( _flat_marc_scrub($control->textContent) );

        push @ns_list, $row;
    }

    for my $datafield ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $datafield;

        my $tag  = $datafield->getAttribute( "tag" );
        my $ind1 = $datafield->getAttribute( "ind1" );
        my $ind2 = $datafield->getAttribute( "ind2" );

        for my $subfield ( @{$datafield->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $type->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );
            $row->value( lc(_flat_marc_scrub($subfield->textContent)) );

            push @ns_list, $row;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
    return @ns_list;
}
1913
# Stream the flattened full_rec rows for a MARCXML document (parsed here when
# given as a string).  Rows are built as 'authority' rows when the API name
# this method was invoked under contains "authority", otherwise as 'metabib'
# rows.  Returns undef.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row ( _marcxml_to_full_rows($xml, $type) ) {
        $client->respond($row);
    }
    return undef;
}
for my $flavour (qw/authority biblio/) {
    __PACKAGE__->register_method(
        api_name    => "open-ils.worm.flat_marc.$flavour.xml",
        method      => "flat_marc_xml",
        api_level   => 1,
        argc        => 1,
        stream      => 1,
    );
}
1943
# Record-id flavour of the flat_marc XML methods.  The record kind is
# 'authority' when the API name this method was invoked under contains
# "authority", otherwise 'biblio'; it selects both the record_entry retrieve
# call and the open-ils.worm.flat_marc.<kind>.xml method whose results are
# streamed to the client.  Returns undef.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();
    my $entry = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
    );

    my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
    for my $row ( $flattener->run($entry->marc) ) {
        $client->respond($row);
    }
    return undef;
}
for my $flavour (qw/biblio authority/) {
    __PACKAGE__->register_method(
        api_name    => "open-ils.worm.flat_marc.$flavour.record_entry",
        method      => "flat_marc_record",
        api_level   => 1,
        argc        => 1,
        stream      => 1,
    );
}
1972
1973
1974 # --------------------------------------------------------------------------------
1975 # Fingerprinting
1976
1977 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1978 use base qw/OpenILS::Application::Ingest/;
1979 use Unicode::Normalize;
1980 use OpenSRF::EX qw/:try/;
1981
# Log the current value of the package variable $text at INTERNAL level.
# The fixup routines below call this before their first step and after
# every step.
my $fp_trace_text = sub {
    $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
};

# Title fixup, operating in place on the package variable $text:
# NFD-normalize, drop \pM characters, lowercase, collapse whitespace runs,
# trim, remove the words "the" / "a" / "an", remove bracketed segments and
# strip trailing ; / . characters.
my $fp_title_fixup = sub {
    $fp_trace_text->();
    $text = NFD($text);
    $fp_trace_text->();
    $text =~ s/\pM+//gso;
    $fp_trace_text->();
    $text = lc($text);
    $fp_trace_text->();
    $text =~ s/\s+/ /sgo;
    $fp_trace_text->();
    $text =~ s/^\s*(.+)\s*$/$1/sgo;
    $fp_trace_text->();
    $text =~ s/\b(?:the|an?)\b//sgo;
    $fp_trace_text->();
    $text =~ s/\[.[^\]]+\]//sgo;
    $fp_trace_text->();
    $text =~ s/\s*[;\/\.]*$//sgo;
    $fp_trace_text->();
};

# Author fixup, operating in place on the package variable $text:
# NFD-normalize, drop \pM characters, lowercase, collapse whitespace runs,
# trim, then cut everything from the first whitespace (and a comma directly
# before it, if any) onward.
my $fp_author_fixup = sub {
    $fp_trace_text->();
    $text = NFD($text);
    $fp_trace_text->();
    $text =~ s/\pM+//gso;
    $fp_trace_text->();
    $text = lc($text);
    $fp_trace_text->();
    $text =~ s/\s+/ /sgo;
    $fp_trace_text->();
    $text =~ s/^\s*(.+)\s*$/$1/sgo;
    $fp_trace_text->();
    $text =~ s/,?\s+.*$//sgo;
    $fp_trace_text->();
};

# Flat list of (match xpath => block) pairs.  Each block is itself a flat
# list of (part name => { xpath => [candidate xpaths], fixup => coderef })
# pairs.
my @fp_mods_xpath = (
    '//mods:mods/mods:typeOfResource[text()="text"]' => [
        title   => {
            xpath   => [
                '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
                '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
                '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
                '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
            ],
            fixup   => $fp_title_fixup,
        },
        author  => {
            xpath   => [
                '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
                '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
            ],
            fixup   => $fp_author_fixup,
        },
    ],

    '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
        title   => {
            xpath   => [
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
                '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
                '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
                '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
                '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
            ],
            fixup   => $fp_title_fixup,
        },
        author  => {
            xpath   => [
                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
                '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
                '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
            ],
            fixup   => $fp_author_fixup,
        },
    ],

);

# Final pair: any titleInfo at all, handled with the first block above.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
2094
# Compute a fingerprint string from a MODS node.
#
# The (match xpath => block) pairs of @fp_mods_xpath are tried in order.
# When a match xpath finds nodes, each part of its block contributes the
# first non-empty findvalue() result among that part's candidate xpaths,
# run through the part's fixup routine (which edits the localized package
# variable $text in place).  The first pair that produces any text wins: all
# non-word characters are stripped from the result and it is returned.
# Returns undef when no pair produces text.
sub _fp_mods {
    my ($mods) = @_;
    $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    my $fp_string = '';

    # even slots hold the match xpath, the following odd slot its block
    for (my $match_at = 0; $fp_mods_xpath[$match_at]; $match_at += 2) {
        my @nodes = $mods->findnodes( $fp_mods_xpath[$match_at] );

        if (@nodes) {
            my $block = $fp_mods_xpath[$match_at + 1];

            # same layout inside a block: part name, then part definition
            for (my $name_at = 0; $$block[$name_at + 1]; $name_at += 2) {
                my $part = $$block[$name_at + 1];

                local $text;
                for my $xpath ( @{ $part->{xpath} } ) {
                    $text = $mods->findvalue( $xpath );
                    last if ($text);
                }

                $log->debug("Found fingerprint text using $$block[$name_at] : [$text]", DEBUG);

                next unless ($text);

                $$part{fixup}->();
                $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                $fp_string .= $text;
            }
        }

        next unless ($fp_string);

        $fp_string =~ s/\W+//gso;
        $log->debug("Fingerprint is [$fp_string]", INFO);
        return $fp_string;
    }

    return undef;
}
2139
# refingerprint_bibrec( $self, $client, $rec )
#
# Recalculate the fingerprint and quality of the bib record(s) found by
# searching record_entry on id with $rec.  When either value has changed
# the record is updated and, unless the method was invoked through an API
# name containing "nomap", its metarecord mapping is rebuilt:
#
#   * existing source maps for the record are deleted;
#   * the previously mapped metarecord is deleted if it has no maps left;
#   * a metarecord for the new fingerprint is found or created;
#   * a new source map linking the record to that metarecord is created.
#
# Each processed record id is sent to the client via respond().  A
# transaction is opened (and committed or rolled back) only when the caller
# is not already inside one.  Always returns undef.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        # Only manage the transaction if we are the ones who opened it
        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

                # $bib rather than $b, so sort's package variable is not shadowed
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # Drop every existing map for this record, remembering
                                        # the (last) metarecord it was attached to
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared separately from the conditional assignment:
                                        # "my ... if COND" has undefined behaviour and could
                                        # carry the previous iteration's value into this one.
                                        my $old_sm;
                                        if ($old_mrid) {
                                                $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );
                                        }

                                        # The old metarecord has no sources left; remove it
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.record.update",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);                      

__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
);                      
2242
2243 =comment
2244
2245 sub fingerprint_bibrec {
2246         my $self = shift;
2247         my $client = shift;
2248         my $rec = shift;
2249
2250         OpenILS::Application::Ingest->post_init();
2251         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2252
2253         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2254         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2255         return $fp;
2256
2257 }
2258 __PACKAGE__->register_method(  
2259         api_name        => "open-ils.worm.fingerprint.record",
2260         method          => "fingerprint_bibrec",
2261         api_level       => 0,
2262         argc            => 1,
2263 );                      
2264
2265
2266 sub fingerprint_mods {
2267         my $self = shift;
2268         my $client = shift;
2269         my $xml = shift;
2270
2271         OpenILS::Application::Ingest->post_init();
2272         my $mods = $parser->parse_string($xml)->documentElement;
2273
2274         return _fp_mods( $mods );
2275 }
2276 __PACKAGE__->register_method(  
2277         api_name        => "open-ils.worm.fingerprint.mods",
2278         method          => "fingerprint_mods",
2279         api_level       => 1,
2280         argc            => 1,
2281 );                      
2282
2283 sub fingerprint_marc {
2284         my $self = shift;
2285         my $client = shift;
2286         my $xml = shift;
2287
2288         $xml = $parser->parse_string($xml) unless (ref $xml);
2289
2290         OpenILS::Application::Ingest->post_init();
2291         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2292         $log->debug("Returning [$fp] as fingerprint", INFO);
2293         return $fp;
2294 }
2295 __PACKAGE__->register_method(  
2296         api_name        => "open-ils.worm.fingerprint.marc",
2297         method          => "fingerprint_marc",
2298         api_level       => 1,
2299         argc            => 1,
2300 );                      
2301
2302
2303 =cut
2304
# biblio_fingerprint_record( $self, $client, $rec )
#
# Retrieve the bib record entry identified by $rec and run its MARC through
# the open-ils.worm.fingerprint.marc method.  Returns whatever that method
# returns, or undef (after logging an error) when the record cannot be
# retrieved.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest
                        ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );

        # Guard against a missing record instead of dying on ->marc
        unless ($entry) {
                $log->error("Unable to retrieve record $rec for fingerprinting");
                return undef;
        }

        my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($entry->marc);

        # refingerprint_bibrec reads this result as a hash with a
        # "fingerprint" key; log that value rather than "HASH(0x...)".
        my $fp_text = (ref($fp) eq 'HASH') ? $fp->{fingerprint} : $fp;
        $log->debug("Returning [$fp_text] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);                      
2326
# Script runner for the fingerprinting script; built on first use and then
# reused by later calls to biblio_fingerprint().
our $fp_script;

# biblio_fingerprint( $self, $client, $marc )
#
# Fingerprint a MARC record.  $marc may be an XML string or an already
# parsed document.  The record is transformed to MODS with $mods_sheet, both
# documents are passed through entityize() and handed to the configured
# biblio_fingerprint script as its 'environment' ({marc => ..., mods => ...}).
#
# Returns the script's result, or 0 if the script run yields a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        # The record is logged at internal level only; it is no longer also
        # dumped to STDERR on every call.
        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or a list
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};

        $fp_script->insert('environment' => $env);

        $log->debug("Running script for biblio fingerprinting...");

        # Bail out explicitly on failure; the early return must not depend
        # on the truthiness of the logger's return value.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);                      
2389
2390 # --------------------------------------------------------------------------------
2391
2392 1;
2393
2394 __END__
# Cached storage method handles for bib ingest.  wormize() fills each one in
# the first time it runs and reuses it afterwards.
my (
        # transaction control
        $in_xact, $begin, $commit, $rollback,

        # record entry and metarecord access
        $lookup, $update_entry,
        $mr_lookup, $mr_update, $mr_create,
        $create_source_map, $sm_lookup,

        # removal of previously ingested rows
        $rm_old_rd, $rm_old_sm, $rm_old_fr, $rm_old_tr,
        $rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser,

        # creation of new rows
        $fr_create, $rd_create,
);

# per-class (title, author, ...) field entry creators
my $create = {};

# Record descriptor field => Perl expression (kept as a string) producing
# its value.  wormize() evaluates each expression with $ldr (the MARC leader)
# and $oo8 (the 008 control field) in scope.
my %descriptor_code = (
        item_type       => 'substr($ldr,6,1)',
        item_form       => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        bib_level       => 'substr($ldr,7,1)',
        control_type    => 'substr($ldr,8,1)',
        char_encoding   => 'substr($ldr,9,1)',
        enc_level       => 'substr($ldr,17,1)',
        cat_form        => 'substr($ldr,18,1)',
        pub_status      => 'substr($ldr,5,1)',
        item_lang       => 'substr($oo8,35,3)',
        #lit_form       => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
        audience        => 'substr($oo8,22,1)',
);

2432
# wormize( $self, $client, @docids )
#
# Ingest bibliographic records.  For each retrieved record entry this:
#   * transforms the MARC to MODS and stores a fingerprint on the entry;
#   * unless running as a "no_map" API, finds or creates the metarecord for
#     that fingerprint and queues a source map;
#   * builds a record descriptor from the leader and 008 field;
#   * collects the MODS-derived field values and the full_rec rows.
# Previously ingested rows for @docids are then removed and the new rows
# created in batches.
#
# Only the first record is processed unless the API name ends in "batch".
# A transaction is started and committed here only when the caller is not
# already inside one.  Throws OpenSRF::EX::PANIC when the transaction or a
# storage call fails.  Returns the number of records processed.
sub wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        # The API name selects the mode of operation
        my $no_map   = ($self->api_name =~ /no_map/) ? 1 : 0;
        my $is_batch = ($self->api_name =~ /batch$/) ? 1 : 0;

        # Look each storage method up once and cache it in the file-level handles
        $in_xact           ||= $self->method_lookup('open-ils.storage.transaction.current');
        $begin             ||= $self->method_lookup('open-ils.storage.transaction.begin');
        $commit            ||= $self->method_lookup('open-ils.storage.transaction.commit');
        $rollback          ||= $self->method_lookup('open-ils.storage.transaction.rollback');
        $sm_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
        $mr_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
        $mr_update         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
        $lookup            ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
        $update_entry      ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
        $rm_old_sm         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
        $rm_old_rd         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.mass_delete');
        $rm_old_fr         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.mass_delete');
        $rm_old_tr         ||= $self->method_lookup('open-ils.storage.direct.metabib.title_field_entry.mass_delete');
        $rm_old_ar         ||= $self->method_lookup('open-ils.storage.direct.metabib.author_field_entry.mass_delete');
        $rm_old_sr         ||= $self->method_lookup('open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
        $rm_old_kr         ||= $self->method_lookup('open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
        $rm_old_ser        ||= $self->method_lookup('open-ils.storage.direct.metabib.series_field_entry.mass_delete');
        $mr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
        $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
        $rd_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.batch.create');
        $fr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.batch.create');
        $$create{title}    ||= $self->method_lookup('open-ils.storage.direct.metabib.title_field_entry.batch.create');
        $$create{author}   ||= $self->method_lookup('open-ils.storage.direct.metabib.author_field_entry.batch.create');
        $$create{subject}  ||= $self->method_lookup('open-ils.storage.direct.metabib.subject_field_entry.batch.create');
        $$create{keyword}  ||= $self->method_lookup('open-ils.storage.direct.metabib.keyword_field_entry.batch.create');
        $$create{series}   ||= $self->method_lookup('open-ils.storage.direct.metabib.series_field_entry.batch.create');

        # Start our own transaction unless the caller already has one open
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @source_maps;
        my @entry_list;
        my @mr_list;
        my @rd_list;
        my @ns_list;
        my @mods_data;
        my $ret = 0;
        for my $entry ( $lookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # Load the MARC -> MODS stylesheet on first use
                if(!$mods_sheet) {
                        my $xslt_doc = $parser->parse_file(
                                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                my $modsdoc = $mods_sheet->transform($marcdoc);

                my $mods = $modsdoc->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                # NOTE(review): fingerprint_mods() is called as a plain function
                # with the MODS element as its only argument -- confirm that a
                # sub with that calling convention is still defined.
                $entry->fingerprint( fingerprint_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                unless ($no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if (!$mr || !@$mr) {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );

                                # Check the result of the create call itself: $mr is
                                # always defined at this point, so testing it could
                                # never detect a failed create.
                                my ($new_mr) = $mr_create->run($mr);
                                unless (defined $new_mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                                $mr->id($new_mr);
                        } else {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        }

                        my $sm = Fieldmapper::metabib::metarecord_source_map->new;
                        $sm->metarecord( $mr->id );
                        $sm->source( $entry->id );
                        push @source_maps, $sm;
                }

                # $ldr and $oo8 are referenced by name from the expression
                # strings in %descriptor_code evaluated below
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        # string eval of expressions defined in this file, not of external input
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                last unless ($is_batch);
        }

        # NOTE(review): these deletes cover every id in @docids even though
        # non-batch mode processes only the first entry -- confirm callers
        # pass a single id in that mode.
        $rm_old_rd->run( { record => \@docids } );
        $rm_old_fr->run( { record => \@docids } );
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@docids } );
        $rm_old_ar->run( { source => \@docids } );
        $rm_old_sr->run( { source => \@docids } );
        $rm_old_kr->run( { source => \@docids } );
        $rm_old_ser->run( { source => \@docids } );

        unless ($no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                unless (defined $sm) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
                }
                my ($mr) = $mr_update->run(@mr_list);
                unless (defined $mr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
                }
        }

        my ($re) = $update_entry->run(@entry_list);
        unless (defined $re) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
        }

        my ($rd) = $rd_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
        }

        my ($fr) = $fr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
        }

        # step 5: insert the new metadata
        for my $class ( qw/title author subject keyword series/ ) {
                my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";

                my @md_list = ();
                for my $doc ( @mods_data ) {
                        # each element is a single-pair hash: { docid => values }
                        my ($did) = keys %$doc;
                        my ($data) = values %$doc;

                        for my $row ( keys %{ $$data{$class} } ) {
                                next unless (exists $$data{$class}{$row});
                                next unless ($$data{$class}{$row}{value});
                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $$data{$class}{$row}{value} );
                                $fm_obj->field( $$data{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $$create{$class}->run(@md_list);
                unless (defined $cr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
                }
        }

        # Commit only a transaction that was started here
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}

# Register the plain, no_map, batch and no_map.batch variants
for my $variant ( '', '.no_map', '.batch', '.no_map.batch' ) {
        __PACKAGE__->register_method(
                api_name        => "open-ils.worm.wormize$variant",
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2685
2686
# Cached storage method handles for authority ingest.  The ones that
# authority_wormize() uses are filled in there on first use.
my (
        # transaction control
        $ain_xact, $abegin, $acommit, $arollback,

        # record entry and metarecord access
        $alookup, $aupdate_entry,
        $amr_lookup, $amr_update, $amr_create,
        $acreate_source_map, $asm_lookup,

        # removal of previously ingested rows
        $arm_old_rd, $arm_old_sm, $arm_old_fr, $arm_old_tr,
        $arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser,

        # creation of new rows
        $afr_create, $ard_create,
);

my $acreate = {};

2710
2711 sub authority_wormize {
2712
2713         my $self = shift;
2714         my $client = shift;
2715         my @docids = @_;
2716
2717         my $no_map = 0;
2718         if ($self->api_name =~ /no_map/o) {
2719                 $no_map = 1;
2720         }
2721
2722         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2723                 unless ($in_xact);
2724         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2725                 unless ($begin);
2726         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2727                 unless ($commit);
2728         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2729                 unless ($rollback);
2730         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2731                 unless ($alookup);
2732         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2733                 unless ($aupdate_entry);
2734         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2735                 unless ($arm_old_rd);
2736         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2737                 unless ($arm_old_fr);
2738         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2739                 unless ($ard_create);
2740         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2741                 unless ($afr_create);
2742
2743
2744         my ($outer_xact) = $in_xact->run;
2745         try {
2746                 unless ($outer_xact) {
2747                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2748                         my ($r) = $begin->run($client);
2749                         unless (defined $r and $r) {
2750                                 $rollback->run;
2751                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2752                         }
2753                 }
2754         } catch Error with {
2755                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2756         };
2757
2758         my @source_maps;
2759         my @entry_list;
2760         my @mr_list;
2761         my @rd_list;
2762         my @ns_list;
2763         my @mads_data;
2764         my $ret = 0;
2765         for my $entry ( $lookup->run(@docids) ) {
2766                 # step -1: grab the doc from storage
2767                 next unless ($entry);
2768
2769                 #if(!$mads_sheet) {
2770                 #       my $xslt_doc = $parser->parse_file(
2771                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2772                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2773                 #}
2774
2775                 my $xml = $entry->marc;
2776                 my $docid = $entry->id;
2777                 my $marcdoc = $parser->parse_string($xml);
2778                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2779
2780                 #my $mads = $madsdoc->documentElement;
2781                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2782
2783                 push @entry_list, $entry;
2784
2785                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2786                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2787
2788                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2789                 for my $rd_field ( keys %descriptor_code ) {
2790                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2791                 }
2792                 $rd_obj->record( $docid );
2793                 push @rd_list, $rd_obj;
2794
2795                 # step 2: build the KOHA rows
2796                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2797                 $_->record( $docid ) for (@tmp_list);
2798                 push @ns_list, @tmp_list;
2799
2800                 $ret++;
2801
2802                 last unless ($self->api_name =~ /batch$/o);
2803         }
2804
2805         $arm_old_rd->run( { record => \@docids } );
2806         $arm_old_fr->run( { record => \@docids } );
2807
2808         my ($rd) = $ard_create->run(@rd_list);
2809         unless (defined $rd) {
2810                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2811         }
2812
2813         my ($fr) = $fr_create->run(@ns_list);
2814         unless (defined $fr) {
2815                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2816         }
2817
2818         unless ($outer_xact) {
2819                 $log->debug("Commiting transaction started by Ingest.", INFO);
2820                 my ($c) = $commit->run;
2821                 unless (defined $c and $c) {
2822                         $rollback->run;
2823                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2824                 }
2825         }
2826
2827         return $ret;
2828 }
# Published names for the authority "wormize" ingest method.
#
# The single-record method was originally published under a misspelled
# name ("authortiy").  The correctly spelled name is registered first; the
# misspelled one is kept as an alias so existing callers keep working.
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Legacy alias (misspelled) -- retained for backward compatibility only.
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authortiy.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Batch variant: wormize keeps looping over its entries only when the
# api_name ends in "batch".
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize.batch",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
2841
2842
2843 # --------------------------------------------------------------------------------
2844
2845
# Flatten a parsed MARCXML document into one row object per leader,
# controlfield and datafield subfield.
#
# Arguments:
#   $marcxml - parsed document object; the children of its documentElement
#              named "leader", "controlfield" and "datafield" are read
#   $type    - class name used to build every row (must provide new, tag,
#              ind1, ind2, subfield and value); defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Returns the list of row objects: leader rows first, then controlfield
# rows, then one row per subfield element.  This sub does not set ->record
# on the rows.
#
# All values are NFD-decomposed with combining marks removed; only subfield
# values are additionally lowercased.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        my @ns_list;

        my $root = $marcxml->documentElement;

        # Decompose, then drop the combining marks, so that accented and
        # unaccented spellings produce the same stored value.
        my $strip_marks = sub {
                my ($text) = @_;
                my $val = NFD($text);
                $val =~ s/\pM+//g;
                return $val;
        };

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build the row from the requested class, not a hard-coded
                # metabib one, so non-default callers get consistent rows.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        # childNodes also yields whitespace text nodes and
                        # comments, which have no "code" attribute (and no
                        # getAttribute method); only elements are subfields.
                        next unless $data && $data->isa('XML::LibXML::Element');

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc( $strip_marks->($data->textContent) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2906
# Collect the text found under every node matching $xpath into a single
# space-separated, normalized string.
#
# Arguments:
#   $root  - node object providing findnodes()
#   $xpath - expression passed to findnodes()
#
# For each matched node the text of each child node is appended, followed
# by a space, unless that text already occurs somewhere in the buffer
# (a literal, case-sensitive substring test -- so a value contained in an
# earlier one is dropped too).  A matched node without children contributes
# its own text unconditionally.
#
# Returns the buffer NFD-decomposed, with combining marks removed, and
# lowercased.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # walk the set of matching nodes
        for my $value ( $root->findnodes( $xpath ) ) {

                my @children = $value->childNodes();

                # childless node: take its own text as-is
                unless (@children) {
                        $string .= $value->textContent . " ";
                        next;
                }

                for my $child (@children) {
                        my $content = $child->textContent;

                        # uniquify the values.  index() is used rather than
                        # a regex because an empty pattern makes Perl re-use
                        # the last successfully matched regex, which made the
                        # handling of empty children unpredictable.
                        next if index($string, $content) >= 0;

                        $string .= $content . " ";
                }
        }

        $string = NFD($string);
        $string =~ s/\pM//g;
        return lc($string);
}
2934
2935
# Build a two-level hash keyed like the file-level $xpathset
# ( class => type => ... ), where each leaf is a hash holding only
# field_id, copied from the matching $xpathset entry's id.
#
# $mods is accepted but not consulted here.
#
# Returns a reference to the new hash.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %field_ids_for;

        for my $class (keys %$xpathset) {
                $field_ids_for{$class} = {
                        map {
                                ( $_ => { field_id => $xpathset->{$class}->{$_}->{id} } )
                        } keys %{ $xpathset->{$class} }
                };
        }

        return \%field_ids_for;
}
2948
2949
2950 1;
2951
2952