]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
closing in on an ingester
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
20 our %supported_formats = (
21         mods3   => {ns => 'http://www.loc.gov/mods/v3'},
22         mods    => {ns => 'http://www.loc.gov/mods/'},
23         marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24         srw_dc  => {ns => ''},
25         oai_dc  => {ns => ''},
26         rdf_dc  => {ns => ''},
27 );
28
29
30 our $log = 'OpenSRF::Utils::Logger';
31
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
34
35 our $mods_sheet;
36 our $mads_sheet;
37 our $xpathset = {};
38 sub initialize {}
39 sub child_init {}
40
41 sub post_init {
42
43         unless (keys %$xpathset) {
44                 $log->debug("Running post_init", DEBUG);
45
46                 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
47
48                 unless ($supported_formats{mods}{xslt}) {
49                         $log->debug("Loading MODS XSLT", DEBUG);
50                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51                         $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
52                 }
53
54                 unless ($supported_formats{mods3}{xslt}) {
55                         $log->debug("Loading MODS v3 XSLT", DEBUG);
56                         my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57                         $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
58                 }
59
60
61                 my $req = OpenSRF::AppSession
62                                 ->create('open-ils.cstore')
63                                 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
64                                 ->gather(1);
65
66                 if (ref $req and @$req) {
67                         for my $f (@$req) {
68                                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
69                                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
70                                 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
71                                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
72                         }
73                 }
74         }
75 }
76
77 sub entityize {
78         my $stuff = shift;
79         my $form = shift;
80
81         if ($form eq 'D') {
82                 $stuff = NFD($stuff);
83         } else {
84                 $stuff = NFC($stuff);
85         }
86
87         $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
88         return $stuff;
89 }
90
91 sub ro_biblio_ingest_single_xml {
92         my $self = shift;
93         my $client = shift;
94         my $xml = shift;
95
96         my $document = $parser->parse_string($xml);
97
98         my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
99         my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
100         my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
101
102         return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp };
103 }
104 __PACKAGE__->register_method(  
105         api_name        => "open-ils.ingest.full.biblio.xml.readonly",
106         method          => "ro_biblio_ingest_single_xml",
107         api_level       => 1,
108         argc            => 1,
109 );                      
110
111 sub ro_biblio_ingest_single_record {
112         my $self = shift;
113         my $client = shift;
114         my $rec = shift;
115
116         OpenILS::Application::Ingest->post_init();
117         my $r = OpenSRF::AppSession
118                         ->create('open-ils.cstore')
119                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
120                         ->gather(1);
121
122         return undef unless ($r and @$r);
123
124         my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
125
126         $_->source($rec) for (@{$res->{field_entries}});
127         $_->record($rec) for (@{$res->{full_rec}});
128
129         return $res;
130 }
131 __PACKAGE__->register_method(  
132         api_name        => "open-ils.ingest.full.biblio.record.readonly",
133         method          => "ro_biblio_ingest_single_record",
134         api_level       => 1,
135         argc            => 1,
136 );                      
137
138 sub ro_biblio_ingest_stream_record {
139         my $self = shift;
140         my $client = shift;
141
142         OpenILS::Application::Ingest->post_init();
143
144         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
145
146         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
147         
148                 my $rec = $resp->content;
149                 last unless (defined $rec);
150
151                 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
152                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
153
154                 $_->source($rec) for (@{$res->{field_entries}});
155                 $_->record($rec) for (@{$res->{full_rec}});
156
157                 $client->respond( $res );
158         }
159
160         return undef;
161 }
162 __PACKAGE__->register_method(  
163         api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
164         method          => "ro_biblio_ingest_stream_record",
165         api_level       => 1,
166         stream          => 1,
167 );                      
168
169 sub ro_biblio_ingest_stream_xml {
170         my $self = shift;
171         my $client = shift;
172
173         OpenILS::Application::Ingest->post_init();
174
175         my $ses = OpenSRF::AppSession->create('open-ils.cstore');
176
177         while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
178         
179                 my $xml = $resp->content;
180                 last unless (defined $xml);
181
182                 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
183                 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
184
185                 $client->respond( $res );
186         }
187
188         return undef;
189 }
190 __PACKAGE__->register_method(  
191         api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
192         method          => "ro_biblio_ingest_stream_xml",
193         api_level       => 1,
194         stream          => 1,
195 );                      
196
197
198 # --------------------------------------------------------------------------------
199 # MARC index extraction
200
201 package OpenILS::Application::Ingest::XPATH;
202 use base qw/OpenILS::Application::Ingest/;
203 use Unicode::Normalize;
204
205 # give this an XML documentElement and an XPATH expression
206 sub xpath_to_string {
207         my $xml = shift;
208         my $xpath = shift;
209         my $ns_uri = shift;
210         my $ns_prefix = shift;
211         my $unique = shift;
212
213         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
214
215         my $string = "";
216
217         # grab the set of matching nodes
218         my @nodes = $xml->findnodes( $xpath );
219         for my $value (@nodes) {
220
221                 # grab all children of the node
222                 my @children = $value->childNodes();
223                 for my $child (@children) {
224
225                         # add the childs content to the growing buffer
226                         my $content = quotemeta($child->textContent);
227                         next if ($unique && $string =~ /$content/);  # uniquify the values
228                         $string .= $child->textContent . " ";
229                 }
230                 if( ! @children ) {
231                         $string .= $value->textContent . " ";
232                 }
233         }
234         return NFD($string);
235 }
236
237 sub class_index_string_xml {
238         my $self = shift;
239         my $client = shift;
240         my $xml = shift;
241         my @classes = @_;
242
243         OpenILS::Application::Ingest->post_init();
244         $xml = $parser->parse_string($xml) unless (ref $xml);
245
246         my %transform_cache;
247         
248         for my $class (@classes) {
249                 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
250                 for my $type ( keys %{ $xpathset->{$class} } ) {
251
252                         my $def = $xpathset->{$class}->{$type};
253                         my $sf = $supported_formats{$def->{format}};
254
255                         my $document = $xml;
256
257                         if ($sf->{xslt}) {
258                                 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
259                                 $transform_cache{$def->{format}} = $document;
260                         }
261
262                         my $value =  xpath_to_string(
263                                         $document->documentElement      => $def->{xpath},
264                                         $sf->{ns}                       => $def->{format},
265                                         1
266                         );
267
268                         next unless $value;
269
270                         $value =~ s/\pM+//sgo;
271                         $value =~ s/\pC+//sgo;
272                         #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
273
274                         $value =~ s/(\w)\./$1/sgo;
275                         $value = lc($value);
276
277                         my $fm = $class_constructor->new;
278                         $fm->value( $value );
279                         $fm->field( $xpathset->{$class}->{$type}->{id} );
280                         $client->respond($fm);
281                 }
282         }
283         return undef;
284 }
285 __PACKAGE__->register_method(  
286         api_name        => "open-ils.ingest.field_entry.class.xml",
287         method          => "class_index_string_xml",
288         api_level       => 1,
289         argc            => 2,
290         stream          => 1,
291 );                      
292
293 sub class_index_string_record {
294         my $self = shift;
295         my $client = shift;
296         my $rec = shift;
297         my @classes = shift;
298
299         OpenILS::Application::Ingest->post_init();
300         my $r = OpenSRF::AppSession
301                         ->create('open-ils.cstore')
302                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
303                         ->gather(1);
304
305         return undef unless ($r and @$r);
306
307         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
308                 $fm->source($rec);
309                 $client->respond($fm);
310         }
311         return undef;
312 }
313 __PACKAGE__->register_method(  
314         api_name        => "open-ils.ingest.field_entry.class.record",
315         method          => "class_index_string_record",
316         api_level       => 1,
317         argc            => 2,
318         stream          => 1,
319 );                      
320
321 sub all_index_string_xml {
322         my $self = shift;
323         my $client = shift;
324         my $xml = shift;
325
326         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
327                 $client->respond($fm);
328         }
329         return undef;
330 }
331 __PACKAGE__->register_method(  
332         api_name        => "open-ils.ingest.extract.field_entry.all.xml",
333         method          => "all_index_string_xml",
334         api_level       => 1,
335         argc            => 1,
336         stream          => 1,
337 );                      
338
339 sub all_index_string_record {
340         my $self = shift;
341         my $client = shift;
342         my $rec = shift;
343
344         OpenILS::Application::Ingest->post_init();
345         my $r = OpenSRF::AppSession
346                         ->create('open-ils.cstore')
347                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
348                         ->gather(1);
349
350         return undef unless ($r and @$r);
351
352         for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
353                 $fm->source($rec);
354                 $client->respond($fm);
355         }
356         return undef;
357 }
358 __PACKAGE__->register_method(  
359         api_name        => "open-ils.ingest.extract.field_entry.all.record",
360         method          => "all_index_string_record",
361         api_level       => 1,
362         argc            => 1,
363         stream          => 1,
364 );                      
365
366 # --------------------------------------------------------------------------------
367 # Flat MARC
368
369 package OpenILS::Application::Ingest::FlatMARC;
370 use base qw/OpenILS::Application::Ingest/;
371 use Unicode::Normalize;
372
373
374 sub _marcxml_to_full_rows {
375
376         my $marcxml = shift;
377         my $xmltype = shift || 'metabib';
378
379         my $type = "Fieldmapper::${xmltype}::full_rec";
380
381         my @ns_list;
382         
383         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
384
385         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
386                 next unless $tagline;
387
388                 my $ns = $type->new;
389
390                 $ns->tag( 'LDR' );
391                 my $val = $tagline->textContent;
392                 $val = NFD($val);
393                 $val =~ s/(\pM+)//gso;
394                 $ns->value( $val );
395
396                 push @ns_list, $ns;
397         }
398
399         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
400                 next unless $tagline;
401
402                 my $ns = $type->new;
403
404                 $ns->tag( $tagline->getAttribute( "tag" ) );
405                 my $val = $tagline->textContent;
406                 $val = NFD($val);
407                 $val =~ s/(\pM+)//gso;
408                 $ns->value( $val );
409
410                 push @ns_list, $ns;
411         }
412
413         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
414                 next unless $tagline;
415
416                 my $tag = $tagline->getAttribute( "tag" );
417                 my $ind1 = $tagline->getAttribute( "ind1" );
418                 my $ind2 = $tagline->getAttribute( "ind2" );
419
420                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
421                         next unless $data;
422
423                         my $ns = $type->new;
424
425                         $ns->tag( $tag );
426                         $ns->ind1( $ind1 );
427                         $ns->ind2( $ind2 );
428                         $ns->subfield( $data->getAttribute( "code" ) );
429                         my $val = $data->textContent;
430                         $val = NFD($val);
431                         $val =~ s/(\pM+)//gso;
432                         $ns->value( lc($val) );
433
434                         push @ns_list, $ns;
435                 }
436         }
437
438         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
439         return @ns_list;
440 }
441
442 sub flat_marc_xml {
443         my $self = shift;
444         my $client = shift;
445         my $xml = shift;
446
447         $log->debug("processing [$xml]");
448
449         $xml = $parser->parse_string($xml) unless (ref $xml);
450
451         my $type = 'metabib';
452         $type = 'authority' if ($self->api_name =~ /authority/o);
453
454         OpenILS::Application::Ingest->post_init();
455
456         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
457         return undef;
458 }
459 __PACKAGE__->register_method(  
460         api_name        => "open-ils.ingest.flat_marc.authority.xml",
461         method          => "flat_marc_xml",
462         api_level       => 1,
463         argc            => 1,
464         stream          => 1,
465 );                      
466 __PACKAGE__->register_method(  
467         api_name        => "open-ils.ingest.flat_marc.biblio.xml",
468         method          => "flat_marc_xml",
469         api_level       => 1,
470         argc            => 1,
471         stream          => 1,
472 );                      
473
474 sub flat_marc_record {
475         my $self = shift;
476         my $client = shift;
477         my $rec = shift;
478
479         my $type = 'biblio';
480         $type = 'authority' if ($self->api_name =~ /authority/o);
481
482         OpenILS::Application::Ingest->post_init();
483         my $r = OpenSRF::AppSession
484                         ->create('open-ils.cstore')
485                         ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
486                         ->gather(1);
487
488
489         return undef unless ($r and $r->marc);
490
491         my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
492         for my $row (@rows) {
493                 $client->respond($row);
494                 $log->debug(JSON->perl2JSON($row), DEBUG);
495         }
496         return undef;
497 }
498 __PACKAGE__->register_method(  
499         api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
500         method          => "flat_marc_record",
501         api_level       => 1,
502         argc            => 1,
503         stream          => 1,
504 );                      
505 __PACKAGE__->register_method(  
506         api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
507         method          => "flat_marc_record",
508         api_level       => 1,
509         argc            => 1,
510         stream          => 1,
511 );                      
512
513 # --------------------------------------------------------------------------------
514 # Fingerprinting
515
516 package OpenILS::Application::Ingest::Biblio::Fingerprint;
517 use base qw/OpenILS::Application::Ingest/;
518 use Unicode::Normalize;
519 use OpenSRF::EX qw/:try/;
520
521 sub biblio_fingerprint_record {
522         my $self = shift;
523         my $client = shift;
524         my $rec = shift;
525
526         OpenILS::Application::Ingest->post_init();
527
528         my $r = OpenSRF::AppSession
529                         ->create('open-ils.cstore')
530                         ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
531                         ->gather(1);
532
533         return undef unless ($r and $r->marc);
534
535         my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
536         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
537         return $fp;
538 }
539 __PACKAGE__->register_method(  
540         api_name        => "open-ils.ingest.fingerprint.record",
541         method          => "biblio_fingerprint_record",
542         api_level       => 1,
543         argc            => 1,
544 );                      
545
546 our $fp_script;
547 sub biblio_fingerprint {
548         my $self = shift;
549         my $client = shift;
550         my $xml = shift;
551
552         $log->internal("Got MARC [$xml]");
553
554         if(!$fp_script) {
555                 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
556                 my $conf = OpenSRF::Utils::SettingsClient->new;
557
558                 my $libs        = $conf->config_value(@pfx, 'script_path');
559                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
560                 my $script_libs = (ref($libs)) ? $libs : [$libs];
561
562                 $log->debug("Loading script $script_file for biblio fingerprinting...");
563                 
564                 $fp_script = new OpenILS::Utils::ScriptRunner
565                         ( file          => $script_file,
566                           paths         => $script_libs,
567                           reset_count   => 1000 );
568         }
569
570         $fp_script->insert('environment' => {marc => $xml} => 1);
571
572         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return undef);
573         $log->debug("Script for biblio fingerprinting completed successfully...");
574
575         return $res;
576 }
577 __PACKAGE__->register_method(  
578         api_name        => "open-ils.ingest.fingerprint.xml",
579         method          => "biblio_fingerprint",
580         api_level       => 1,
581         argc            => 1,
582 );                      
583
584
585 1;
586
587 __END__
588
589 sub in_transaction {
590         OpenILS::Application::Ingest->post_init();
591         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
592 }
593
594 sub begin_transaction {
595         my $self = shift;
596         my $client = shift;
597         
598         OpenILS::Application::Ingest->post_init();
599         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
600         
601         try {
602                 if (!$outer_xact) {
603                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
604                         #__PACKAGE__->st_sess->connect;
605                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
606                         unless (defined $r and $r) {
607                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
608                                 #__PACKAGE__->st_sess->disconnect;
609                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
610                         }
611                 }
612         } otherwise {
613                 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
614         };
615
616         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
617 }
618
619 sub rollback_transaction {
620         my $self = shift;
621         my $client = shift;
622
623         OpenILS::Application::Ingest->post_init();
624         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
625
626         try {
627                 if ($outer_xact) {
628                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
629                 } else {
630                         $log->debug("Ingest isn't inside a transaction.", INFO);
631                 }
632         } catch Error with {
633                 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
634         };
635
636         return 1;
637 }
638
639 sub commit_transaction {
640         my $self = shift;
641         my $client = shift;
642
643         OpenILS::Application::Ingest->post_init();
644         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
645
646         try {
647                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
648                 if ($outer_xact) {
649                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
650                         unless (defined $r and $r) {
651                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
652                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
653                         }
654                         #__PACKAGE__->st_sess->disconnect;
655                 } else {
656                         $log->debug("Ingest isn't inside a transaction.", INFO);
657                 }
658         } catch Error with {
659                 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
660         };
661
662         return 1;
663 }
664
665 sub storage_req {
666         my $self = shift;
667         my $method = shift;
668         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
669         return shift( @res );
670 }
671
672 sub scrub_authority_record {
673         my $self = shift;
674         my $client = shift;
675         my $rec = shift;
676
677         my $commit = 0;
678         if (!OpenILS::Application::Ingest->in_transaction) {
679                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
680                 $commit = 1;
681         }
682
683         my $success = 1;
684         try {
685                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
686
687                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
688                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
689
690                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
691         } otherwise {
692                 $log->debug('Scrubbing failed : '.shift(), ERROR);
693                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
694                 $success = 0;
695         };
696
697         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
698         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
699         return $success;
700 }
701 __PACKAGE__->register_method(  
702         api_name        => "open-ils.worm.scrub.authority",
703         method          => "scrub_authority_record",
704         api_level       => 1,
705         argc            => 1,
706 );                      
707
708
709 sub scrub_metabib_record {
710         my $self = shift;
711         my $client = shift;
712         my $rec = shift;
713
714         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
715                 $rec = OpenILS::Application::Ingest->storage_req(
716                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
717                 );
718         }
719
720         my $commit = 0;
721         if (!OpenILS::Application::Ingest->in_transaction) {
722                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
723                 $commit = 1;
724         }
725
726         my $success = 1;
727         try {
728                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
729                 
730                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
731                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
732                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
733                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
734                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
735                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
736                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
737                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
738
739                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
740                 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
741
742                 for my $mr (@$masters) {
743                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
744                         my $others = OpenILS::Application::Ingest->storage_req(
745                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
746
747                         if (@$others) {
748                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
749                                 $mr->master_record($others->[0]->source);
750                                 OpenILS::Application::Ingest->storage_req(
751                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
752                                         { id => $mr->id },
753                                         { master_record => $others->[0]->source, mods => undef }
754                                 );
755                         } else {
756                                 warn "Removing metarecord whose master is $rec";
757                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
758                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
759                                 warn "Metarecord removed";
760                                 $log->debug( "Metarecord removed", DEBUG);
761                         }
762                 }
763
764                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
765
766         } otherwise {
767                 $log->debug('Scrubbing failed : '.shift(), ERROR);
768                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
769                 $success = 0;
770         };
771
772         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
773         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
774         return $success;
775 }
776 __PACKAGE__->register_method(  
777         api_name        => "open-ils.worm.scrub.biblio",
778         method          => "scrub_metabib_record",
779         api_level       => 1,
780         argc            => 1,
781 );                      
782
783 sub wormize_biblio_metarecord {
784         my $self = shift;
785         my $client = shift;
786         my $mrec = shift;
787
788         my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
789
790         my $count = 0;
791         for my $r (@$recs) {
792                 my $success = 0;
793                 try {
794                         $success = wormize_biblio_record($self => $client => $r->source);
795                         $client->respond(
796                                 { record  => $r->source,
797                                   metarecord => $rec->metarecord,
798                                   success => $success,
799                                 }
800                         );
801                 } catch Error with {
802                         my $e = shift;
803                         $client->respond(
804                                 { record  => $r->source,
805                                   metarecord => $rec->metarecord,
806                                   success => $success,
807                                   error   => $e,
808                                 }
809                         );
810                 };
811         }
812         return undef;
813 }
814 __PACKAGE__->register_method(
815         api_name        => "open-ils.worm.wormize.metarecord",
816         method          => "wormize_biblio_metarecord",
817         api_level       => 1,
818         argc            => 1,
819         stream          => 1,
820 );
821 __PACKAGE__->register_method(
822         api_name        => "open-ils.worm.wormize.metarecord.nomap",
823         method          => "wormize_biblio_metarecord",
824         api_level       => 1,
825         argc            => 1,
826         stream          => 1,
827 );
828 __PACKAGE__->register_method(
829         api_name        => "open-ils.worm.wormize.metarecord.noscrub",
830         method          => "wormize_biblio_metarecord",
831         api_level       => 1,
832         argc            => 1,
833         stream          => 1,
834 );
835 __PACKAGE__->register_method(
836         api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
837         method          => "wormize_biblio_metarecord",
838         api_level       => 1,
839         argc            => 1,
840         stream          => 1,
841 );
842
843
844 sub wormize_biblio_record {
845         my $self = shift;
846         my $client = shift;
847         my $rec = shift;
848
849         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
850                 $rec = OpenILS::Application::Ingest->storage_req(
851                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
852                 );
853         }
854
855
856         my $commit = 0;
857         if (!OpenILS::Application::Ingest->in_transaction) {
858                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
859                 $commit = 1;
860         }
861
862         my $success = 1;
863         try {
864                 # clean up the cruft
865                 unless ($self->api_name =~ /noscrub/o) {
866                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
867                 }
868
869                 # now redo 'em
870                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
871
872                 my @full_rec = ();
873                 my @rec_descriptor = ();
874                 my %field_entry = (
875                         title   => [],
876                         author  => [],
877                         subject => [],
878                         keyword => [],
879                         series  => [],
880                 );
881                 my %metarecord = ();
882                 my @source_map = ();
883                 for my $r (@$bibs) {
884                         try {
885                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
886
887                                 my $xml = $parser->parse_string($r->marc);
888
889                                 #update the fingerprint
890                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
891                                 OpenILS::Application::Ingest->storage_req(
892                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
893                                         { id => $r->id },
894                                         { fingerprint => $fp->{fingerprint},
895                                           quality     => int($fp->{quality}) }
896                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
897
898                                 # the full_rec stuff
899                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
900                                         $fr->record( $r->id );
901                                         push @full_rec, $fr;
902                                 }
903
904                                 # the rec_descriptor stuff
905                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
906                                 $rd->record( $r->id );
907                                 push @rec_descriptor, $rd;
908                         
909                                 # the indexing field entry stuff
910                                 for my $class ( qw/title author subject keyword series/ ) {
911                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
912                                                 $fe->source( $r->id );
913                                                 push @{$field_entry{$class}}, $fe;
914                                         }
915                                 }
916
917                                 unless ($self->api_name =~ /nomap/o) {
918                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
919                                 
920                                         unless ($mr) {
921                                                 $mr = Fieldmapper::metabib::metarecord->new;
922                                                 $mr->fingerprint( $fp->{fingerprint} );
923                                                 $mr->master_record( $r->id );
924                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
925                                         }
926
927                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
928                                         $mr_map->metarecord( $mr->id );
929                                         $mr_map->source( $r->id );
930                                         push @source_map, $mr_map;
931
932                                         $metarecord{$mr->id} = $mr;
933                                 }
934                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
935                         } otherwise {
936                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
937                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
938                         };
939                 }
940                 
941
942                 if (@rec_descriptor) {
943                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
944
945                         OpenILS::Application::Ingest->storage_req(
946                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
947                                 @source_map
948                         ) if (@source_map);
949
950                         for my $mr ( values %metarecord ) {
951                                 my $sources = OpenILS::Application::Ingest->storage_req(
952                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
953                                         $mr->id
954                                 );
955
956                                 my $bibs = OpenILS::Application::Ingest->storage_req(
957                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
958                                         [ map { $_->source } @$sources ]
959                                 );
960
961                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
962
963                                 OpenILS::Application::Ingest->storage_req(
964                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
965                                         { id => $mr->id },
966                                         { master_record => $master->id, mods => undef }
967                                 );
968                         }
969
970                         OpenILS::Application::Ingest->storage_req(
971                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
972                                 @rec_descriptor
973                         ) if (@rec_descriptor);
974
975                         OpenILS::Application::Ingest->storage_req(
976                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
977                                 @full_rec
978                         ) if (@full_rec);
979
980                         OpenILS::Application::Ingest->storage_req(
981                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
982                                 @{ $field_entry{title} }
983                         ) if (@{ $field_entry{title} });
984
985                         OpenILS::Application::Ingest->storage_req(
986                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
987                                 @{ $field_entry{author} }
988                         ) if (@{ $field_entry{author} });
989                         
990                         OpenILS::Application::Ingest->storage_req(
991                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
992                                 @{ $field_entry{subject} }
993                         ) if (@{ $field_entry{subject} });
994
995                         OpenILS::Application::Ingest->storage_req(
996                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
997                                 @{ $field_entry{keyword} }
998                         ) if (@{ $field_entry{keyword} });
999
1000                         OpenILS::Application::Ingest->storage_req(
1001                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1002                                 @{ $field_entry{series} }
1003                         ) if (@{ $field_entry{series} });
1004
1005                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1006                 } else {
1007                         $success = 0;
1008                 }
1009
1010         } otherwise {
1011                 $log->debug('Wormization failed : '.shift(), ERROR);
1012                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1013                 $success = 0;
1014         };
1015
1016         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1017         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1018         return $success;
1019 }
1020 __PACKAGE__->register_method(
1021         api_name        => "open-ils.worm.wormize.biblio",
1022         method          => "wormize_biblio_record",
1023         api_level       => 1,
1024         argc            => 1,
1025 );
1026 __PACKAGE__->register_method(
1027         api_name        => "open-ils.worm.wormize.biblio.nomap",
1028         method          => "wormize_biblio_record",
1029         api_level       => 1,
1030         argc            => 1,
1031 );
1032 __PACKAGE__->register_method(
1033         api_name        => "open-ils.worm.wormize.biblio.noscrub",
1034         method          => "wormize_biblio_record",
1035         api_level       => 1,
1036         argc            => 1,
1037 );
1038 __PACKAGE__->register_method(
1039         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
1040         method          => "wormize_biblio_record",
1041         api_level       => 1,
1042         argc            => 1,
1043 );
1044
1045 sub wormize_authority_record {
1046         my $self = shift;
1047         my $client = shift;
1048         my $rec = shift;
1049
1050         my $commit = 0;
1051         if (!OpenILS::Application::Ingest->in_transaction) {
1052                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1053                 $commit = 1;
1054         }
1055
1056         my $success = 1;
1057         try {
1058                 # clean up the cruft
1059                 unless ($self->api_name =~ /noscrub/o) {
1060                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1061                 }
1062
1063                 # now redo 'em
1064                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1065
1066                 my @full_rec = ();
1067                 my @rec_descriptor = ();
1068                 for my $r (@$bibs) {
1069                         my $xml = $parser->parse_string($r->marc);
1070
1071                         # the full_rec stuff
1072                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1073                                 $fr->record( $r->id );
1074                                 push @full_rec, $fr;
1075                         }
1076
1077                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
1078                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1079                         #$rd->record( $r->id );
1080                         #push @rec_descriptor, $rd;
1081                         
1082                 }
1083
1084                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1085
1086                 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1087                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1088
1089                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1090
1091         } otherwise {
1092                 $log->debug('Wormization failed : '.shift(), ERROR);
1093                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
1094                 $success = 0;
1095         };
1096
1097         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1098         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1099         return $success;
1100 }
1101 __PACKAGE__->register_method(
1102         api_name        => "open-ils.worm.wormize.authority",
1103         method          => "wormize_authority_record",
1104         api_level       => 1,
1105         argc            => 1,
1106 );
1107 __PACKAGE__->register_method(
1108         api_name        => "open-ils.worm.wormize.authority.noscrub",
1109         method          => "wormize_authority_record",
1110         api_level       => 1,
1111         argc            => 1,
1112 );
1113
1114
1115 # --------------------------------------------------------------------------------
1116 # MARC index extraction
1117
1118 package OpenILS::Application::Ingest::XPATH;
1119 use base qw/OpenILS::Application::Ingest/;
1120 use Unicode::Normalize;
1121
1122 # give this a MODS documentElement and an XPATH expression
1123 sub _xpath_to_string {
1124         my $xml = shift;
1125         my $xpath = shift;
1126         my $ns_uri = shift;
1127         my $ns_prefix = shift;
1128         my $unique = shift;
1129
1130         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1131
1132         my $string = "";
1133
1134         # grab the set of matching nodes
1135         my @nodes = $xml->findnodes( $xpath );
1136         for my $value (@nodes) {
1137
1138                 # grab all children of the node
1139                 my @children = $value->childNodes();
1140                 for my $child (@children) {
1141
1142                         # add the childs content to the growing buffer
1143                         my $content = quotemeta($child->textContent);
1144                         next if ($unique && $string =~ /$content/);  # uniquify the values
1145                         $string .= $child->textContent . " ";
1146                 }
1147                 if( ! @children ) {
1148                         $string .= $value->textContent . " ";
1149                 }
1150         }
1151         return NFD($string);
1152 }
1153
1154 sub class_all_index_string_xml {
1155         my $self = shift;
1156         my $client = shift;
1157         my $xml = shift;
1158         my $class = shift;
1159
1160         OpenILS::Application::Ingest->post_init();
1161         $xml = $parser->parse_string($xml) unless (ref $xml);
1162         
1163         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1164         for my $type ( keys %{ $xpathset->{$class} } ) {
1165                 my $value =  _xpath_to_string(
1166                                 $mods_sheet->transform($xml)->documentElement,
1167                                 $xpathset->{$class}->{$type}->{xpath},
1168                                 "http://www.loc.gov/mods/",
1169                                 "mods",
1170                                 1
1171                 );
1172
1173                 next unless $value;
1174
1175                 $value =~ s/\pM+//sgo;
1176                 $value =~ s/\pC+//sgo;
1177                 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
1178
1179                 $value =~ s/(\w)\./$1/sgo;
1180                 $value = lc($value);
1181
1182                 my $fm = $class_constructor->new;
1183                 $fm->value( $value );
1184                 $fm->field( $xpathset->{$class}->{$type}->{id} );
1185                 $client->respond($fm);
1186         }
1187         return undef;
1188 }
1189 __PACKAGE__->register_method(  
1190         api_name        => "open-ils.worm.field_entry.class.xml",
1191         method          => "class_all_index_string_xml",
1192         api_level       => 1,
1193         argc            => 1,
1194         stream          => 1,
1195 );                      
1196
1197 sub class_all_index_string_record {
1198         my $self = shift;
1199         my $client = shift;
1200         my $rec = shift;
1201         my $class = shift;
1202
1203         OpenILS::Application::Ingest->post_init();
1204         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1205
1206         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1207                 $fm->source($rec);
1208                 $client->respond($fm);
1209         }
1210         return undef;
1211 }
1212 __PACKAGE__->register_method(  
1213         api_name        => "open-ils.worm.field_entry.class.record",
1214         method          => "class_all_index_string_record",
1215         api_level       => 1,
1216         argc            => 1,
1217         stream          => 1,
1218 );                      
1219
1220
1221 sub class_index_string_xml {
1222         my $self = shift;
1223         my $client = shift;
1224         my $xml = shift;
1225         my $class = shift;
1226         my $type = shift;
1227
1228         OpenILS::Application::Ingest->post_init();
1229         $xml = $parser->parse_string($xml) unless (ref $xml);
1230         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1231 }
1232 __PACKAGE__->register_method(  
1233         api_name        => "open-ils.worm.class.type.xml",
1234         method          => "class_index_string_xml",
1235         api_level       => 1,
1236         argc            => 1,
1237 );                      
1238
1239 sub class_index_string_record {
1240         my $self = shift;
1241         my $client = shift;
1242         my $rec = shift;
1243         my $class = shift;
1244         my $type = shift;
1245
1246         OpenILS::Application::Ingest->post_init();
1247         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1248
1249         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1250         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1251         return $d;
1252 }
1253 __PACKAGE__->register_method(  
1254         api_name        => "open-ils.worm.class.type.record",
1255         method          => "class_index_string_record",
1256         api_level       => 1,
1257         argc            => 1,
1258 );                      
1259
1260 sub xml_xpath {
1261         my $self = shift;
1262         my $client = shift;
1263         my $xml = shift;
1264         my $xpath = shift;
1265         my $uri = shift;
1266         my $prefix = shift;
1267         my $unique = shift;
1268
1269         OpenILS::Application::Ingest->post_init();
1270         $xml = $parser->parse_string($xml) unless (ref $xml);
1271         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1272 }
1273 __PACKAGE__->register_method(  
1274         api_name        => "open-ils.worm.xpath.xml",
1275         method          => "xml_xpath",
1276         api_level       => 1,
1277         argc            => 1,
1278 );                      
1279
1280 sub record_xpath {
1281         my $self = shift;
1282         my $client = shift;
1283         my $rec = shift;
1284         my $xpath = shift;
1285         my $uri = shift;
1286         my $prefix = shift;
1287         my $unique = shift;
1288
1289         OpenILS::Application::Ingest->post_init();
1290         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1291
1292         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1293         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1294         return $d;
1295 }
1296 __PACKAGE__->register_method(  
1297         api_name        => "open-ils.worm.xpath.record",
1298         method          => "record_xpath",
1299         api_level       => 1,
1300         argc            => 1,
1301 );                      
1302
1303
1304 # --------------------------------------------------------------------------------
1305 # MARC Descriptor
1306
1307 package OpenILS::Application::Ingest::Biblio::Leader;
1308 use base qw/OpenILS::Application::Ingest/;
1309 use Unicode::Normalize;
1310
1311 our %marc_type_groups = (
1312         BKS => q/[at]{1}/,
1313         SER => q/[a]{1}/,
1314         VIS => q/[gkro]{1}/,
1315         MIX => q/[p]{1}/,
1316         MAP => q/[ef]{1}/,
1317         SCO => q/[cd]{1}/,
1318         REC => q/[ij]{1}/,
1319         COM => q/[m]{1}/,
1320 );
1321
1322 sub _type_re {
1323         my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
1324         return qr/$re/;
1325 }
1326
1327 our %biblio_descriptor_code = (
1328         item_type => sub { substr($ldr,6,1); },
1329         item_form =>
1330                 sub {
1331                         if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1332                                 return substr($oo8,29,1);
1333                         } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1334                                 return substr($oo8,23,1);
1335                         }
1336                         return ' ';
1337                 },
1338         bib_level => sub { substr($ldr,7,1); },
1339         control_type => sub { substr($ldr,8,1); },
1340         char_encoding => sub { substr($ldr,9,1); },
1341         enc_level => sub { substr($ldr,17,1); },
1342         cat_form => sub { substr($ldr,18,1); },
1343         pub_status => sub { substr($ldr,5,1); },
1344         item_lang => sub { substr($oo8,35,3); },
1345         lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1346         type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1347         audience => sub { substr($oo8,22,1); },
1348 );
1349
1350 sub _extract_biblio_descriptors {
1351         my $xml = shift;
1352
1353         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1354         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1355         local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1356
1357         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1358         for my $rd_field ( keys %biblio_descriptor_code ) {
1359                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1360         }
1361
1362         return $rd_obj;
1363 }
1364
1365 sub extract_biblio_desc_xml {
1366         my $self = shift;
1367         my $client = shift;
1368         my $xml = shift;
1369
1370         $xml = $parser->parse_string($xml) unless (ref $xml);
1371
1372         return _extract_biblio_descriptors( $xml );
1373 }
1374 __PACKAGE__->register_method(  
1375         api_name        => "open-ils.worm.biblio_leader.xml",
1376         method          => "extract_biblio_desc_xml",
1377         api_level       => 1,
1378         argc            => 1,
1379 );                      
1380
1381 sub extract_biblio_desc_record {
1382         my $self = shift;
1383         my $client = shift;
1384         my $rec = shift;
1385
1386         OpenILS::Application::Ingest->post_init();
1387         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1388
1389         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1390         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1391         return $d;
1392 }
1393 __PACKAGE__->register_method(  
1394         api_name        => "open-ils.worm.biblio_leader.record",
1395         method          => "extract_biblio_desc_record",
1396         api_level       => 1,
1397         argc            => 1,
1398 );                      
1399
1400 # --------------------------------------------------------------------------------
1401 # Flat MARC
1402
1403 package OpenILS::Application::Ingest::FlatMARC;
1404 use base qw/OpenILS::Application::Ingest/;
1405 use Unicode::Normalize;
1406
1407
1408 sub _marcxml_to_full_rows {
1409
1410         my $marcxml = shift;
1411         my $xmltype = shift || 'metabib';
1412
1413         my $type = "Fieldmapper::${xmltype}::full_rec";
1414
1415         my @ns_list;
1416         
1417         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1418
1419         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1420                 next unless $tagline;
1421
1422                 my $ns = $type->new;
1423
1424                 $ns->tag( 'LDR' );
1425                 my $val = $tagline->textContent;
1426                 $val = NFD($val);
1427                 $val =~ s/(\pM+)//gso;
1428                 $ns->value( $val );
1429
1430                 push @ns_list, $ns;
1431         }
1432
1433         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1434                 next unless $tagline;
1435
1436                 my $ns = $type->new;
1437
1438                 $ns->tag( $tagline->getAttribute( "tag" ) );
1439                 my $val = $tagline->textContent;
1440                 $val = NFD($val);
1441                 $val =~ s/(\pM+)//gso;
1442                 $ns->value( $val );
1443
1444                 push @ns_list, $ns;
1445         }
1446
1447         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1448                 next unless $tagline;
1449
1450                 my $tag = $tagline->getAttribute( "tag" );
1451                 my $ind1 = $tagline->getAttribute( "ind1" );
1452                 my $ind2 = $tagline->getAttribute( "ind2" );
1453
1454                 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1455                         next unless $data;
1456
1457                         my $ns = $type->new;
1458
1459                         $ns->tag( $tag );
1460                         $ns->ind1( $ind1 );
1461                         $ns->ind2( $ind2 );
1462                         $ns->subfield( $data->getAttribute( "code" ) );
1463                         my $val = $data->textContent;
1464                         $val = NFD($val);
1465                         $val =~ s/(\pM+)//gso;
1466                         $ns->value( lc($val) );
1467
1468                         push @ns_list, $ns;
1469                 }
1470         }
1471
1472         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1473         return @ns_list;
1474 }
1475
1476 sub flat_marc_xml {
1477         my $self = shift;
1478         my $client = shift;
1479         my $xml = shift;
1480
1481         $xml = $parser->parse_string($xml) unless (ref $xml);
1482
1483         my $type = 'metabib';
1484         $type = 'authority' if ($self->api_name =~ /authority/o);
1485
1486         OpenILS::Application::Ingest->post_init();
1487
1488         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1489         return undef;
1490 }
1491 __PACKAGE__->register_method(  
1492         api_name        => "open-ils.worm.flat_marc.authority.xml",
1493         method          => "flat_marc_xml",
1494         api_level       => 1,
1495         argc            => 1,
1496         stream          => 1,
1497 );                      
1498 __PACKAGE__->register_method(  
1499         api_name        => "open-ils.worm.flat_marc.biblio.xml",
1500         method          => "flat_marc_xml",
1501         api_level       => 1,
1502         argc            => 1,
1503         stream          => 1,
1504 );                      
1505
1506 sub flat_marc_record {
1507         my $self = shift;
1508         my $client = shift;
1509         my $rec = shift;
1510
1511         my $type = 'biblio';
1512         $type = 'authority' if ($self->api_name =~ /authority/o);
1513
1514         OpenILS::Application::Ingest->post_init();
1515         my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1516
1517         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1518         return undef;
1519 }
1520 __PACKAGE__->register_method(  
1521         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
1522         method          => "flat_marc_record",
1523         api_level       => 1,
1524         argc            => 1,
1525         stream          => 1,
1526 );                      
1527 __PACKAGE__->register_method(  
1528         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
1529         method          => "flat_marc_record",
1530         api_level       => 1,
1531         argc            => 1,
1532         stream          => 1,
1533 );                      
1534
1535
1536 # --------------------------------------------------------------------------------
1537 # Fingerprinting
1538
1539 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1540 use base qw/OpenILS::Application::Ingest/;
1541 use Unicode::Normalize;
1542 use OpenSRF::EX qw/:try/;
1543
1544 my @fp_mods_xpath = (
1545         '//mods:mods/mods:typeOfResource[text()="text"]' => [
1546                         title   => {
1547                                         xpath   => [
1548                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1549                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1550                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1551                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1552                                         ],
1553                                         fixup   => sub {
1554                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1555                                                         $text = NFD($text);
1556                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1557                                                         $text =~ s/\pM+//gso;
1558                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1559                                                         $text = lc($text);
1560                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1561                                                         $text =~ s/\s+/ /sgo;
1562                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1563                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1564                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1565                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1566                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1567                                                         $text =~ s/\[.[^\]]+\]//sgo;
1568                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1569                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1570                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1571                                                 },
1572                         },
1573                         author  => {
1574                                         xpath   => [
1575                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1576                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1577                                         ],
1578                                         fixup   => sub {
1579                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1580                                                         $text = NFD($text);
1581                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1582                                                         $text =~ s/\pM+//gso;
1583                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1584                                                         $text = lc($text);
1585                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1586                                                         $text =~ s/\s+/ /sgo;
1587                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1588                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1589                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1590                                                         $text =~ s/,?\s+.*$//sgo;
1591                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1592                                                 },
1593                         },
1594         ],
1595
1596         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1597                         title   => {
1598                                         xpath   => [
1599                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1600                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1601                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1602                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1603                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1604                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1605                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1606                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1607                                         ],
1608                                         fixup   => sub {
1609                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1610                                                         $text = NFD($text);
1611                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1612                                                         $text =~ s/\pM+//gso;
1613                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1614                                                         $text = lc($text);
1615                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1616                                                         $text =~ s/\s+/ /sgo;
1617                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1618                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1619                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1620                                                         $text =~ s/\b(?:the|an?)\b//sgo;
1621                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1622                                                         $text =~ s/\[.[^\]]+\]//sgo;
1623                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1624                                                         $text =~ s/\s*[;\/\.]*$//sgo;
1625                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1626                                                 },
1627                         },
1628                         author  => {
1629                                         xpath   => [
1630                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1631                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1632                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1633                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1634                                         ],
1635                                         fixup   => sub {
1636                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1637                                                         $text = NFD($text);
1638                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1639                                                         $text =~ s/\pM+//gso;
1640                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1641                                                         $text = lc($text);
1642                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1643                                                         $text =~ s/\s+/ /sgo;
1644                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1645                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
1646                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1647                                                         $text =~ s/,?\s+.*$//sgo;
1648                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1649                                                 },
1650                         },
1651         ],
1652
1653 );
1654
1655 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1656
1657 sub _fp_mods {
1658         my $mods = shift;
1659         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1660
1661         my $fp_string = '';
1662
1663         my $match_index = 0;
1664         my $block_index = 1;
1665         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1666                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1667
1668                         my $block_name_index = 0;
1669                         my $block_value_index = 1;
1670                         my $block = $fp_mods_xpath[$block_index];
1671                         while ( my $part = $$block[$block_value_index] ) {
1672                                 local $text;
1673                                 for my $xpath ( @{ $part->{xpath} } ) {
1674                                         $text = $mods->findvalue( $xpath );
1675                                         last if ($text);
1676                                 }
1677
1678                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1679
1680                                 if ($text) {
1681                                         $$part{fixup}->();
1682                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1683                                         $fp_string .= $text;
1684                                 }
1685
1686                                 $block_name_index += 2;
1687                                 $block_value_index += 2;
1688                         }
1689                 }
1690                 if ($fp_string) {
1691                         $fp_string =~ s/\W+//gso;
1692                         $log->debug("Fingerprint is [$fp_string]", INFO);;
1693                         return $fp_string;
1694                 }
1695
1696                 $match_index += 2;
1697                 $block_index += 2;
1698         }
1699         return undef;
1700 }
1701
1702 sub refingerprint_bibrec {
1703         my $self = shift;
1704         my $client = shift;
1705         my $rec = shift;
1706
1707         my $commit = 0;
1708         if (!OpenILS::Application::Ingest->in_transaction) {
1709                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1710                 $commit = 1;
1711         }
1712
1713         my $success = 1;
1714         try {
1715                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1716                 for my $b (@$bibs) {
1717                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
1718
1719                         if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1720
1721                                 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1722
1723                                 OpenILS::Application::Ingest->storage_req(
1724                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1725                                         { id => $b->id },
1726                                         { fingerprint => $fp->{fingerprint},
1727                                           quality     => $fp->{quality} }
1728                                 );
1729
1730                                 if ($self->api_name !~ /nomap/o) {
1731                                         my $old_source_map = OpenILS::Application::Ingest->storage_req(
1732                                                 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
1733                                                 $b->id
1734                                         );
1735
1736                                         my $old_mrid;
1737                                         if (ref($old_source_map) and @$old_source_map) {
1738                                                 for my $m (@$old_source_map) {
1739                                                         $old_mrid = $m->metarecord;
1740                                                         OpenILS::Application::Ingest->storage_req(
1741                                                                 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
1742                                                                 $m->id
1743                                                         );
1744                                                 }
1745                                         }
1746
1747                                         my $old_sm = OpenILS::Application::Ingest->storage_req(
1748                                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1749                                                         { metarecord => $old_mrid }
1750                                         ) if ($old_mrid);
1751
1752                                         if (ref($old_sm) and @$old_sm == 0) {
1753                                                 OpenILS::Application::Ingest->storage_req(
1754                                                         'open-ils.storage.direct.metabib.metarecord.delete',
1755                                                         $old_mrid
1756                                                 );
1757                                         }
1758
1759                                         my $mr = OpenILS::Application::Ingest->storage_req(
1760                                                         'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1761                                                         { fingerprint => $fp->{fingerprint} }
1762                                         )->[0];
1763                                 
1764                                         unless ($mr) {
1765                                                 $mr = Fieldmapper::metabib::metarecord->new;
1766                                                 $mr->fingerprint( $fp->{fingerprint} );
1767                                                 $mr->master_record( $b->id );
1768                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1769                                         }
1770
1771                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1772                                         $mr_map->metarecord( $mr->id );
1773                                         $mr_map->source( $b->id );
1774                                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
1775
1776                                 }
1777                         }
1778                         $client->respond($b->id);
1779                 }
1780
1781         } otherwise {
1782                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1783                 $success = 0;
1784         };
1785
1786         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1787         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1788         return undef;
1789 }
1790 __PACKAGE__->register_method(  
1791         api_name        => "open-ils.worm.fingerprint.record.update",
1792         method          => "refingerprint_bibrec",
1793         api_level       => 1,
1794         argc            => 1,
1795         stream          => 1,
1796 );                      
1797
1798 __PACKAGE__->register_method(  
1799         api_name        => "open-ils.worm.fingerprint.record.update.nomap",
1800         method          => "refingerprint_bibrec",
1801         api_level       => 1,
1802         argc            => 1,
1803 );                      
1804
1805 =comment
1806
1807 sub fingerprint_bibrec {
1808         my $self = shift;
1809         my $client = shift;
1810         my $rec = shift;
1811
1812         OpenILS::Application::Ingest->post_init();
1813         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1814
1815         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1816         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1817         return $fp;
1818
1819 }
1820 __PACKAGE__->register_method(  
1821         api_name        => "open-ils.worm.fingerprint.record",
1822         method          => "fingerprint_bibrec",
1823         api_level       => 0,
1824         argc            => 1,
1825 );                      
1826
1827
1828 sub fingerprint_mods {
1829         my $self = shift;
1830         my $client = shift;
1831         my $xml = shift;
1832
1833         OpenILS::Application::Ingest->post_init();
1834         my $mods = $parser->parse_string($xml)->documentElement;
1835
1836         return _fp_mods( $mods );
1837 }
1838 __PACKAGE__->register_method(  
1839         api_name        => "open-ils.worm.fingerprint.mods",
1840         method          => "fingerprint_mods",
1841         api_level       => 1,
1842         argc            => 1,
1843 );                      
1844
1845 sub fingerprint_marc {
1846         my $self = shift;
1847         my $client = shift;
1848         my $xml = shift;
1849
1850         $xml = $parser->parse_string($xml) unless (ref $xml);
1851
1852         OpenILS::Application::Ingest->post_init();
1853         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1854         $log->debug("Returning [$fp] as fingerprint", INFO);
1855         return $fp;
1856 }
1857 __PACKAGE__->register_method(  
1858         api_name        => "open-ils.worm.fingerprint.marc",
1859         method          => "fingerprint_marc",
1860         api_level       => 1,
1861         argc            => 1,
1862 );                      
1863
1864
1865 =cut
1866
1867 sub biblio_fingerprint_record {
1868         my $self = shift;
1869         my $client = shift;
1870         my $rec = shift;
1871
1872         OpenILS::Application::Ingest->post_init();
1873
1874         my $marc = OpenILS::Application::Ingest
1875                         ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
1876                         ->marc;
1877
1878         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1879         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1880         return $fp;
1881 }
1882 __PACKAGE__->register_method(  
1883         api_name        => "open-ils.worm.fingerprint.record",
1884         method          => "biblio_fingerprint_record",
1885         api_level       => 1,
1886         argc            => 1,
1887 );                      
1888
1889 our $fp_script;
1890 sub biblio_fingerprint {
1891         my $self = shift;
1892         my $client = shift;
1893         my $marc = shift;
1894
1895         OpenILS::Application::Ingest->post_init();
1896
1897         $marc = $parser->parse_string($marc) unless (ref $marc);
1898
1899         my $mods = OpenILS::Application::Ingest::entityize(
1900                 $mods_sheet
1901                         ->transform( $marc )
1902                         ->documentElement
1903                         ->toString,
1904                 'D'
1905         );
1906
1907         $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
1908
1909         warn $marc;
1910         $log->internal("Got MARC [$marc]");
1911         $log->internal("Created MODS [$mods]");
1912
1913         if(!$fp_script) {
1914                 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1915                 my $conf = OpenSRF::Utils::SettingsClient->new;
1916
1917                 my $libs        = $conf->config_value(@pfx, 'script_path');
1918                 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
1919                 my $script_libs = (ref($libs)) ? $libs : [$libs];
1920
1921                 $log->debug("Loading script $script_file for biblio fingerprinting...");
1922                 
1923                 $fp_script = new OpenILS::Utils::ScriptRunner
1924                         ( file          => $script_file,
1925                           paths         => $script_libs,
1926                           reset_count   => 1000 );
1927         }
1928
1929         $log->debug("Applying environment for biblio fingerprinting...");
1930
1931         my $env = {marc => $marc, mods => $mods};
1932         #my $res = {fingerprint => '', quality => '0'};
1933
1934         $fp_script->insert('environment' => $env);
1935         #$fp_script->insert('result' => $res);
1936
1937         $log->debug("Running script for biblio fingerprinting...");
1938
1939         my $res = $fp_script->run || ($log->error( "Fingerprint script died!  $@" ) && return 0);
1940
1941         $log->debug("Script for biblio fingerprinting completed successfully...");
1942
1943         return $res;
1944 }
1945 __PACKAGE__->register_method(  
1946         api_name        => "open-ils.worm.fingerprint.marc",
1947         method          => "biblio_fingerprint",
1948         api_level       => 1,
1949         argc            => 1,
1950 );                      
1951
1952 # --------------------------------------------------------------------------------
1953
1954 1;
1955
1956 __END__
1957 my $in_xact;
1958 my $begin;
1959 my $commit;
1960 my $rollback;
1961 my $lookup;
1962 my $update_entry;
1963 my $mr_lookup;
1964 my $mr_update;
1965 my $mr_create;
1966 my $create_source_map;
1967 my $sm_lookup;
1968 my $rm_old_rd;
1969 my $rm_old_sm;
1970 my $rm_old_fr;
1971 my $rm_old_tr;
1972 my $rm_old_ar;
1973 my $rm_old_sr;
1974 my $rm_old_kr;
1975 my $rm_old_ser;
1976
1977 my $fr_create;
1978 my $rd_create;
1979 my $create = {};
1980
1981 my %descriptor_code = (
1982         item_type => 'substr($ldr,6,1)',
1983         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1984         bib_level => 'substr($ldr,7,1)',
1985         control_type => 'substr($ldr,8,1)',
1986         char_encoding => 'substr($ldr,9,1)',
1987         enc_level => 'substr($ldr,17,1)',
1988         cat_form => 'substr($ldr,18,1)',
1989         pub_status => 'substr($ldr,5,1)',
1990         item_lang => 'substr($oo8,35,3)',
1991         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1992         audience => 'substr($oo8,22,1)',
1993 );
1994
1995 sub wormize {
1996
1997         my $self = shift;
1998         my $client = shift;
1999         my @docids = @_;
2000
2001         my $no_map = 0;
2002         if ($self->api_name =~ /no_map/o) {
2003                 $no_map = 1;
2004         }
2005
2006         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2007                 unless ($in_xact);
2008         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2009                 unless ($begin);
2010         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2011                 unless ($commit);
2012         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2013                 unless ($rollback);
2014         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2015                 unless ($sm_lookup);
2016         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2017                 unless ($mr_lookup);
2018         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2019                 unless ($mr_update);
2020         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2021                 unless ($lookup);
2022         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2023                 unless ($update_entry);
2024         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2025                 unless ($rm_old_sm);
2026         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2027                 unless ($rm_old_rd);
2028         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2029                 unless ($rm_old_fr);
2030         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2031                 unless ($rm_old_tr);
2032         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2033                 unless ($rm_old_ar);
2034         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2035                 unless ($rm_old_sr);
2036         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2037                 unless ($rm_old_kr);
2038         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2039                 unless ($rm_old_ser);
2040         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2041                 unless ($mr_create);
2042         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2043                 unless ($create_source_map);
2044         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2045                 unless ($rd_create);
2046         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2047                 unless ($fr_create);
2048         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2049                 unless ($$create{title});
2050         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2051                 unless ($$create{author});
2052         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2053                 unless ($$create{subject});
2054         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2055                 unless ($$create{keyword});
2056         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2057                 unless ($$create{series});
2058
2059
2060         my ($outer_xact) = $in_xact->run;
2061         try {
2062                 unless ($outer_xact) {
2063                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2064                         my ($r) = $begin->run($client);
2065                         unless (defined $r and $r) {
2066                                 $rollback->run;
2067                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2068                         }
2069                 }
2070         } catch Error with {
2071                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2072         };
2073
2074         my @source_maps;
2075         my @entry_list;
2076         my @mr_list;
2077         my @rd_list;
2078         my @ns_list;
2079         my @mods_data;
2080         my $ret = 0;
2081         for my $entry ( $lookup->run(@docids) ) {
2082                 # step -1: grab the doc from storage
2083                 next unless ($entry);
2084
2085                 if(!$mods_sheet) {
2086                         my $xslt_doc = $parser->parse_file(
2087                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2088                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2089                 }
2090
2091                 my $xml = $entry->marc;
2092                 my $docid = $entry->id;
2093                 my $marcdoc = $parser->parse_string($xml);
2094                 my $modsdoc = $mods_sheet->transform($marcdoc);
2095
2096                 my $mods = $modsdoc->documentElement;
2097                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2098
2099                 $entry->fingerprint( fingerprint_mods( $mods ) );
2100                 push @entry_list, $entry;
2101
2102                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2103
2104                 unless ($no_map) {
2105                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
2106                         if (!$mr || !@$mr) {
2107                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2108                                 $mr = new Fieldmapper::metabib::metarecord;
2109                                 $mr->fingerprint( $entry->fingerprint );
2110                                 $mr->master_record( $entry->id );
2111                                 my ($new_mr) = $mr_create->run($mr);
2112                                 $mr->id($new_mr);
2113                                 unless (defined $mr) {
2114                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2115                                 }
2116                         } else {
2117                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2118                                 $mr->mods('');
2119                                 push @mr_list, $mr;
2120                         }
2121
2122                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
2123                         $sm->metarecord( $mr->id );
2124                         $sm->source( $entry->id );
2125                         push @source_maps, $sm;
2126                 }
2127
2128                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2129                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2130
2131                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2132                 for my $rd_field ( keys %descriptor_code ) {
2133                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2134                 }
2135                 $rd_obj->record( $docid );
2136                 push @rd_list, $rd_obj;
2137
2138                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2139
2140                 # step 2: build the KOHA rows
2141                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2142                 $_->record( $docid ) for (@tmp_list);
2143                 push @ns_list, @tmp_list;
2144
2145                 $ret++;
2146
2147                 last unless ($self->api_name =~ /batch$/o);
2148         }
2149
2150         $rm_old_rd->run( { record => \@docids } );
2151         $rm_old_fr->run( { record => \@docids } );
2152         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2153         $rm_old_tr->run( { source => \@docids } );
2154         $rm_old_ar->run( { source => \@docids } );
2155         $rm_old_sr->run( { source => \@docids } );
2156         $rm_old_kr->run( { source => \@docids } );
2157         $rm_old_ser->run( { source => \@docids } );
2158
2159         unless ($no_map) {
2160                 my ($sm) = $create_source_map->run(@source_maps);
2161                 unless (defined $sm) {
2162                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2163                 }
2164                 my ($mr) = $mr_update->run(@mr_list);
2165                 unless (defined $mr) {
2166                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2167                 }
2168         }
2169
2170         my ($re) = $update_entry->run(@entry_list);
2171         unless (defined $re) {
2172                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2173         }
2174
2175         my ($rd) = $rd_create->run(@rd_list);
2176         unless (defined $rd) {
2177                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2178         }
2179
2180         my ($fr) = $fr_create->run(@ns_list);
2181         unless (defined $fr) {
2182                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2183         }
2184
2185         # step 5: insert the new metadata
2186         for my $class ( qw/title author subject keyword series/ ) {
2187                 my @md_list = ();
2188                 for my $doc ( @mods_data ) {
2189                         my ($did) = keys %$doc;
2190                         my ($data) = values %$doc;
2191
2192                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2193                         for my $row ( keys %{ $$data{$class} } ) {
2194                                 next unless (exists $$data{$class}{$row});
2195                                 next unless ($$data{$class}{$row}{value});
2196                                 my $fm_obj = $fm_constructor->new;
2197                                 $fm_obj->value( $$data{$class}{$row}{value} );
2198                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
2199                                 $fm_obj->source( $did );
2200                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2201
2202                                 push @md_list, $fm_obj;
2203                         }
2204                 }
2205                         
2206                 my ($cr) = $$create{$class}->run(@md_list);
2207                 unless (defined $cr) {
2208                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2209                 }
2210         }
2211
2212         unless ($outer_xact) {
2213                 $log->debug("Commiting transaction started by the Ingest.", INFO);
2214                 my ($c) = $commit->run;
2215                 unless (defined $c and $c) {
2216                         $rollback->run;
2217                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2218                 }
2219         }
2220
2221         return $ret;
2222 }
2223 __PACKAGE__->register_method( 
2224         api_name        => "open-ils.worm.wormize",
2225         method          => "wormize",
2226         api_level       => 1,
2227         argc            => 1,
2228 );
2229 __PACKAGE__->register_method( 
2230         api_name        => "open-ils.worm.wormize.no_map",
2231         method          => "wormize",
2232         api_level       => 1,
2233         argc            => 1,
2234 );
2235 __PACKAGE__->register_method( 
2236         api_name        => "open-ils.worm.wormize.batch",
2237         method          => "wormize",
2238         api_level       => 1,
2239         argc            => 1,
2240 );
2241 __PACKAGE__->register_method( 
2242         api_name        => "open-ils.worm.wormize.no_map.batch",
2243         method          => "wormize",
2244         api_level       => 1,
2245         argc            => 1,
2246 );
2247
2248
2249 my $ain_xact;
2250 my $abegin;
2251 my $acommit;
2252 my $arollback;
2253 my $alookup;
2254 my $aupdate_entry;
2255 my $amr_lookup;
2256 my $amr_update;
2257 my $amr_create;
2258 my $acreate_source_map;
2259 my $asm_lookup;
2260 my $arm_old_rd;
2261 my $arm_old_sm;
2262 my $arm_old_fr;
2263 my $arm_old_tr;
2264 my $arm_old_ar;
2265 my $arm_old_sr;
2266 my $arm_old_kr;
2267 my $arm_old_ser;
2268
2269 my $afr_create;
2270 my $ard_create;
2271 my $acreate = {};
2272
2273 sub authority_wormize {
2274
2275         my $self = shift;
2276         my $client = shift;
2277         my @docids = @_;
2278
2279         my $no_map = 0;
2280         if ($self->api_name =~ /no_map/o) {
2281                 $no_map = 1;
2282         }
2283
2284         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2285                 unless ($in_xact);
2286         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2287                 unless ($begin);
2288         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2289                 unless ($commit);
2290         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2291                 unless ($rollback);
2292         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2293                 unless ($alookup);
2294         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2295                 unless ($aupdate_entry);
2296         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2297                 unless ($arm_old_rd);
2298         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2299                 unless ($arm_old_fr);
2300         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2301                 unless ($ard_create);
2302         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2303                 unless ($afr_create);
2304
2305
2306         my ($outer_xact) = $in_xact->run;
2307         try {
2308                 unless ($outer_xact) {
2309                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2310                         my ($r) = $begin->run($client);
2311                         unless (defined $r and $r) {
2312                                 $rollback->run;
2313                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2314                         }
2315                 }
2316         } catch Error with {
2317                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2318         };
2319
2320         my @source_maps;
2321         my @entry_list;
2322         my @mr_list;
2323         my @rd_list;
2324         my @ns_list;
2325         my @mads_data;
2326         my $ret = 0;
2327         for my $entry ( $lookup->run(@docids) ) {
2328                 # step -1: grab the doc from storage
2329                 next unless ($entry);
2330
2331                 #if(!$mads_sheet) {
2332                 #       my $xslt_doc = $parser->parse_file(
2333                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2334                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2335                 #}
2336
2337                 my $xml = $entry->marc;
2338                 my $docid = $entry->id;
2339                 my $marcdoc = $parser->parse_string($xml);
2340                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2341
2342                 #my $mads = $madsdoc->documentElement;
2343                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2344
2345                 push @entry_list, $entry;
2346
2347                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2348                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2349
2350                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2351                 for my $rd_field ( keys %descriptor_code ) {
2352                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2353                 }
2354                 $rd_obj->record( $docid );
2355                 push @rd_list, $rd_obj;
2356
2357                 # step 2: build the KOHA rows
2358                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2359                 $_->record( $docid ) for (@tmp_list);
2360                 push @ns_list, @tmp_list;
2361
2362                 $ret++;
2363
2364                 last unless ($self->api_name =~ /batch$/o);
2365         }
2366
2367         $arm_old_rd->run( { record => \@docids } );
2368         $arm_old_fr->run( { record => \@docids } );
2369
2370         my ($rd) = $ard_create->run(@rd_list);
2371         unless (defined $rd) {
2372                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2373         }
2374
2375         my ($fr) = $fr_create->run(@ns_list);
2376         unless (defined $fr) {
2377                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2378         }
2379
2380         unless ($outer_xact) {
2381                 $log->debug("Commiting transaction started by Ingest.", INFO);
2382                 my ($c) = $commit->run;
2383                 unless (defined $c and $c) {
2384                         $rollback->run;
2385                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2386                 }
2387         }
2388
2389         return $ret;
2390 }
2391 __PACKAGE__->register_method( 
2392         api_name        => "open-ils.worm.authortiy.wormize",
2393         method          => "wormize",
2394         api_level       => 1,
2395         argc            => 1,
2396 );
2397 __PACKAGE__->register_method( 
2398         api_name        => "open-ils.worm.authority.wormize.batch",
2399         method          => "wormize",
2400         api_level       => 1,
2401         argc            => 1,
2402 );
2403
2404
2405 # --------------------------------------------------------------------------------
2406
2407
2408 sub _marcxml_to_full_rows {
2409
2410         my $marcxml = shift;
2411         my $type = shift || 'Fieldmapper::metabib::full_rec';
2412
2413         my @ns_list;
2414         
2415         my $root = $marcxml->documentElement;
2416
2417         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2418                 next unless $tagline;
2419
2420                 my $ns = new Fieldmapper::metabib::full_rec;
2421
2422                 $ns->tag( 'LDR' );
2423                 my $val = NFD($tagline->textContent);
2424                 $val =~ s/(\pM+)//gso;
2425                 $ns->value( $val );
2426
2427                 push @ns_list, $ns;
2428         }
2429
2430         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2431                 next unless $tagline;
2432
2433                 my $ns = new Fieldmapper::metabib::full_rec;
2434
2435                 $ns->tag( $tagline->getAttribute( "tag" ) );
2436                 my $val = NFD($tagline->textContent);
2437                 $val =~ s/(\pM+)//gso;
2438                 $ns->value( $val );
2439
2440                 push @ns_list, $ns;
2441         }
2442
2443         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2444                 next unless $tagline;
2445
2446                 my $tag = $tagline->getAttribute( "tag" );
2447                 my $ind1 = $tagline->getAttribute( "ind1" );
2448                 my $ind2 = $tagline->getAttribute( "ind2" );
2449
2450                 for my $data ( $tagline->childNodes ) {
2451                         next unless $data;
2452
2453                         my $ns = $type->new;
2454
2455                         $ns->tag( $tag );
2456                         $ns->ind1( $ind1 );
2457                         $ns->ind2( $ind2 );
2458                         $ns->subfield( $data->getAttribute( "code" ) );
2459                         my $val = NFD($data->textContent);
2460                         $val =~ s/(\pM+)//gso;
2461                         $ns->value( lc($val) );
2462
2463                         push @ns_list, $ns;
2464                 }
2465         }
2466         return @ns_list;
2467 }
2468
2469 sub _get_field_value {
2470
2471         my( $root, $xpath ) = @_;
2472
2473         my $string = "";
2474
2475         # grab the set of matching nodes
2476         my @nodes = $root->findnodes( $xpath );
2477         for my $value (@nodes) {
2478
2479                 # grab all children of the node
2480                 my @children = $value->childNodes();
2481                 for my $child (@children) {
2482
2483                         # add the childs content to the growing buffer
2484                         my $content = quotemeta($child->textContent);
2485                         next if ($string =~ /$content/);  # uniquify the values
2486                         $string .= $child->textContent . " ";
2487                 }
2488                 if( ! @children ) {
2489                         $string .= $value->textContent . " ";
2490                 }
2491         }
2492         $string = NFD($string);
2493         $string =~ s/(\pM)//gso;
2494         return lc($string);
2495 }
2496
2497
2498 sub modsdoc_to_values {
2499         my( $self, $mods ) = @_;
2500         my $data = {};
2501         for my $class (keys %$xpathset) {
2502                 $data->{$class} = {};
2503                 for my $type (keys %{$xpathset->{$class}}) {
2504                         $data->{$class}->{$type} = {};
2505                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
2506                 }
2507         }
2508         return $data;
2509 }
2510
2511
2512 1;
2513
2514