]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
adding full ingest method
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Metadata formats known to the ingest code, keyed by format name.  Each
# entry starts out holding only its XML namespace URI under {ns};
# post_init() later adds a compiled stylesheet under {xslt} for the
# 'mods' and 'mods3' entries.
our %supported_formats = map { $_->[0] => { ns => $_->[1] } } (
    [ mods3   => 'http://www.loc.gov/mods/v3' ],
    [ mods    => 'http://www.loc.gov/mods/' ],
    [ marcxml => 'http://www.loc.gov/MARC21/slim' ],
    [ srw_dc  => 'info:srw/schema/1/dc-schema' ],
    [ oai_dc  => 'http://www.openarchives.org/OAI/2.0/oai_dc/' ],
    [ rdf_dc  => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ],
    [ atom    => 'http://www.w3.org/2005/Atom' ],
    [ rss091  => 'http://my.netscape.com/rdf/simple/0.9/' ],
    [ rss092  => '' ],
    [ rss093  => '' ],
    [ rss094  => '' ],
    [ rss10   => 'http://purl.org/rss/1.0/' ],
    [ rss11   => 'http://purl.org/net/rss1.1#' ],
    [ rss2    => '' ],
);

# The logger is addressed by class name; methods are called on the string.
my $log = 'OpenSRF::Utils::Logger';

# File-scoped XML parser and XSLT engine used by the subs below.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

my $mods_sheet;
my $mads_sheet;

# field_class => field name => { xpath, id, format }; populated by post_init().
my $xpathset = {};

# Both subs are deliberately empty.
sub initialize {}
sub child_init {}
49
# One-time setup of the state shared by the ingest methods: compiles the
# MODS and MODS v3 stylesheets into %supported_formats and fills $xpathset
# from the config.metabib_field rows returned by cstore.  Does nothing once
# $xpathset has at least one key.  Callers in this file invoke it as a class
# method and ignore the return value.
sub post_init {

    # Already populated by an earlier call.
    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # format key, log message, stylesheet file (relative to $xsldir)
    my @stylesheets = (
        [ mods  => "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ mods3 => "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $message, $file) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $f (@$fields) {
        my $slot = $xpathset->{ $f->field_class }->{ $f->name } ||= {};
        $slot->{xpath}  = $f->xpath;
        $slot->{id}     = $f->id;
        $slot->{format} = $f->format;
        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }
}
85
# Normalize a string and replace its non-ASCII characters with hexadecimal
# numeric character references (e.g. U+00E9 becomes "&#xE9;").
#
# Params:
#   $stuff - the text to convert
#   $form  - optional; 'D' selects NFD normalization, anything else
#            (including no argument) selects NFC
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form  = shift;

    # $form is optional, so test definedness before comparing it.
    if (defined $form && $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD plus the supplementary planes; U+FFFE and U+FFFF are
    # skipped, as in the original range.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
99
100 # --------------------------------------------------------------------------------
101 # Biblio ingest
102
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
106
# Fully ingest one bib object and write the results through open-ils.cstore
# inside a single transaction: replaces the record's full_rec rows, record
# descriptor, classed field entries and metarecord source map, creates or
# updates the metarecord matching the bib's fingerprint, and finally updates
# the bib itself.
#
# Params (after $self and $client):
#   $bib - bib record object; its id, fingerprint and quality accessors are
#          used here
# Returns the bib id, or undef when the read-only ingest yields nothing or
# the commit request returns a false value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Turn e.g. Fieldmapper::metabib::title_field_entry into the
        # dotted form used in cstore method names.
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);

    # Get the matching MR, if any.
    my $mr = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord.search',
        { fingerprint => $bib->fingerprint }
    )->gather(1);

    if (!$mr) {
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );
    } else {
        my $maps = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        # This bib's own map rows were deleted above, so the list can be
        # empty; in that case there is no other candidate to look up.
        my @sources = map { $_->source } @{ $maps || [] };

        my $best;
        if (@sources) {
            $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => \@sources },
                { 'select'  => { bre => [ qw/id quality/ ] },
                  order_by  => { bre => "quality desc" },
                  limit     => 1,
                }
            )->gather(1);
        }

        # Keep the other record as master only when one was found and it
        # has strictly higher quality than this bib.
        if ($best && $best->quality > $bib->quality) {
            $mr->master_record($best->id);
        } else {
            $mr->master_record($bib->id);
        }

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # The session was opened with connect(); release it on both outcomes.
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
223
# Retrieve a biblio.record_entry by id from cstore and hand it to
# open-ils.ingest.full.biblio.object.
#
# Params (after $self and $client):
#   $rec - value passed to the record_entry retrieve call
# Returns whatever the object-level method's run() returns, or undef when
# the retrieve yields nothing usable.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $entry = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$entry or !@$entry) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return $ingest->run($entry);
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record",
    method          => "rw_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
245
# Run every read-only ingest step against a bib object's MARC and return
# the pieces without writing anything.
#
# Params (after $self and $client):
#   $bib - object providing marc and id accessors
# Returns a hash ref with keys full_rec (array ref), field_entries
# (array ref), fingerprint and descriptor.  The rows, entries and descriptor
# are stamped with the bib's id.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The flat-MARC and field-entry steps get the parsed document; the
    # fingerprint and descriptor steps get the XML string.
    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@field_entries) {
        $entry->source($bib->id);
    }
    for my $row (@full_rows) {
        $row->record($bib->id);
    }
    if ($descriptor) {
        $descriptor->record($bib->id);
    }

    my %result = (
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object.readonly",
    method          => "ro_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
271
# Run every read-only ingest step against a MARCXML string.  No record id
# is known at this level, so nothing is stamped onto the results.
#
# Params (after $self and $client):
#   the MARCXML text
# Returns a hash ref with keys full_rec (array ref), field_entries
# (array ref), fingerprint and descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    # The flat-MARC and field-entry steps get the parsed document; the
    # fingerprint and descriptor steps get the XML string.
    my @full_rows     = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @field_entries = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fingerprint) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($descriptor)  = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my %result = (
        full_rec      => \@full_rows,
        field_entries => \@field_entries,
        fingerprint   => $fingerprint,
        descriptor    => $descriptor,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml.readonly",
    method          => "ro_biblio_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
292
# Retrieve a biblio.record_entry by id, run the read-only XML ingest on its
# MARC, and stamp the given id onto the results.
#
# Params (after $self and $client):
#   $rec - value passed to the retrieve call and stamped onto the results
# Returns the result hash ref from open-ils.ingest.full.biblio.xml.readonly,
# or undef when the retrieve yields nothing usable.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor slot may be empty (the object-level variant guards it
    # the same way), so only stamp it when present.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
320
# Streaming wrapper: reads record ids from the client one at a time, runs
# open-ils.ingest.full.biblio.record.readonly on each, and responds with
# the result.  The loop stops when recv() returns an empty list or a
# message whose content is undefined.  Always returns undef.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $rec = $incoming[0]->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($rec);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record_stream.readonly",
    method          => "ro_biblio_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
351
# Streaming wrapper: reads MARCXML strings from the client one at a time,
# runs open-ils.ingest.full.biblio.xml.readonly on each, and responds with
# the result.  The loop stops when recv() returns an empty list or a
# message whose content is undefined.  Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $xml = $incoming[0]->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method          => "ro_biblio_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
379
# Streaming wrapper: reads bib objects from the client one at a time, runs
# open-ils.ingest.full.biblio.xml.readonly on each object's MARC, stamps
# the object's id onto the field entries and full_rec rows, and responds
# with the result.  The loop stops when recv() returns an empty list or a
# message whose content is undefined.  Always returns undef.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $bib = $incoming[0]->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($bib->id);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.bib_stream.import",
    method          => "rw_biblio_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
410
411
412 # --------------------------------------------------------------------------------
413 # Authority ingest
414
415 package OpenILS::Application::Ingest::Authority;
416 use base qw/OpenILS::Application::Ingest/;
417 use Unicode::Normalize;
418
# Flatten an authority object's MARC into full_rec rows, each stamped with
# the object's id.  Nothing is written.
#
# Params (after $self and $client):
#   $bib - object providing marc and id accessors
# Returns a hash ref with the single key full_rec (array ref).
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @full_rows = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    for my $row (@full_rows) {
        $row->record($bib->id);
    }

    my %result = ( full_rec => \@full_rows );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.object.readonly",
    method          => "ro_authority_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
439
# Flatten an authority MARCXML string into full_rec rows.  No record id is
# known at this level, so the rows are returned unstamped.
#
# Params (after $self and $client):
#   the MARCXML text
# Returns a hash ref with the single key full_rec (array ref).
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @full_rows = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    my %result = ( full_rec => \@full_rows );
    return \%result;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml.readonly",
    method          => "ro_authority_ingest_single_xml",
    api_level       => 1,
    argc            => 1,
);
457
# Retrieve an authority.record_entry by id, run the read-only authority XML
# ingest on its MARC, and stamp the given id onto the resulting rows.
#
# Params (after $self and $client):
#   $rec - value passed to the retrieve call and stamped onto the results
# Returns the result hash ref from
# open-ils.ingest.full.authority.xml.readonly, or undef when the retrieve
# yields nothing usable.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest in this file returns only full_rec, so a
    # descriptor is stamped only if one is actually present.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
484
# Streaming wrapper: reads authority record ids from the client one at a
# time, runs open-ils.ingest.full.authority.record.readonly on each, and
# responds with the result.  The loop stops when recv() returns an empty
# list or a message whose content is undefined.  Always returns undef.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $rec = $incoming[0]->content;
        last unless (defined $rec);

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record_stream.readonly",
    method          => "ro_authority_ingest_stream_record",
    api_level       => 1,
    stream          => 1,
);
514
# Streaming wrapper: reads authority MARCXML strings from the client one at
# a time, runs open-ils.ingest.full.authority.xml.readonly on each, and
# responds with the result.  The loop stops when recv() returns an empty
# list or a message whose content is undefined.  Always returns undef.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $xml = $incoming[0]->content;
        last unless (defined $xml);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.xml_stream.readonly",
    method          => "ro_authority_ingest_stream_xml",
    api_level       => 1,
    stream          => 1,
);
542
# Streaming wrapper: reads authority objects from the client one at a time,
# runs open-ils.ingest.full.authority.xml.readonly on each object's MARC,
# stamps the object's id onto the full_rec rows, and responds with the
# result.  The loop stops when recv() returns an empty list or a message
# whose content is undefined.  Always returns undef.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this sub.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (1) {
        my @incoming = $client->recv( count => 1, timeout => 5 );
        last unless @incoming;

        my $bib = $incoming[0]->content;
        last unless (defined $bib);

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.bib_stream.import",
    method          => "rw_authority_ingest_stream_import",
    api_level       => 1,
    stream          => 1,
);
572
573
574 # --------------------------------------------------------------------------------
575 # MARC index extraction
576
577 package OpenILS::Application::Ingest::XPATH;
578 use base qw/OpenILS::Application::Ingest/;
579 use Unicode::Normalize;
580
# Collect the text matched by an XPath expression into a single
# space-separated, NFD-normalized string.
#
# Params:
#   $xml       - XML element to search (the documentElement)
#   $xpath     - XPath expression to evaluate against it
#   $ns_uri    - optional namespace URI
#   $ns_prefix - optional namespace prefix; the namespace is set on $xml
#                only when both URI and prefix are true
#   $unique    - when true, skip any child text that already occurs
#                (as a substring) in the text gathered so far
# Returns the gathered text; each piece is followed by one space.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;
            $content = '' unless (defined $content);

            # Literal substring test.  index() is used instead of a regex
            # because an empty interpolated pattern makes Perl reuse the
            # last successful pattern, which is not a containment test.
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }

        # a node without children contributes its own text
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
612
# For each requested field class, evaluate every configured XPath for that
# class against the record and respond with one field-entry object per
# non-empty value.
#
# Params (after $self and $client):
#   $xml     - MARCXML string or an already parsed document (a reference)
#   @classes - field class names, used as keys into $xpathset
# Responds with Fieldmapper::metabib::<class>_field_entry objects carrying
# the normalized value and the field id.  Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();

    # Plain strings are entityized and parsed; references are used as-is.
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document, so each stylesheet is applied
    # at most once per call
    my %transform_cache;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            # Formats with a stylesheet are searched in their transformed
            # form; everything else is searched in the original document.
            my $document = $xml;
            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            next unless $value;

            # Decompose, strip marks and control characters, trim trailing
            # non-word characters, collapse dots between word characters,
            # then lowercase.
            $value = NFD($value);
            for ($value) {
                s/\pM+//sgo;
                s/\pC+//sgo;
                s/\W+$//sgo;
                s/(\w)\.+(\w)/$1$2/sgo;
            }
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.xml",
    method          => "class_index_string_xml",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
669
# Retrieve a record by id and respond with the field entries for the
# requested field classes, each stamped with the record id as its source.
#
# Params (after $self and $client):
#   $rec     - record id passed to the retrieve call
#   @classes - one or more field class names; all remaining arguments are
#              forwarded, matching open-ils.ingest.field_entry.class.xml
# Always returns undef; results are sent through $client->respond.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves an authority.record_entry, while the
    # sibling all_index_string_record retrieves a biblio.record_entry.
    # Possibly a copy-paste slip; left unchanged pending confirmation.
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
        ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
697
# Respond with the field entries for every configured field class of the
# given MARCXML (string or parsed document).
#
# Params (after $self and $client):
#   $xml - passed through to open-ils.ingest.field_entry.class.xml
# Always returns undef; results are sent through $client->respond.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # $xpathset must be populated before its keys are read below;
    # otherwise a first call would pass an empty class list.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
715
# Retrieve a biblio.record_entry by id and respond with the field entries
# for every configured field class, each stamped with the record id as its
# source.
#
# Params (after $self and $client):
#   $rec - record id passed to the retrieve call
# Always returns undef; results are sent through $client->respond.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $entry = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$entry or !@$entry) {
        return undef;
    }

    my $extract = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    foreach my $field_entry ($extract->run($entry->marc, keys(%$xpathset))) {
        $field_entry->source($rec);
        $client->respond($field_entry);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.record",
    method          => "all_index_string_record",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
742
743 # --------------------------------------------------------------------------------
744 # Flat MARC
745
746 package OpenILS::Application::Ingest::FlatMARC;
747 use base qw/OpenILS::Application::Ingest/;
748 use Unicode::Normalize;
749
750
# Flatten a parsed MARCXML document into a list of full_rec objects: one
# for each leader, one for each controlfield, and one for each subfield of
# each datafield.
#
# Params:
#   $marcxml - parsed XML document (anything supporting findnodes)
#   $xmltype - Fieldmapper namespace for the row class; defaults to
#              'metabib', giving Fieldmapper::metabib::full_rec
# Returns the list of row objects, in leader / controlfield / datafield
# order.
sub _marcxml_to_full_rows {
    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $row_class = "Fieldmapper::${xmltype}::full_rec";

    # Shared value clean-up: decompose, drop marks and control characters,
    # and trim trailing non-word characters.
    my $clean = sub {
        my $val = NFD($_[0]);
        $val =~ s/\pM+//sgo;
        $val =~ s/\pC+//sgo;
        $val =~ s/\W+$//sgo;
        return $val;
    };

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    # Leader rows always carry the tag 'LDR'.
    for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $row_class->new;
        $row->tag( 'LDR' );
        my $raw = $leader->textContent;
        $row->value( $clean->($raw) );

        push @ns_list, $row;
    }

    # Control fields carry their own tag attribute.
    for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $row_class->new;
        $row->tag( $control->getAttribute( "tag" ) );
        my $raw = $control->textContent;
        $row->value( $clean->($raw) );

        push @ns_list, $row;
    }

    # Data fields yield one row per subfield; only these values are
    # lowercased.
    for my $datafield ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $datafield;

        my $tag  = $datafield->getAttribute( "tag" );
        my $ind1 = $datafield->getAttribute( "ind1" );
        my $ind2 = $datafield->getAttribute( "ind2" );

        for my $subfield ( @{$datafield->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $row_class->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );
            my $raw = $subfield->textContent;
            $row->value( lc($clean->($raw)) );

            push @ns_list, $row;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
824
# Streams the flattened field rows for a single MARCXML document.
#
# Arguments (after $self and $client):
#   $xml - a MARCXML string, which is passed through entityize() and parsed
#          here, or a reference (an already-parsed document), used as-is.
#
# The row type handed to _marcxml_to_full_rows() is 'authority' when the
# invoked API name contains "authority" and 'metabib' otherwise.  Every row is
# sent to the caller with $client->respond(); the sub itself returns undef.
sub flat_marc_xml {
        my ($self, $client, $xml) = @_;

        $log->debug("processing [$xml]");

        # Only plain strings need parsing; references are passed straight through.
        if (!ref $xml) {
                $xml = $parser->parse_string( OpenILS::Application::Ingest::entityize($xml) );
        }

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

        OpenILS::Application::Ingest->post_init();

        for my $row ( _marcxml_to_full_rows($xml, $type) ) {
                $client->respond($row);
        }

        return undef;
}

# The same implementation is published once per record class.
for my $api_name (
        "open-ils.ingest.flat_marc.authority.xml",
        "open-ils.ingest.flat_marc.biblio.xml",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "flat_marc_xml",
                api_level       => 1,
                argc            => 1,
                stream          => 1,
        );
}
856
# Streams the flattened field rows for a stored record.
#
# Arguments (after $self and $client):
#   $rec - value passed to the cstore record_entry "retrieve" call.
#
# The record class is 'authority' when the invoked API name contains
# "authority" and 'biblio' otherwise.  The record is fetched through
# open-ils.cstore, its MARC is handed to the matching
# open-ils.ingest.flat_marc.<class>.xml method, and each resulting row is sent
# with $client->respond() and logged as JSON.  Returns undef in all cases,
# including when the record is missing or has no MARC.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();
        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
                        ->gather(1);

        # Nothing to flatten without a record that carries MARC.
        if (!$r || !$r->marc) {
                return undef;
        }

        my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
        for my $row ( $flattener->run($r->marc) ) {
                $client->respond($row);
                $log->debug(JSON->perl2JSON($row), DEBUG);
        }

        return undef;
}

# The same implementation is published once per record class.
for my $api_name (
        "open-ils.ingest.flat_marc.biblio.record_entry",
        "open-ils.ingest.flat_marc.authority.record_entry",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "flat_marc_record",
                api_level       => 1,
                argc            => 1,
                stream          => 1,
        );
}
895
896 # --------------------------------------------------------------------------------
897 # Fingerprinting
898
899 package OpenILS::Application::Ingest::Biblio::Fingerprint;
900 use base qw/OpenILS::Application::Ingest/;
901 use Unicode::Normalize;
902 use OpenSRF::EX qw/:try/;
903
# Computes the fingerprint of a stored bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - value passed to the cstore biblio.record_entry "retrieve" call.
#
# Returns the structure produced by open-ils.ingest.fingerprint.xml with its
# "quality" entry truncated to an integer, or undef when the record cannot be
# found, has no MARC, or the fingerprint method produced no usable result.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and $r->marc);

        my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

        # Without this guard the quality assignment below would autovivify an
        # undefined $fp into { quality => 0 }, so a failed fingerprint run
        # would be reported to the caller as a (bogus) result.
        unless (ref $fp) {
                $log->error("No fingerprint could be generated for record $rec");
                return undef;
        }

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        $fp->{quality} = int($fp->{quality});
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
929
# Cached script runner for the fingerprinting script; created on first use.
our $fp_script;

# Runs the configured biblio fingerprinting script against a MARC document.
#
# Arguments (after $self and $client):
#   the MARC XML, which is passed through entityize() before use.
#
# The script file and library path are read from the open-ils.ingest
# app_settings ("scripts/biblio_fingerprint" and "script_path") the first time
# the method is called.  Returns whatever the script run yields, or undef when
# the run produces a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 100,
                );
        }

        $fp_script->insert('environment' => {marc => $xml} => 1);

        # Handle failure explicitly: the early return must not depend on the
        # logger's return value, and the success message below must never be
        # emitted for a failed run.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.xml",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
967
# Cached script runner for the descriptor extraction script; created on first use.
our $rd_script;

# Runs the configured record-descriptor extraction script against a MARC document.
#
# Arguments (after $self and $client):
#   the MARC XML, which is passed through entityize() before use.
#
# The script file and library path are read from the open-ils.ingest
# app_settings ("scripts/biblio_descriptor" and "script_path") the first time
# the method is called.  Returns whatever the script run yields, or undef when
# the run produces a false value.
sub biblio_descriptor {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$rd_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio descriptor extraction...");

                $rd_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 100,
                );
        }

        $log->debug("Setting up environment for descriptor extraction script...");
        $rd_script->insert('environment.marc' => $xml => 1);
        $log->debug("Environment building complete...");

        # Handle failure explicitly: the early return must not depend on the
        # logger's return value, and the success message below must never be
        # emitted for a failed run.
        my $res = $rd_script->run;
        unless ($res) {
                $log->error( "Descriptor script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio descriptor extraction completed successfully");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.descriptor.xml",
        method          => "biblio_descriptor",
        api_level       => 1,
        argc            => 1,
);
1007
1008
1009 1;
1010
1011 __END__
1012
# Returns the result of open-ils.storage.transaction.current, i.e. a true
# value when a storage transaction is currently reported as open.
sub in_transaction {
        OpenILS::Application::Ingest->post_init();

        my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
        return $current;
}
1017
# Opens a storage transaction unless one is already reported as current.
#
# Arguments (after $self):
#   $client - forwarded to open-ils.storage.transaction.begin.
#
# A failed BEGIN triggers a rollback request and a PANIC, which is caught
# locally and only logged.  The return value is whatever
# open-ils.storage.transaction.current reports afterwards, so callers can test
# it for truth to learn whether a transaction is open.
sub begin_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

                        my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
                        if (!$started) {
                                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                        }
                }
        } otherwise {
                $log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
        };

        return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
1042
# Rolls back the current storage transaction, if one is reported as open.
#
# When no transaction is open this only logs that fact.  Any Error raised by
# the rollback request is converted into an OpenSRF::EX::PANIC.  Returns 1
# otherwise.
sub rollback_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                if (!$outer_xact) {
                        $log->debug("Ingest isn't inside a transaction.", INFO);
                } else {
                        __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                }
        } catch Error with {
                OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
        };

        return 1;
}
1062
# Commits the current storage transaction, if one is reported as open.
#
# When no transaction is open this only logs that fact.  A commit request that
# yields a false result is followed by a rollback request and a PANIC; any
# Error raised inside the attempt is re-raised as an OpenSRF::EX::PANIC with
# the "Ingest Couldn't COMMIT transaction!" message.  Returns 1 otherwise.
sub commit_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                if (!$outer_xact) {
                        $log->debug("Ingest isn't inside a transaction.", INFO);
                } else {
                        my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
                        if (!$committed) {
                                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                                OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
                        }
                }
        } catch Error with {
                OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
        };

        return 1;
}
1088
# Looks up the named method, runs it with the remaining arguments and returns
# only the first value it produced (undef when it produced nothing).
sub storage_req {
        my ($self, $method) = (shift, shift);

        my ($first) = __PACKAGE__->method_lookup( $method )->run( @_ );
        return $first;
}
1095
# Deletes the derived rows (full_rec and record_descriptor) of an authority
# record.
#
# Arguments (after $self and $client):
#   $rec - used as the "record" value of the mass_delete criteria.
#
# If no transaction is open on entry, one is started here and committed or
# rolled back before returning, depending on the outcome.  The deletes run
# between a set/release pair on the 'scrub_authority_record' savepoint, which
# is rolled back on failure.  Returns 1 on success and 0 on failure.
sub scrub_authority_record {
        my ($self, $client, $rec) = @_;

        # Remember whether this call owns the transaction.
        my $commit = 0;
        unless (OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client)
                        || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

                for my $method (
                        'open-ils.storage.direct.authority.full_rec.mass_delete',
                        'open-ils.storage.direct.authority.record_descriptor.mass_delete',
                ) {
                        OpenILS::Application::Ingest->storage_req( $method, { record => $rec } );
                }

                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
        } otherwise {
                my $err = shift;
                $log->debug('Scrubbing failed : '.$err, ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
                $success = 0;
        };

        if ($commit) {
                if ($success) {
                        OpenILS::Application::Ingest->commit_transaction;
                } else {
                        OpenILS::Application::Ingest->rollback_transaction;
                }
        }

        return $success;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.scrub.authority",
        method          => "scrub_authority_record",
        api_level       => 1,
        argc            => 1,
);
1131
1132
# Deletes the derived metabib rows of a bibliographic record and repairs any
# metarecord that used it as its master.
#
# Arguments (after $self and $client):
#   $rec - record value used in the delete criteria; when it is a hash it is
#          first replaced by the result of the record_entry search_where
#          id_list call.
#
# If no transaction is open on entry, one is started here and committed or
# rolled back before returning, depending on the outcome.  All work happens
# between a set/release pair on the 'scrub_metabib_record' savepoint, which is
# rolled back on failure.  Returns 1 on success and 0 on failure.
sub scrub_metabib_record {
        my ($self, $client, $rec) = @_;

        if ( ref($rec) =~ /HASH/o ) {
                $rec = OpenILS::Application::Ingest->storage_req(
                        'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
                );
        }

        # Remember whether this call owns the transaction.
        my $commit = 0;
        unless (OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client)
                        || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

                # Delete method => name of the column that refers to the record.
                # The order of the deletes is significant and is kept as a list.
                my @purges = (
                        [ 'open-ils.storage.direct.metabib.full_rec.mass_delete'              => 'record' ],
                        [ 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete' => 'source' ],
                        [ 'open-ils.storage.direct.metabib.record_descriptor.mass_delete'     => 'record' ],
                        [ 'open-ils.storage.direct.metabib.title_field_entry.mass_delete'     => 'source' ],
                        [ 'open-ils.storage.direct.metabib.author_field_entry.mass_delete'    => 'source' ],
                        [ 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete'   => 'source' ],
                        [ 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete'   => 'source' ],
                        [ 'open-ils.storage.direct.metabib.series_field_entry.mass_delete'    => 'source' ],
                );
                for my $purge (@purges) {
                        my ($method, $column) = @$purge;
                        OpenILS::Application::Ingest->storage_req( $method, { $column => $rec } );
                }

                $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
                my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

                for my $mr (@$masters) {
                        $log->debug( "Found metarecord whose master is $rec", DEBUG);
                        my $others = OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );

                        if (!@$others) {
                                # No other source is mapped to this metarecord, so drop it.
                                warn "Removing metarecord whose master is $rec";
                                $log->debug( "Removing metarecord whose master is $rec", DEBUG);
                                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
                                warn "Metarecord removed";
                                $log->debug( "Metarecord removed", DEBUG);
                        } else {
                                # Promote the first remaining mapped source to master.
                                my $new_master = $others->[0]->source;
                                $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$new_master, DEBUG);
                                $mr->master_record($new_master);
                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord.remote_update',
                                        { id => $mr->id },
                                        { master_record => $new_master, mods => undef }
                                );
                        }
                }

                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

        } otherwise {
                my $err = shift;
                $log->debug('Scrubbing failed : '.$err, ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
                $success = 0;
        };

        if ($commit) {
                if ($success) {
                        OpenILS::Application::Ingest->commit_transaction;
                } else {
                        OpenILS::Application::Ingest->rollback_transaction;
                }
        }

        return $success;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.scrub.biblio",
        method          => "scrub_metabib_record",
        api_level       => 1,
        argc            => 1,
);
1206
# Re-ingests every bibliographic record mapped to a metarecord.
#
# Arguments (after $self and $client):
#   $mrec - metarecord value used to search metarecord_source_map.
#
# For each source-map row, wormize_biblio_record() is called for the row's
# source and a hash { record, metarecord, success } is streamed to the caller;
# when an Error is caught, the response also carries it under "error".
# Returns undef.
sub wormize_biblio_metarecord {
        my $self = shift;
        my $client = shift;
        my $mrec = shift;

        my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

        for my $r (@$recs) {
                my $success = 0;
                try {
                        $success = wormize_biblio_record($self => $client => $r->source);
                        # The metarecord id comes from the source-map row being
                        # processed; the previous code read it from an undeclared
                        # (and therefore undefined) $rec.
                        $client->respond(
                                { record  => $r->source,
                                  metarecord => $r->metarecord,
                                  success => $success,
                                }
                        );
                } catch Error with {
                        my $e = shift;
                        $client->respond(
                                { record  => $r->source,
                                  metarecord => $r->metarecord,
                                  success => $success,
                                  error   => $e,
                                }
                        );
                };
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.nomap",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.noscrub",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1266
1267
# Rebuilds the derived metabib data for one or more bibliographic records.
#
# Arguments (after $self and $client):
#   $rec - record value used for the record_entry id search; when it is a hash
#          it is first replaced by the result of the record_entry search_where
#          id_list call.
#
# Behaviour selected by the invoked API name:
#   *noscrub* - skip the initial open-ils.worm.scrub.biblio call.
#   *nomap*   - skip metarecord lookup/creation and source mapping.
#
# If no transaction is open on entry, one is started here and committed or
# rolled back before returning.  Data for each record is extracted inside its
# own 'extract_data<id>' savepoint; the collected rows are then written in
# batches inside the 'wormize_record' savepoint.  Returns 1 on success, 0 when
# nothing could be extracted or an error was caught.
sub wormize_biblio_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        if ( ref($rec) && ref($rec) =~ /HASH/o ) {
                $rec = OpenILS::Application::Ingest->storage_req(
                        'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
                );
        }


        # Remember whether this call owns the transaction.
        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                # clean up the cruft
                unless ($self->api_name =~ /noscrub/o) {
                        $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
                }

                # now redo 'em
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

                my @classes = qw/title author subject keyword series/;

                my @full_rec = ();
                my @rec_descriptor = ();
                my %field_entry = map { $_ => [] } @classes;
                my %metarecord = ();
                my @source_map = ();
                for my $r (@$bibs) {
                        # Rows for this record are staged locally and only merged into
                        # the batch lists once extraction has fully succeeded.  Pushing
                        # straight onto the shared lists would leave partial rows behind
                        # for a record whose savepoint is rolled back below.
                        my @r_full_rec = ();
                        my @r_descriptor = ();
                        my %r_field_entry = map { $_ => [] } @classes;
                        my %r_metarecord = ();
                        my @r_source_map = ();

                        try {
                                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

                                my $xml = $parser->parse_string($r->marc);

                                #update the fingerprint
                                my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $r->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => int($fp->{quality}) }
                                ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);

                                # the full_rec stuff
                                for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
                                        $fr->record( $r->id );
                                        push @r_full_rec, $fr;
                                }

                                # the rec_descriptor stuff
                                my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
                                $rd->record( $r->id );
                                push @r_descriptor, $rd;

                                # the indexing field entry stuff
                                for my $class ( @classes ) {
                                        for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
                                                $fe->source( $r->id );
                                                push @{$r_field_entry{$class}}, $fe;
                                        }
                                }

                                unless ($self->api_name =~ /nomap/o) {
                                        my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];

                                        # No metarecord carries this fingerprint yet: create one
                                        # with the current record as its master.
                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $r->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $r->id );
                                        push @r_source_map, $mr_map;

                                        $r_metarecord{$mr->id} = $mr;
                                }
                                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );

                                # Extraction succeeded: hand the staged rows to the batch lists.
                                push @full_rec, @r_full_rec;
                                push @rec_descriptor, @r_descriptor;
                                push @source_map, @r_source_map;
                                push @{$field_entry{$_}}, @{$r_field_entry{$_}} for @classes;
                                $metarecord{$_} = $r_metarecord{$_} for keys %r_metarecord;
                        } otherwise {
                                $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
                                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
                        };
                }


                if (@rec_descriptor) {
                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
                                @source_map
                        ) if (@source_map);

                        # Re-elect the master of every touched metarecord: the mapped
                        # record with the highest quality wins.
                        for my $mr ( values %metarecord ) {
                                my $sources = OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
                                        $mr->id
                                );

                                my $candidates = OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
                                        [ map { $_->source } @$sources ]
                                );

                                my $master = ( sort { $b->quality <=> $a->quality } @$candidates )[0];

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord.remote_update',
                                        { id => $mr->id },
                                        { master_record => $master->id, mods => undef }
                                );
                        }

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.record_descriptor.batch.create',
                                @rec_descriptor
                        ) if (@rec_descriptor);

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.full_rec.batch.create',
                                @full_rec
                        ) if (@full_rec);

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.title_field_entry.batch.create',
                                @{ $field_entry{title} }
                        ) if (@{ $field_entry{title} });

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.author_field_entry.batch.create',
                                @{ $field_entry{author} }
                        ) if (@{ $field_entry{author} });

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
                                @{ $field_entry{subject} }
                        ) if (@{ $field_entry{subject} });

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
                                @{ $field_entry{keyword} }
                        ) if (@{ $field_entry{keyword} });

                        OpenILS::Application::Ingest->storage_req(
                                'open-ils.storage.direct.metabib.series_field_entry.batch.create',
                                @{ $field_entry{series} }
                        ) if (@{ $field_entry{series} });

                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
                } else {
                        # No record yielded a descriptor, so there is nothing to store.
                        $success = 0;
                }

        } otherwise {
                $log->debug('Wormization failed : '.shift(), ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
                $success = 0;
        };

        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return $success;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.biblio",
        method          => "wormize_biblio_record",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.biblio.nomap",
        method          => "wormize_biblio_record",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.biblio.noscrub",
        method          => "wormize_biblio_record",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
        method          => "wormize_biblio_record",
        api_level       => 1,
        argc            => 1,
);
1468
# Rebuild the flattened full_rec rows for the authority record(s) found by an
# id search on $rec.
#
# Arguments (after $self and $client):
#   $rec - value passed to the authority record_entry id search and to the
#          scrub method
#
# Returns 1 on success, 0 if anything inside the try block threw.
#
# Transaction handling: if no storage transaction is open, one is begun here
# and committed (success) or rolled back (failure) before returning.  If the
# caller already has one open it is left open, and only this method's own
# savepoint is touched.
#
# When called through an api_name containing "noscrub" the existing rows are
# not scrubbed first.
sub wormize_authority_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    my $commit = 0;
    if (!OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;

    # Tracks whether our savepoint exists, so the error handler never asks the
    # storage server to roll back to a savepoint that was never created (which
    # is what happened when scrubbing or parsing failed).
    my $savepoint_set = 0;

    try {
        # clean up the cruft
        unless ($self->api_name =~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $auths = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

        my @full_rec = ();
        for my $r (@$auths) {
            my $xml = $parser->parse_string($r->marc);

            # the full_rec stuff
            for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
                $fr->record( $r->id );
                push @full_rec, $fr;
            }

            # XXX No record_descriptor rows are built for authority records;
            # it is an open question what they would mean here.
        }

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
        $savepoint_set = 1;

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

    } otherwise {
        $log->debug('Wormization failed : '.shift(), ERROR);
        if ($savepoint_set) {
            OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
        }
        $success = 0;
    };

    OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
    OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
    return $success;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.authority",
    method          => "wormize_authority_record",
    api_level       => 1,
    argc            => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.authority.noscrub",
    method          => "wormize_authority_record",
    api_level       => 1,
    argc            => 1,
);
1537
1538
1539 # --------------------------------------------------------------------------------
1540 # MARC index extraction
1541
1542 package OpenILS::Application::Ingest::XPATH;
1543 use base qw/OpenILS::Application::Ingest/;
1544 use Unicode::Normalize;
1545
# Concatenate the text found under an XPath expression into one string.
#
# Arguments:
#   $xml       - element to search (e.g. a MODS documentElement)
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - optional namespace URI ...
#   $ns_prefix - ... and prefix; when both are true they are bound (and
#                activated) on $xml before searching
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string built so far (substring test)
#
# For each matching node the text of every child node is appended, followed
# by a single space; a node without children contributes its own text.
# Returns the NFD-normalised result.
sub _xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ( $xml->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {
            my $content = $child->textContent;

            # Uniquify with a literal substring search.  index() avoids
            # compiling a regex per child and, unlike /$pattern/, has no
            # special case for an empty string (an empty pattern makes Perl
            # reuse the last successfully matched regex).
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if ( !@children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
1577
# Stream one metabib "<class>_field_entry" object per index definition of the
# given search class, built from a MARC XML record.
#
# Arguments (after $self and $client):
#   $xml   - MARC XML, either as a string or as an already parsed document
#   $class - search class name; selects both the $xpathset entries and the
#            Fieldmapper::metabib::<class>_field_entry constructor
#
# Each entry has value() set to the normalised text and field() set to the
# id of the index definition.  Definitions yielding no text are skipped.
# Entries are sent with $client->respond; the method itself returns undef.
sub class_all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    # The MODS transform does not depend on the index type, so run the
    # stylesheet once instead of once per definition.
    my $mods_root = $mods_sheet->transform($xml)->documentElement;

    my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
    for my $type ( keys %{ $xpathset->{$class} } ) {
        my $value = _xpath_to_string(
            $mods_root,
            $xpathset->{$class}->{$type}->{xpath},
            "http://www.loc.gov/mods/",
            "mods",
            1
        );

        next unless $value;

        # Decompose, then drop combining marks and control characters, and
        # trim trailing non-word characters.
        $value = NFD($value);
        $value =~ s/\pM+//sgo;
        $value =~ s/\pC+//sgo;
        $value =~ s/\W+$//sgo;

        # Remove a period that directly follows a word character
        # (e.g. "j.r.r." becomes "jrr"), then lowercase.
        $value =~ s/(\w)\./$1/sgo;
        $value = lc($value);

        my $fm = $class_constructor->new;
        $fm->value( $value );
        $fm->field( $xpathset->{$class}->{$type}->{id} );
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.field_entry.class.xml",
    method          => "class_all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
1621
# Record-id flavour of open-ils.worm.field_entry.class.xml: fetches the bib
# record $rec from storage, runs the XML method on its MARC for search class
# $class, stamps each resulting entry with source($rec) and streams it to the
# client.  Returns undef.
sub class_all_index_string_record {
    my ($self, $client, $rec, $class) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my @entries = $self->method_lookup("open-ils.worm.field_entry.class.xml")->run($bib->marc, $class);
    for my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.field_entry.class.record",
    method    => "class_all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1644
1645
# Return the index string for one ($class, $type) index definition, taken
# from the MODS rendering of the supplied MARC XML ($xml may be a string or a
# parsed document).  Values are uniquified by _xpath_to_string.
sub class_index_string_xml {
    my ($self, $client, $xml, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    my $mods_root = $mods_sheet->transform($xml)->documentElement;
    my $xpath     = $xpathset->{$class}->{$type}->{xpath};

    return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.class.type.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 1,
);
1663
# Record-id flavour of open-ils.worm.class.type.xml: fetches bib record $rec
# from storage, extracts the ($class, $type) index string from its MARC, logs
# the result at DEBUG level and returns it.
sub class_index_string_record {
    my ($self, $client, $rec, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.class.type.xml");
    my ($d) = $xml_method->run($bib->marc, $class => $type);

    $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.class.type.record",
    method    => "class_index_string_record",
    api_level => 1,
    argc      => 1,
);
1684
# Evaluate an arbitrary XPath expression against an XML document and return
# the concatenated text.
#
# Arguments (after $self and $client):
#   $xml    - XML string or parsed document
#   $xpath  - expression to evaluate against the document element
#   $uri    - optional namespace URI   } handed straight through
#   $prefix - optional namespace prefix} to _xpath_to_string
#   $unique - true to suppress repeated values
sub xml_xpath {
    my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.xml",
    method    => "xml_xpath",
    api_level => 1,
    argc      => 1,
);
1704
# Record-id flavour of open-ils.worm.xpath.xml: fetches bib record $rec from
# storage, evaluates $xpath (with the optional $uri/$prefix namespace binding
# and $unique flag) against its MARC, logs the result at DEBUG level and
# returns it.
sub record_xpath {
    my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.xpath.xml");
    my ($d) = $xml_method->run($bib->marc, $xpath, $uri, $prefix, $unique );

    $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.record",
    method    => "record_xpath",
    api_level => 1,
    argc      => 1,
);
1727
1728
1729 # --------------------------------------------------------------------------------
1730 # MARC Descriptor
1731
1732 package OpenILS::Application::Ingest::Biblio::Leader;
1733 use base qw/OpenILS::Application::Ingest/;
1734 use Unicode::Normalize;
1735
# Record-type groups: each value is a regex fragment matching the single
# record-type character(s) that belong to the named group.
our %marc_type_groups = (
    BKS => q/[at]{1}/,
    SER => q/[a]{1}/,
    VIS => q/[gkro]{1}/,
    MIX => q/[p]{1}/,
    MAP => q/[ef]{1}/,
    SCO => q/[cd]{1}/,
    REC => q/[ij]{1}/,
    COM => q/[m]{1}/,
);

# Build an anchored regex matching a type character that belongs to ANY of
# the named groups, e.g. _type_re(qw/MAP VIS/).
#
# The fragments are fetched with a hash slice (@hash{...}).  A scalar lookup
# ($hash{@_}) would use the argument count as the key and always produce an
# empty pattern.  The alternation is wrapped in (?:...) so that both anchors
# apply to every alternative.  Unknown group names are ignored.
sub _type_re {
    my @fragments = grep { defined } @marc_type_groups{@_};
    my $alternatives = join('|', @fragments);
    return qr/^(?:$alternatives)$/;
}
1751
# Extractors for the record_descriptor fields, keyed by field name.  Each sub
# takes no arguments and reads the package variables $ldr (leader text) and
# $oo8 (008 control field text), which _extract_biblio_descriptors localizes
# before calling them.  Every extractor returns exactly one value (possibly
# undef) so it can be passed straight to the field setter.
our %biblio_descriptor_code = (
    item_type     => sub { return substr($ldr, 6, 1) },
    item_form     => sub {
        # The 008 offset of the form-of-item character depends on the
        # record-type group.
        my $rec_type = substr($ldr, 6, 1);
        return substr($oo8, 29, 1) if ($rec_type =~ _type_re( qw/MAP VIS/ ));
        return substr($oo8, 23, 1) if ($rec_type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
        return ' ';
    },
    bib_level     => sub { return substr($ldr, 7, 1) },
    control_type  => sub { return substr($ldr, 8, 1) },
    char_encoding => sub { return substr($ldr, 9, 1) },
    enc_level     => sub { return substr($ldr, 17, 1) },
    cat_form      => sub { return substr($ldr, 18, 1) },
    pub_status    => sub { return substr($ldr, 5, 1) },
    item_lang     => sub { return substr($oo8, 35, 3) },
    lit_form      => sub {
        # only meaningful for the BKS group
        return undef unless (substr($ldr, 6, 1) =~ _type_re('BKS'));
        return substr($oo8, 33, 1);
    },
    type_mat      => sub {
        # only meaningful for the VIS group
        return undef unless (substr($ldr, 6, 1) =~ _type_re('VIS'));
        return substr($oo8, 33, 1);
    },
    audience      => sub { return substr($oo8, 22, 1) },
);
1774
# Build a Fieldmapper::metabib::record_descriptor from a parsed MARC XML
# document by running every extractor in %biblio_descriptor_code.
#
# The leader, 008 and 007 values are placed in the localized package
# variables $ldr, $oo8 and $oo7 for the duration of the call; the extractors
# read them from there.
sub _extract_biblio_descriptors {
    my ($xml) = @_;

    local $ldr = $xml->findvalue('//*[local-name()="leader"]');
    local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
    local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

    my $descriptor = Fieldmapper::metabib::record_descriptor->new;
    for my $field_name ( keys %biblio_descriptor_code ) {
        my $extractor = $biblio_descriptor_code{$field_name};
        $descriptor->$field_name( $extractor->() );
    }

    return $descriptor;
}
1789
# API wrapper around _extract_biblio_descriptors: accepts MARC XML as a
# string or a parsed document and returns the record descriptor object.
sub extract_biblio_desc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.xml",
    method    => "extract_biblio_desc_xml",
    api_level => 1,
    argc      => 1,
);
1805
# Record-id flavour of open-ils.worm.biblio_leader.xml: fetches bib record
# $rec from storage, builds the record descriptor from its MARC, logs it as
# JSON at DEBUG level and returns it.
sub extract_biblio_desc_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();
    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.biblio_leader.xml");
    my ($d) = $xml_method->run($bib->marc);

    $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.record",
    method    => "extract_biblio_desc_record",
    api_level => 1,
    argc      => 1,
);
1824
1825 # --------------------------------------------------------------------------------
1826 # Flat MARC
1827
1828 package OpenILS::Application::Ingest::FlatMARC;
1829 use base qw/OpenILS::Application::Ingest/;
1830 use Unicode::Normalize;
1831
1832
# Normalise one MARC value for storage in a full_rec row: decompose (NFD),
# drop combining marks and control characters, trim trailing non-word
# characters.  Case is left alone; callers lowercase where required.
sub _normalize_full_rec_value {
    my ($raw) = @_;

    my $val = NFD($raw);
    $val =~ s/\pM+//sgo;
    $val =~ s/\pC+//sgo;
    $val =~ s/\W+$//sgo;
    return $val;
}

# Flatten the first <record> element of a parsed MARC XML document into a
# list of Fieldmapper::<xmltype>::full_rec objects.
#
# Arguments:
#   $marcxml - parsed MARC XML document (anything supporting findnodes)
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
#
# Rows produced, in order:
#   * one per leader, with tag 'LDR'
#   * one per controlfield, tagged with the field's tag
#   * one per subfield of every datafield, carrying tag, ind1, ind2 and the
#     subfield code; only these values are lowercased
#
# Dies if the document contains no record element.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
    die "No MARC record element found in $xmltype xml\n" unless $root;

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( 'LDR' );
        $ns->value( _normalize_full_rec_value( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _normalize_full_rec_value( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
            next unless $data;

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( lc( _normalize_full_rec_value( $data->textContent ) ) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
    return @ns_list;
}
1906
# Stream the flattened full_rec rows for a MARC XML record (string or parsed
# document).  Rows are built as authority rows when the method was invoked
# through an api_name containing "authority", otherwise as metabib rows.
# Returns undef; the rows are sent with $client->respond.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref $xml) {
        $xml = $parser->parse_string($xml);
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row ( _marcxml_to_full_rows($xml, $type) ) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1936
# Record-id flavour of the flat_marc XML methods: fetches record $rec from
# storage and streams the rows produced by the matching
# open-ils.worm.flat_marc.<type>.xml method.  <type> is 'authority' when the
# api_name contains "authority" and 'biblio' otherwise; it selects both the
# storage retrieve call and the XML method.  Returns undef.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();
    my $entry = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
    );

    my $xml_method = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
    for my $row ( $xml_method->run($entry->marc) ) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1965
1966
1967 # --------------------------------------------------------------------------------
1968 # Fingerprinting
1969
1970 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1971 use base qw/OpenILS::Application::Ingest/;
1972 use Unicode::Normalize;
1973 use OpenSRF::EX qw/:try/;
1974
# --- fingerprint fixups ------------------------------------------------------
# Every fixup works in place on the package variable $text, which _fp_mods
# localizes before invoking it.  The current value is logged before the first
# step and again after each step.

my $fp_trace = sub {
    $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
};

# Turn a list of in-place edit steps into a single fixup sub.
my $fp_make_fixup = sub {
    my @steps = @_;
    return sub {
        $fp_trace->();
        for my $step (@steps) {
            $step->();
            $fp_trace->();
        }
    };
};

# Steps shared by titles and authors: decompose, drop combining marks,
# lowercase, collapse whitespace runs, strip leading whitespace.
my @fp_common_steps = (
    sub { $text = NFD($text) },
    sub { $text =~ s/\pM+//gso },
    sub { $text = lc($text) },
    sub { $text =~ s/\s+/ /sgo },
    sub { $text =~ s/^\s*(.+)\s*$/$1/sgo },
);

# Titles additionally lose the words "the", "a" and "an", bracketed text, and
# trailing ; / . punctuation.
my $fp_fixup_title = $fp_make_fixup->(
    @fp_common_steps,
    sub { $text =~ s/\b(?:the|an?)\b//sgo },
    sub { $text =~ s/\[.[^\]]+\]//sgo },
    sub { $text =~ s/\s*[;\/\.]*$//sgo },
);

# Authors are cut at the first whitespace (and an optional comma before it).
my $fp_fixup_author = $fp_make_fixup->(
    @fp_common_steps,
    sub { $text =~ s/,?\s+.*$//sgo },
);

# --- xpath candidates, tried in order until one yields text -----------------
my @fp_title_paths = (
    '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
    '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
    '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
    '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);

my @fp_author_paths = (
    '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
    '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

# Flat list of (match xpath => block) pairs.  A block is itself a flat list
# of (part name => { xpath => [...], fixup => sub }) pairs.  _fp_mods walks
# both levels by index, so the order of entries is significant.
my @fp_mods_xpath = (
    '//mods:mods/mods:typeOfResource[text()="text"]' => [
        title => {
            xpath => [ @fp_title_paths ],
            fixup => $fp_fixup_title,
        },
        author => {
            xpath => [ @fp_author_paths ],
            fixup => $fp_fixup_author,
        },
    ],

    '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
        title => {
            xpath => [
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
                '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
                @fp_title_paths,
            ],
            fixup => $fp_fixup_title,
        },
        author => {
            xpath => [
                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
                '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
                @fp_author_paths,
            ],
            fixup => $fp_fixup_author,
        },
    ],

);

# Fallback: any record with a titleInfo reuses the first (plain text) block.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
2087
# Compute the fingerprint string for a MODS element.
#
# Walks the (match xpath => block) pairs of @fp_mods_xpath in order.  For a
# pair whose match xpath selects at least one node, each part of the block
# (title, author) takes the first non-empty value among its candidate xpaths,
# runs it through the part's fixup and appends it to the fingerprint.  The
# first pair that produces any text wins: all non-word characters are removed
# and the result is returned.  Returns undef if no pair yields text.
#
# The "mods" prefix is bound on $mods as a side effect.
sub _fp_mods {
    my ($mods) = @_;
    $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    my $fp_string = '';

    # even slots hold the match xpath, odd slots the block that goes with it
    for ( my $i = 0; $fp_mods_xpath[$i]; $i += 2 ) {
        my ($match_xpath, $block) = @fp_mods_xpath[ $i, $i + 1 ];

        if ( my @nodes = $mods->findnodes( $match_xpath ) ) {

            # same layout inside a block: part name, then part definition
            for ( my $j = 0; $$block[$j + 1]; $j += 2 ) {
                my ($part_name, $part) = @$block[ $j, $j + 1 ];

                # the fixup subs operate on this package variable
                local $text;
                for my $candidate ( @{ $part->{xpath} } ) {
                    $text = $mods->findvalue( $candidate );
                    last if ($text);
                }

                $log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

                next unless ($text);

                $$part{fixup}->();
                $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                $fp_string .= $text;
            }
        }

        next unless ($fp_string);

        $fp_string =~ s/\W+//gso;
        $log->debug("Fingerprint is [$fp_string]", INFO);
        return $fp_string;
    }

    return undef;
}
2132
# refingerprint_bibrec( $self, $client, $rec )
#
# Recomputes fingerprint and quality for the bib record(s) returned by the
# biblio.record_entry search on id $rec, using the registered
# 'open-ils.worm.fingerprint.marc' method.
#
# For every record whose fingerprint or quality changed:
#   * the record entry is updated with the new values;
#   * unless this method was invoked through an API name containing
#     "nomap", the record's metarecord source maps are rebuilt: old maps
#     are deleted, the old metarecord is deleted when no map points at it
#     any more, a metarecord for the new fingerprint is found or created,
#     and a new source map is created.
#
# The id of every examined record is sent to the client via respond().
#
# If no transaction is active one is started here; it is committed when all
# records were processed and rolled back when anything threw.  A transaction
# owned by the caller is left alone.  Always returns undef.
sub refingerprint_bibrec {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	# $commit records whether this call owns the transaction.
	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;
	try {
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

		# Named $bib rather than $b so the sort() global is not shadowed.
		for my $bib (@$bibs) {
			my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

			if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

				$log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

				OpenILS::Application::Ingest->storage_req(
					'open-ils.storage.direct.biblio.record_entry.remote_update',
					{ id => $bib->id },
					{ fingerprint => $fp->{fingerprint},
					  quality     => $fp->{quality} }
				);

				if ($self->api_name !~ /nomap/o) {
					my $old_source_map = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
						$bib->id
					);

					# Drop every existing map for this record, remembering
					# the metarecord the (last) map pointed at.
					my $old_mrid;
					if (ref($old_source_map) and @$old_source_map) {
						for my $m (@$old_source_map) {
							$old_mrid = $m->metarecord;
							OpenILS::Application::Ingest->storage_req(
								'open-ils.storage.direct.metabib.metarecord_source_map.delete',
								$m->id
							);
						}
					}

					# Declared unconditionally: "my $x = ... if COND" has
					# undefined behaviour in Perl and could leak the value
					# from a previous loop iteration.
					my $old_sm;
					if ($old_mrid) {
						$old_sm = OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
							{ metarecord => $old_mrid }
						);
					}

					# The old metarecord has no sources left: remove it.
					if (ref($old_sm) and @$old_sm == 0) {
						OpenILS::Application::Ingest->storage_req(
							'open-ils.storage.direct.metabib.metarecord.delete',
							$old_mrid
						);
					}

					# Find the metarecord for the new fingerprint, creating
					# it (with this record as master) when there is none.
					my $mr = OpenILS::Application::Ingest->storage_req(
						'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
						{ fingerprint => $fp->{fingerprint} }
					)->[0];

					unless ($mr) {
						$mr = Fieldmapper::metabib::metarecord->new;
						$mr->fingerprint( $fp->{fingerprint} );
						$mr->master_record( $bib->id );
						$mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
					}

					my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
					$mr_map->metarecord( $mr->id );
					$mr_map->source( $bib->id );
					OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

				}
			}
			$client->respond($bib->id);
		}

	} otherwise {
		# The handler receives the exception as its first argument.
		$log->debug('Fingerprinting failed : '.shift(), ERROR);
		$success = 0;
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return undef;
}
# Both API names are served by refingerprint_bibrec, which checks its own
# api_name: the ".nomap" variant skips the metarecord source-map rebuild.
# Only the plain variant is registered with the stream flag.
for my $variant (
	[ 'open-ils.worm.fingerprint.record.update', stream => 1 ],
	[ 'open-ils.worm.fingerprint.record.update.nomap' ],
) {
	my ($api_name, @extra) = @$variant;
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> 'refingerprint_bibrec',
		api_level	=> 1,
		argc		=> 1,
		@extra,
	);
}
2235
2236 =comment
2237
2238 sub fingerprint_bibrec {
2239         my $self = shift;
2240         my $client = shift;
2241         my $rec = shift;
2242
2243         OpenILS::Application::Ingest->post_init();
2244         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2245
2246         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2247         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2248         return $fp;
2249
2250 }
2251 __PACKAGE__->register_method(  
2252         api_name        => "open-ils.worm.fingerprint.record",
2253         method          => "fingerprint_bibrec",
2254         api_level       => 0,
2255         argc            => 1,
2256 );                      
2257
2258
2259 sub fingerprint_mods {
2260         my $self = shift;
2261         my $client = shift;
2262         my $xml = shift;
2263
2264         OpenILS::Application::Ingest->post_init();
2265         my $mods = $parser->parse_string($xml)->documentElement;
2266
2267         return _fp_mods( $mods );
2268 }
2269 __PACKAGE__->register_method(  
2270         api_name        => "open-ils.worm.fingerprint.mods",
2271         method          => "fingerprint_mods",
2272         api_level       => 1,
2273         argc            => 1,
2274 );                      
2275
2276 sub fingerprint_marc {
2277         my $self = shift;
2278         my $client = shift;
2279         my $xml = shift;
2280
2281         $xml = $parser->parse_string($xml) unless (ref $xml);
2282
2283         OpenILS::Application::Ingest->post_init();
2284         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2285         $log->debug("Returning [$fp] as fingerprint", INFO);
2286         return $fp;
2287 }
2288 __PACKAGE__->register_method(  
2289         api_name        => "open-ils.worm.fingerprint.marc",
2290         method          => "fingerprint_marc",
2291         api_level       => 1,
2292         argc            => 1,
2293 );                      
2294
2295
2296 =cut
2297
# biblio_fingerprint_record( $self, $client, $rec )
#
# Retrieves biblio record entry $rec from storage and returns whatever the
# registered 'open-ils.worm.fingerprint.marc' method yields for its MARC.
sub biblio_fingerprint_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenILS::Application::Ingest->storage_req(
		'open-ils.storage.direct.biblio.record_entry.retrieve', $rec
	);
	my $marc = $entry->marc;

	my $fp_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
	my ($fp) = $fp_method->run($marc);

	# NOTE(review): other callers in this file treat the fingerprint result
	# as a hash ref, so this message probably prints a reference address
	# rather than the fingerprint itself -- confirm and adjust if so.
	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	return $fp;
}
# Fingerprint a stored bib record, addressed by record id.
__PACKAGE__->register_method(
	method    => 'biblio_fingerprint_record',
	api_name  => 'open-ils.worm.fingerprint.record',
	argc      => 1,
	api_level => 1,
);
2319
# Script runner for the fingerprinting script; created on first use by
# biblio_fingerprint and then reused for later calls.
our $fp_script;

# biblio_fingerprint( $self, $client, $marc )
#
# Fingerprints a MARC record.  $marc may be an XML string or an already
# parsed document (anything that is a reference is used as-is).
#
# The record is transformed to MODS with $mods_sheet; both the MARC and the
# MODS serialisations are passed through entityize() and handed to the
# configured 'biblio_fingerprint' script as its 'environment'
# ( { marc => ..., mods => ... } ).
#
# Returns the script's result, or 0 when the script run yields a false
# value (the failure is logged as an error).
sub biblio_fingerprint {
	my $self = shift;
	my $client = shift;
	my $marc = shift;

	OpenILS::Application::Ingest->post_init();

	$marc = $parser->parse_string($marc) unless (ref $marc);

	my $mods = OpenILS::Application::Ingest::entityize(
		$mods_sheet
			->transform( $marc )
			->documentElement
			->toString,
		'D'
	);

	$marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

	# The record is logged at internal level only; the former "warn $marc"
	# debugging leftover wrote every record to STDERR and has been removed.
	$log->internal("Got MARC [$marc]");
	$log->internal("Created MODS [$mods]");

	# Lazily build the script runner from the open-ils.storage app settings.
	if(!$fp_script) {
		my @pfx = ( "apps", "open-ils.storage","app_settings" );
		my $conf = OpenSRF::Utils::SettingsClient->new;

		my $libs        = $conf->config_value(@pfx, 'script_path');
		my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
		# script_path may be configured as a single value or as a list.
		my $script_libs = (ref($libs)) ? $libs : [$libs];

		$log->debug("Loading script $script_file for biblio fingerprinting...");

		$fp_script = OpenILS::Utils::ScriptRunner->new(
			file        => $script_file,
			paths       => $script_libs,
			reset_count => 1000,
		);
	}

	$log->debug("Applying environment for biblio fingerprinting...");

	my $env = {marc => $marc, mods => $mods};
	#my $res = {fingerprint => '', quality => '0'};

	$fp_script->insert('environment' => $env);
	#$fp_script->insert('result' => $res);

	$log->debug("Running script for biblio fingerprinting...");

	# Explicit failure branch: the old "run || (log && return 0)" form only
	# returned when the logger call itself returned a true value.
	my $res = $fp_script->run;
	unless ($res) {
		$log->error( "Fingerprint script died!  $@" );
		return 0;
	}

	$log->debug("Script for biblio fingerprinting completed successfully...");

	return $res;
}
# Fingerprint a MARC record passed in directly.
__PACKAGE__->register_method(
	method    => 'biblio_fingerprint',
	api_name  => 'open-ils.worm.fingerprint.marc',
	argc      => 1,
	api_level => 1,
);
2382
2383 # --------------------------------------------------------------------------------
2384
2385 1;
2386
2387 __END__
# File-scoped cache of storage method handles used by wormize (the
# transaction handles are also used by authority_wormize).  Each one is
# filled in by method_lookup on first use.
my (
	# transaction control
	$in_xact, $begin, $commit, $rollback,

	# record entries, metarecords and source maps
	$lookup, $update_entry,
	$mr_lookup, $mr_update, $mr_create,
	$create_source_map, $sm_lookup,

	# mass deletes of previously ingested rows
	$rm_old_rd, $rm_old_sm, $rm_old_fr, $rm_old_tr,
	$rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser,
);

my ( $fr_create, $rd_create );

# Per-class (title/author/subject/keyword/series) batch-create handles.
my $create = {};
2411
# Record-descriptor field name => snippet of Perl source that computes it.
# The snippets are string-eval'ed by wormize and authority_wormize with
# $ldr (the MARC leader text) and $oo8 (the 008 control field value) in
# scope, so they must stay plain strings.
my %descriptor_code = (
	# taken from the leader
	pub_status    => 'substr($ldr,5,1)',
	item_type     => 'substr($ldr,6,1)',
	bib_level     => 'substr($ldr,7,1)',
	control_type  => 'substr($ldr,8,1)',
	char_encoding => 'substr($ldr,9,1)',
	enc_level     => 'substr($ldr,17,1)',
	cat_form      => 'substr($ldr,18,1)',

	# taken from the 008 field
	audience      => 'substr($oo8,22,1)',
	item_lang     => 'substr($oo8,35,3)',
	#lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',

	# the 008 offset depends on the leader's item type
	item_form     => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
);
2425
# wormize( $self, $client, @docids )
#
# Ingests biblio record entries.  For each retrieved record it:
#   * transforms the MARC to MODS and stores a fingerprint on the entry;
#   * unless the API name contains "no_map", finds or creates the
#     metarecord for that fingerprint and queues a source map;
#   * builds a record descriptor from the leader / 008 field via
#     %descriptor_code;
#   * collects the per-class MODS values and the full-record rows.
# Previously ingested rows for @docids are then mass-deleted and the new
# rows are batch-created.
#
# Only API names ending in "batch" process more than the first retrieved
# record; note that the mass deletes always cover all of @docids.
#
# A storage transaction is started (and committed at the end) only when
# none is active.  Throws OpenSRF::EX::PANIC when the transaction cannot be
# started/committed or when a storage call returns nothing.
# Returns the number of records processed.
sub wormize {

	my $self = shift;
	my $client = shift;
	my @docids = @_;

	my $no_map = 0;
	if ($self->api_name =~ /no_map/o) {
		$no_map = 1;
	}

	# Storage method handles are looked up once and cached in the
	# file-scoped lexicals.
	$in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
	$begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
	$commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
	$rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
	$sm_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
	$mr_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
	$mr_update ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
	$lookup ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
	$update_entry ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
	$rm_old_sm ||= $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
	$rm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete');
	$rm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete');
	$rm_old_tr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete');
	$rm_old_ar ||= $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete');
	$rm_old_sr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
	$rm_old_kr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
	$rm_old_ser ||= $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete');
	$mr_create ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
	$create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
	$rd_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create');
	$fr_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create');
	for my $class ( qw/title author subject keyword series/ ) {
		$$create{$class} ||= $self->method_lookup( "open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
	}

	# Start a transaction only if the caller is not already inside one.
	my ($outer_xact) = $in_xact->run;
	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			my ($r) = $begin->run($client);
			unless (defined $r and $r) {
				$rollback->run;
				throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
			}
		}
	} catch Error with {
		throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
	};

	my @source_maps;
	my @entry_list;
	my @mr_list;
	my @rd_list;
	my @ns_list;
	my @mods_data;
	my $ret = 0;
	for my $entry ( $lookup->run(@docids) ) {
		# step -1: grab the doc from storage
		next unless ($entry);

		# Load the MARC -> MODS stylesheet on first use.
		if(!$mods_sheet) {
			my $xslt_doc = $parser->parse_file(
				OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
			$mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
		}

		my $xml = $entry->marc;
		my $docid = $entry->id;
		my $marcdoc = $parser->parse_string($xml);
		my $modsdoc = $mods_sheet->transform($marcdoc);

		my $mods = $modsdoc->documentElement;
		$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

		# NOTE(review): the only fingerprint_mods visible in this file sits
		# inside a POD block and takes ($self, $client, $xml) -- confirm
		# which function is meant to be called here.
		$entry->fingerprint( fingerprint_mods( $mods ) );
		push @entry_list, $entry;

		$log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

		unless ($no_map) {
			my ($mr) = $mr_lookup->run( $entry->fingerprint );
			if (!$mr || !@$mr) {
				$log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
				$mr = Fieldmapper::metabib::metarecord->new;
				$mr->fingerprint( $entry->fingerprint );
				$mr->master_record( $entry->id );
				my ($new_mr) = $mr_create->run($mr);
				# Check the value returned by the create call; $mr itself
				# is always defined at this point, so testing it could
				# never detect a failed create.
				unless (defined $new_mr) {
					throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
				}
				$mr->id($new_mr);
			} else {
				$log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
				$mr->mods('');
				push @mr_list, $mr;
			}

			my $sm = Fieldmapper::metabib::metarecord_source_map->new;
			$sm->metarecord( $mr->id );
			$sm->source( $entry->id );
			push @source_maps, $sm;
		}

		# $ldr and $oo8 are referenced by name from the %descriptor_code
		# snippets evaluated below.
		my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
		my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

		my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
		for my $rd_field ( keys %descriptor_code ) {
			$rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
		}
		$rd_obj->record( $docid );
		push @rd_list, $rd_obj;

		push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

		# step 2: build the KOHA rows
		my @tmp_list = _marcxml_to_full_rows( $marcdoc );
		$_->record( $docid ) for (@tmp_list);
		push @ns_list, @tmp_list;

		$ret++;

		# Non-batch API names handle the first record only.
		last unless ($self->api_name =~ /batch$/o);
	}

	# Clear out the rows left by any earlier ingest of these records.
	$rm_old_rd->run( { record => \@docids } );
	$rm_old_fr->run( { record => \@docids } );
	$rm_old_sm->run( { source => \@docids } ) unless ($no_map);
	$rm_old_tr->run( { source => \@docids } );
	$rm_old_ar->run( { source => \@docids } );
	$rm_old_sr->run( { source => \@docids } );
	$rm_old_kr->run( { source => \@docids } );
	$rm_old_ser->run( { source => \@docids } );

	unless ($no_map) {
		my ($sm) = $create_source_map->run(@source_maps);
		unless (defined $sm) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
		}
		my ($mr) = $mr_update->run(@mr_list);
		unless (defined $mr) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
		}
	}

	my ($re) = $update_entry->run(@entry_list);
	unless (defined $re) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
	}

	my ($rd) = $rd_create->run(@rd_list);
	unless (defined $rd) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
	}

	my ($fr) = $fr_create->run(@ns_list);
	unless (defined $fr) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
	}

	# step 5: insert the new metadata
	for my $class ( qw/title author subject keyword series/ ) {
		my @md_list = ();
		for my $doc ( @mods_data ) {
			# Each element is a single-pair hash: { docid => data }.
			my ($did) = keys %$doc;
			my ($data) = values %$doc;

			my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
			for my $row ( keys %{ $$data{$class} } ) {
				next unless (exists $$data{$class}{$row});
				next unless ($$data{$class}{$row}{value});
				my $fm_obj = $fm_constructor->new;
				$fm_obj->value( $$data{$class}{$row}{value} );
				$fm_obj->field( $$data{$class}{$row}{field_id} );
				$fm_obj->source( $did );
				$log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

				push @md_list, $fm_obj;
			}
		}

		my ($cr) = $$create{$class}->run(@md_list);
		unless (defined $cr) {
			throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
		}
	}

	# Commit only a transaction that was opened by this call.
	unless ($outer_xact) {
		$log->debug("Commiting transaction started by the Ingest.", INFO);
		my ($c) = $commit->run;
		unless (defined $c and $c) {
			$rollback->run;
			throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
		}
	}

	return $ret;
}
# All four API names dispatch to wormize, which inspects its own api_name
# for "no_map" (skip metarecord mapping) and a trailing "batch" (process
# more than the first record).
for my $api_name ( qw(
	open-ils.worm.wormize
	open-ils.worm.wormize.no_map
	open-ils.worm.wormize.batch
	open-ils.worm.wormize.no_map.batch
) ) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> 'wormize',
		api_level	=> 1,
		argc		=> 1,
	);
}
2678
2679
# File-scoped cache of storage method handles for authority ingest; the
# "a" prefix marks the authority counterparts of the biblio handles.
my (
	# transaction control
	$ain_xact, $abegin, $acommit, $arollback,

	# record entries, metarecords and source maps
	$alookup, $aupdate_entry,
	$amr_lookup, $amr_update, $amr_create,
	$acreate_source_map, $asm_lookup,

	# mass deletes of previously ingested rows
	$arm_old_rd, $arm_old_sm, $arm_old_fr, $arm_old_tr,
	$arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser,
);

my ( $afr_create, $ard_create );

my $acreate = {};
2703
# authority_wormize( $self, $client, @docids )
#
# Ingests authority record entries.  For each retrieved record it builds a
# record descriptor from the leader / 008 field via %descriptor_code and
# the full-record rows; the old descriptor and full-record rows for
# @docids are then mass-deleted and the new ones batch-created.
#
# Only API names ending in "batch" process more than the first retrieved
# record; note that the mass deletes always cover all of @docids.
#
# A storage transaction is started (and committed at the end) only when
# none is active.  Throws OpenSRF::EX::PANIC when the transaction cannot be
# started/committed or when a batch create returns nothing.
# Returns the number of records processed.
sub authority_wormize {

	my $self = shift;
	my $client = shift;
	my @docids = @_;

	# Transaction handles are shared with wormize; the a-prefixed handles
	# address the authority tables.
	$in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
	$begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
	$commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
	$rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
	$alookup ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
	$aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
	$arm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete');
	$arm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete');
	$ard_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create');
	$afr_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create');

	# Start a transaction only if the caller is not already inside one.
	my ($outer_xact) = $in_xact->run;
	try {
		unless ($outer_xact) {
			$log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
			my ($r) = $begin->run($client);
			unless (defined $r and $r) {
				$rollback->run;
				throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
			}
		}
	} catch Error with {
		throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
	};

	my @rd_list;
	my @ns_list;
	my $ret = 0;

	# Use the authority retrieve handle looked up above; the biblio
	# $lookup handle is either unset here or reads the wrong table.
	for my $entry ( $alookup->run(@docids) ) {
		# step -1: grab the doc from storage
		next unless ($entry);

		#if(!$mads_sheet) {
		#       my $xslt_doc = $parser->parse_file(
		#               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
		#       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
		#}

		my $xml = $entry->marc;
		my $docid = $entry->id;
		my $marcdoc = $parser->parse_string($xml);
		#my $madsdoc = $mads_sheet->transform($marcdoc);

		#my $mads = $madsdoc->documentElement;
		#$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

		# $ldr and $oo8 are referenced by name from the %descriptor_code
		# snippets evaluated below.
		my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
		my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

		my $rd_obj = Fieldmapper::authority::record_descriptor->new;
		for my $rd_field ( keys %descriptor_code ) {
			$rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
		}
		$rd_obj->record( $docid );
		push @rd_list, $rd_obj;

		# step 2: build the KOHA rows
		my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
		$_->record( $docid ) for (@tmp_list);
		push @ns_list, @tmp_list;

		$ret++;

		# Non-batch API names handle the first record only.
		last unless ($self->api_name =~ /batch$/o);
	}

	# Clear out the rows left by any earlier ingest of these records.
	$arm_old_rd->run( { record => \@docids } );
	$arm_old_fr->run( { record => \@docids } );

	my ($rd) = $ard_create->run(@rd_list);
	unless (defined $rd) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
	}

	# Authority full_rec handle, matching the method named in the error
	# message below (previously the biblio $fr_create handle was called).
	my ($fr) = $afr_create->run(@ns_list);
	unless (defined $fr) {
		throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
	}

	# Commit only a transaction that was opened by this call.
	unless ($outer_xact) {
		$log->debug("Commiting transaction started by Ingest.", INFO);
		my ($c) = $commit->run;
		unless (defined $c and $c) {
			$rollback->run;
			throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
		}
	}

	return $ret;
}
# Register the authority ingest API names.  Each one dispatches to the
# "wormize" method, which looks at its own api_name and stops after the
# first record unless that name ends in "batch".
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authority.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Historical misspelling ("authortiy") of the name above.  It is kept
# registered as an alias so that existing callers keep working; new code
# should call the correctly spelled name.
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authortiy.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authority.wormize.batch",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
2834
2835
2836 # --------------------------------------------------------------------------------
2837
2838
# Flatten a parsed MARCXML document into "full record" row objects: one row
# for each leader, one for each controlfield, and one for each child element
# of each datafield.
#
# Arguments:
#   $marcxml - parsed document; only ->documentElement is called on it
#   $type    - class name used to construct every row (must provide new,
#              tag, ind1, ind2, subfield and value); defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Returns the list of rows: leader rows first, then controlfield rows, then
# datafield rows.  Every value is NFD-decomposed with combining marks
# removed; datafield values are additionally lower-cased.  The record id is
# NOT set here -- callers assign it afterwards.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        # Decompose to NFD and drop all combining marks so that accented
        # and unaccented spellings end up with the same stored value.
        my $strip_marks = sub {
                my ($text) = @_;
                $text = NFD($text);
                $text =~ s/\pM+//g;
                return $text;
        };

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build the row from $type (not a hard-coded class) so that
                # callers asking for another row class get it for every row.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;

                        # childNodes may also hand back non-element nodes
                        # (e.g. whitespace text between subfields); those
                        # carry no "code" attribute and are not subfields.
                        next unless $data->can('getAttribute');

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc( $strip_marks->( $data->textContent ) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2899
# Collect the text found at $xpath under $root into a single string.
#
# Arguments:
#   $root  - object providing findnodes(); each node it returns must provide
#            childNodes() and textContent()
#   $xpath - expression handed to findnodes() unchanged
#
# For every matching node the text of each child is appended followed by a
# single space; a node without children contributes its own text instead.
# A child whose text already occurs anywhere in the string built so far is
# skipped (a substring test, not a whole-value comparison), as is a child
# with no text at all.
#
# Returns the string NFD-decomposed, with combining marks removed and
# lower-cased; "" when nothing matched.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        for my $node ( $root->findnodes( $xpath ) ) {

                my @children = $node->childNodes();

                # leaf node: use its own text
                if( ! @children ) {
                        $string .= $node->textContent . " ";
                        next;
                }

                for my $child (@children) {
                        my $content = $child->textContent;

                        # Nothing to add for an empty child.  (Testing it
                        # with an empty regex would make Perl silently reuse
                        # the last successfully matched pattern instead.)
                        next unless defined $content && length $content;

                        # uniquify the values: literal substring check
                        next if index( $string, $content ) >= 0;

                        $string .= $content . " ";
                }
        }

        $string = NFD($string);
        $string =~ s/\pM+//g;
        return lc($string);
}
2927
2928
# Build the skeleton of the per-class / per-type value structure from the
# file-level $xpathset table.
#
# Arguments:
#   $self - invocant (unused)
#   $mods - accepted but not consulted by this routine
#
# Returns a hash ref shaped as { class => { type => { field_id => ID } } },
# where ID is the "id" entry stored in $xpathset for that class and type.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %skeleton;
        foreach my $class_name (keys %$xpathset) {
                $skeleton{$class_name} = {
                        map {
                                ( $_ => { field_id => $xpathset->{$class_name}->{$_}->{id} } )
                        } keys %{$xpathset->{$class_name}}
                };
        }

        return \%skeleton;
}
2941
2942
2943 1;
2944
2945