git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
fixing minor MR mapping bug
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Metadata formats known to this module, keyed by format name.  Every entry
# holds the XML namespace URI of the format under 'ns' (empty for the formats
# listed without one).  post_init() additionally stores a compiled stylesheet
# under 'xslt' for the 'mods' and 'mods3' entries.
our %supported_formats = (
    'mods3'   => { 'ns' => 'http://www.loc.gov/mods/v3' },
    'mods'    => { 'ns' => 'http://www.loc.gov/mods/' },
    'marcxml' => { 'ns' => 'http://www.loc.gov/MARC21/slim' },
    'srw_dc'  => { 'ns' => 'info:srw/schema/1/dc-schema' },
    'oai_dc'  => { 'ns' => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    'rdf_dc'  => { 'ns' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },
    'atom'    => { 'ns' => 'http://www.w3.org/2005/Atom' },
    'rss091'  => { 'ns' => 'http://my.netscape.com/rdf/simple/0.9/' },
    'rss092'  => { 'ns' => '' },
    'rss093'  => { 'ns' => '' },
    'rss094'  => { 'ns' => '' },
    'rss10'   => { 'ns' => 'http://purl.org/rss/1.0/' },
    'rss11'   => { 'ns' => 'http://purl.org/net/rss1.1#' },
    'rss2'    => { 'ns' => '' },
);

# Logger class name; logging calls in this file are made on it as class
# methods, e.g. $log->debug(...).
my $log = 'OpenSRF::Utils::Logger';

# One XML parser and one XSLT processor shared by every sub in this file.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

# Declared for file-wide use; not assigned anywhere in this part of the file.
my ($mods_sheet, $mads_sheet);

# Field definitions filled in by post_init():
#   $xpathset->{field_class}{field_name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};
# Intentionally empty.  Presumably lifecycle hooks looked up by the OpenSRF
# application framework -- confirm against OpenSRF::Application.
sub initialize { return; }
sub child_init { return; }
49
# Lazily loads the configuration shared by the ingest methods.  Does nothing
# once $xpathset holds at least one key.
#
# Otherwise it:
#   * asks OpenSRF::Utils::SettingsClient for the dirs/xsl config value,
#   * compiles MARC21slim2MODS.xsl and MARC21slim2MODS3.xsl from that
#     directory into $supported_formats{mods}{xslt} and
#     $supported_formats{mods3}{xslt} (each only if not compiled already),
#   * fetches the config.metabib_field rows from open-ils.cstore and records
#     the xpath, id and format of each under
#     $xpathset->{field_class}{name}.
sub post_init {

    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, log message, stylesheet path below $xsldir ]
    my @stylesheets = (
        [ 'mods',  "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ 'mods3', "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my ($format, $message, $path) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $path );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request(
            'open-ils.cstore.direct.config.metabib_field.search.atomic',
            { id => { '!=' => undef } }
        )
        ->gather(1);

    return unless ref $fields and @$fields;

    for my $field (@$fields) {
        my $class = $field->field_class;
        my $name  = $field->name;
        my $xpath = $field->xpath;

        my $slot = $xpathset->{$class}->{$name} ||= {};
        $slot->{xpath}  = $xpath;
        $slot->{id}     = $field->id;
        $slot->{format} = $field->format;

        $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$xpath, DEBUG);
    }

    return;
}
85
# Normalizes a string and replaces its non-ASCII characters with numeric XML
# character references.
#
# Arguments:
#   $stuff - the text to convert
#   $form  - optional; 'D' selects NFD normalization, anything else
#            (including an omitted value) selects NFC
#
# Returns the normalized text with every character in U+0080..U+FFFD or
# U+10000..U+10FFFF rewritten as "&#xHEX;" (uppercase hex, no padding).
# ASCII characters, including markup characters, are left alone.
sub entityize {
    my $stuff = shift;
    my $form  = shift;

    # $form may be omitted, so test definedness before comparing it.
    if (defined $form and $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # The second range covers the supplementary planes so characters above
    # U+FFFF are converted too instead of being passed through raw.
    # U+FFFE and U+FFFF stay excluded; they are not legal XML characters.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;', ord($1))/sge;

    return $stuff;
}
99
100 # --------------------------------------------------------------------------------
101 # Biblio ingest
102
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
106
# Re-ingests one bibliographic record object and stores the derived rows
# through open-ils.cstore inside a single transaction.
#
# Arguments:
#   $self, $client - method object and client handle
#   $bib           - record object; its id is read, and its fingerprint and
#                    quality are overwritten from the ingest result
#
# Steps, in order:
#   1. run open-ils.ingest.full.biblio.object.readonly on $bib
#   2. replace the record's metabib.full_rec and metabib.record_descriptor
#      rows and its classed *_field_entry rows
#   3. drop the record's metarecord_source_map rows, find (or create) the
#      metarecord with the same fingerprint, pick its master_record and map
#      the record to it
#   4. update the biblio.record_entry row and commit
#
# Returns the record id, or undef when the read-only ingest returned nothing
# or the commit request did not return a true value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # Deletes every row of $table (e.g. 'metabib.full_rec') matching $where.
    my $purge = sub {
        my ($table, $where) = @_;
        my $ids = $cstore->request(
            "open-ils.cstore.direct.$table.id_list.atomic",
            $where
        )->gather(1);
        $cstore->request( "open-ils.cstore.direct.$table.delete" => $_ )->gather(1) for (@$ids);
    };

    # update full_rec stuff ...
    $purge->( 'metabib.full_rec', { record => $bib->id } );
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1)
        for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $purge->( 'metabib.record_descriptor', { record => $bib->id } );
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $purge->( "metabib.${class}_field_entry", { source => $bib->id } );
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry -> metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://;
        $class =~ s/::/./g;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...
    $purge->( 'metabib.metarecord_source_map', { source => $bib->id } );

    # Get the matching MR, if any.
    my $mr = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord.search',
        { fingerprint => $bib->fingerprint }
    )->gather(1);

    if (!$mr) {
        # No metarecord carries this fingerprint yet: create one led by $bib.
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            # Highest-quality record among those already mapped to this MR.
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select' => { bre => [ qw/id quality/ ] },
                  order_by => { bre => "quality desc" },
                  limit    => 1,
                }
            )->gather(1);

            # The search can come back empty; treat that like "no better
            # record" instead of calling a method on undef.
            if ($best and $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # Release the session opened by connect() above whether or not the
    # commit succeeded.
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.object",
    method          => "rw_biblio_ingest_single_object",
    api_level       => 1,
    argc            => 1,
);
229
# Fetches the biblio.record_entry row with id $rec and hands it to
# open-ils.ingest.full.biblio.object.
#
# The retrieve runs between a transaction.begin and a transaction.rollback on
# a connected cstore session, which is disconnected before the ingest starts.
#
# Returns undef when no record comes back; otherwise the first value
# produced by the object-level ingest method.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    my $r = $cstore
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$r or !@$r) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ( $ingest->run($r) )[0];
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record",
    method    => "rw_biblio_ingest_single_record",
    api_level => 1,
    argc      => 1,
);
254
# Runs every read-only ingest step on the MARC of one bib record object and
# stamps the results with the record id.
#
# Returns a hash ref:
#   full_rec      => rows from open-ils.ingest.flat_marc.biblio.xml
#   field_entries => entries from open-ils.ingest.extract.field_entry.all.xml
#   fingerprint   => first result of open-ils.ingest.fingerprint.xml
#   descriptor    => first result of open-ils.ingest.descriptor.xml
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two steps take the parsed document, the last two the string.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.object.readonly",
    method    => "ro_biblio_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
280
# Runs every read-only ingest step on a MARCXML string.  No record id is
# known at this level, so the returned objects are not tied to a record.
#
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $marc) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($marc);
    my $document = $parser->parse_string($xml);

    # The first two steps take the parsed document, the last two the string.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    return {
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml.readonly",
    method    => "ro_biblio_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
301
# Read-only ingest of the biblio.record_entry row with id $rec.
#
# Retrieves the record from open-ils.cstore, runs
# open-ils.ingest.full.biblio.xml.readonly on its MARC and stamps the
# resulting field entries, full_rec rows and descriptor with $rec.
#
# Returns undef when no record comes back, otherwise the result hash ref
# (keys full_rec, field_entries, fingerprint, descriptor).
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor step may yield nothing; ro_biblio_ingest_single_object
    # guards the same call, so do likewise instead of dying on undef.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.biblio.record.readonly",
    method          => "ro_biblio_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
329
# Streaming read-only ingest by record id.
#
# Takes one message at a time from the client (recv with count 1 and
# timeout 5), treats its content as a record id, runs
# open-ils.ingest.full.biblio.record.readonly on it and sends the result
# back.  Stops when recv returns nothing or a message has undefined content.
# The cstore session in $ses is created but not otherwise used here.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined $rec or last;

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($rec);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.record_stream.readonly",
    method    => "ro_biblio_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
360
# Streaming read-only ingest of MARCXML strings.
#
# Takes one message at a time from the client (recv with count 1 and
# timeout 5), runs open-ils.ingest.full.biblio.xml.readonly on its content
# and sends the result back.  Stops when recv returns nothing or a message
# has undefined content.  The cstore session in $ses is created but not
# otherwise used here.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined $xml or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method    => "ro_biblio_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
388
# Streaming ingest of bib record objects.
#
# Takes one record object at a time from the client (recv with count 1 and
# timeout 5), runs open-ils.ingest.full.biblio.xml.readonly on its MARC,
# stamps the field entries and full_rec rows with the record's id and sends
# the result back.  Despite the "rw"/"import" naming, nothing visible in this
# sub writes to the database.  The cstore session in $ses is created but not
# otherwise used here.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined $bib or last;

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($bib->id);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.biblio.bib_stream.import",
    method    => "rw_biblio_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
419
420
421 # --------------------------------------------------------------------------------
422 # Authority ingest
423
424 package OpenILS::Application::Ingest::Authority;
425 use base qw/OpenILS::Application::Ingest/;
426 use Unicode::Normalize;
427
# Read-only ingest of one authority record object: flattens its MARC with
# open-ils.ingest.flat_marc.authority.xml and stamps every resulting row with
# the record's id.
#
# Returns { full_rec => [ rows ] }.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.object.readonly",
    method    => "ro_authority_ingest_single_object",
    api_level => 1,
    argc      => 1,
);
448
# Read-only ingest of one authority MARCXML string: flattens it with
# open-ils.ingest.flat_marc.authority.xml.
#
# Returns { full_rec => [ rows ] }; the rows are not tied to a record id.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $marc) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    return { full_rec => \@mfr };
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml.readonly",
    method    => "ro_authority_ingest_single_xml",
    api_level => 1,
    argc      => 1,
);
466
# Read-only ingest of the authority.record_entry row with id $rec.
#
# Retrieves the record from open-ils.cstore, runs
# open-ils.ingest.full.authority.xml.readonly on its MARC and stamps the
# resulting full_rec rows with $rec.
#
# Returns undef when no record comes back, otherwise the result hash ref.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority ingest result carries only full_rec, so an unconditional
    # call on $res->{descriptor} dies on undef.  Stamp a descriptor only if
    # one is actually present.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.full.authority.record.readonly",
    method          => "ro_authority_ingest_single_record",
    api_level       => 1,
    argc            => 1,
);
493
# Streaming read-only authority ingest by record id.
#
# Takes one message at a time from the client (recv with count 1 and
# timeout 5), treats its content as a record id, runs
# open-ils.ingest.full.authority.record.readonly on it, stamps the full_rec
# rows with that id and sends the result back.  Stops when recv returns
# nothing or a message has undefined content.  The cstore session in $ses is
# created but not otherwise used here.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        defined $rec or last;

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);

        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.record_stream.readonly",
    method    => "ro_authority_ingest_stream_record",
    api_level => 1,
    stream    => 1,
);
523
# Streaming read-only authority ingest of MARCXML strings.
#
# Takes one message at a time from the client (recv with count 1 and
# timeout 5), runs open-ils.ingest.full.authority.xml.readonly on its content
# and sends the result back.  Stops when recv returns nothing or a message
# has undefined content.  The cstore session in $ses is created but not
# otherwise used here.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        defined $xml or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.xml_stream.readonly",
    method    => "ro_authority_ingest_stream_xml",
    api_level => 1,
    stream    => 1,
);
551
# Streaming ingest of authority record objects.
#
# Takes one record object at a time from the client (recv with count 1 and
# timeout 5), runs open-ils.ingest.full.authority.xml.readonly on its MARC,
# stamps the full_rec rows with the record's id and sends the result back.
# Despite the "rw"/"import" naming, nothing visible in this sub writes to the
# database.  The cstore session in $ses is created but not otherwise used
# here.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        defined $bib or last;

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);

        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.full.authority.bib_stream.import",
    method    => "rw_authority_ingest_stream_import",
    api_level => 1,
    stream    => 1,
);
581
582
583 # --------------------------------------------------------------------------------
584 # MARC index extraction
585
586 package OpenILS::Application::Ingest::XPATH;
587 use base qw/OpenILS::Application::Ingest/;
588 use Unicode::Normalize;
589
# Collects the text selected by an XPath expression into one string.
#
# Arguments:
#   $xml       - node to search (callers pass an XML document element)
#   $xpath     - XPath expression handed to findnodes()
#   $ns_uri    - optional namespace URI; registered on $xml together with
#   $ns_prefix - the prefix, but only when both are true
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the text collected so far (substring test)
#
# For every matching node the text of each child node is appended, followed
# by a single space; a node without children contributes its own text.
# Returns the collected string in NFD form ("" when nothing matches).
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        my @children = $value->childNodes();

        if (!@children) {
            $string .= $value->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $content = $child->textContent;

            # Literal substring test.  index() is used instead of a regex
            # because an empty pattern makes Perl silently reuse the last
            # successful regex, so empty text nodes gave unpredictable
            # results.
            next if ($unique && index($string, $content) >= 0);

            $string .= $content . " ";
        }
    }
    return NFD($string);
}
621
# Streams one metabib field-entry object per configured field of the
# requested classes.
#
# Arguments:
#   $xml     - MARCXML string (entityized and parsed here) or an already
#              parsed document
#   @classes - field class names; each is used as a key of $xpathset and to
#              build the class name Fieldmapper::metabib::<class>_field_entry
#
# For every field definition of a class the document is first run through
# the format's stylesheet when the format has one (at most one transform per
# format per call), then the field's XPath is evaluated with
# xpath_to_string() in unique mode.  Empty results are skipped.  The value is
# NFD-normalized, stripped of \pM and \pC characters and of trailing non-word
# characters, has runs of dots between two word characters removed, and is
# lowercased before being sent to the client.
#
# Returns undef; the entries go out through $client->respond.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => document produced by that format's stylesheet
    my %transformed;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            my $document = $xml;
            if ($sf->{xslt}) {
                $transformed{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transformed{$format};
            }

            # The format name doubles as the namespace prefix.
            my $value = xpath_to_string(
                $document->documentElement, $def->{xpath},
                $sf->{ns},                  $format,
                1
            );

            next unless $value;

            $value = NFD($value);
            for ($value) {
                s/\pM+//sgo;
                s/\pC+//sgo;
                s/\W+$//sgo;
                s/(\w)\.+(\w)/$1$2/sgo;
            }
            $value = lc($value);

            my $fm = $entry_class->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.field_entry.class.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 2,
    stream    => 1,
);
678
# Streams the field entries of the requested classes for a stored record.
#
# Arguments:
#   $rec     - record id
#   @classes - one or more field class names
#
# Retrieves the record, runs open-ils.ingest.field_entry.class.xml on its
# MARC for the given classes, stamps each entry's source with $rec and sends
# it to the client.  Returns undef (also when no record comes back).
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, as class_index_string_xml does; a bare
    # shift kept only the first class and silently dropped the rest.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this reads authority.record_entry, while the entries
    # produced are metabib field entries and all_index_string_record reads
    # biblio.record_entry -- confirm which table is intended.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
706
# Streams the field entries of every configured field class for a MARCXML
# string or parsed document.
#
# Runs open-ils.ingest.field_entry.class.xml with all keys of $xpathset as
# the class list and forwards each entry to the client.  Returns undef.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list below is read from $xpathset before the class-level
    # method gets a chance to load it, so make sure it is populated first
    # (post_init does nothing when it already is).
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.extract.field_entry.all.xml",
    method          => "all_index_string_xml",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
724
# Streams the field entries of every configured field class for the
# biblio.record_entry row with id $rec.
#
# Retrieves the record, runs open-ils.ingest.field_entry.class.xml on its
# MARC with all keys of $xpathset as the class list, stamps each entry's
# source with $rec and sends it to the client.  Returns undef (also when no
# record comes back).
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$r or !@$r) {
        return undef;
    }

    my @entries = $self
        ->method_lookup("open-ils.ingest.field_entry.class.xml")
        ->run($r->marc, keys(%$xpathset));

    for my $fm (@entries) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.extract.field_entry.all.record",
    method    => "all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
751
752 # --------------------------------------------------------------------------------
753 # Flat MARC
754
755 package OpenILS::Application::Ingest::FlatMARC;
756 use base qw/OpenILS::Application::Ingest/;
757 use Unicode::Normalize;
758
759
# Flattens a parsed MARCXML document into full_rec row objects.
#
# Arguments:
#   $marcxml - parsed document; the first element whose local name is
#              "record" is used as the root
#   $xmltype - selects the row class Fieldmapper::<xmltype>::full_rec;
#              defaults to 'metabib'
#
# One row is built per leader (tag 'LDR'), per controlfield (tag taken from
# its "tag" attribute) and per subfield of every datafield (tag, ind1 and
# ind2 from the datafield, subfield from the "code" attribute).  Every value
# is NFD-normalized and stripped of \pM and \pC characters and of trailing
# non-word characters; subfield values are lowercased as well.
#
# Returns the list of row objects.
sub _marcxml_to_full_rows {

    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $row_class = "Fieldmapper::${xmltype}::full_rec";

    # Value cleanup shared by all three kinds of field.
    my $clean = sub {
        my $val = NFD($_[0]);
        $val =~ s/\pM+//sgo;
        $val =~ s/\pC+//sgo;
        $val =~ s/\W+$//sgo;
        return $val;
    };

    my @rows;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $row_class->new;
        $row->tag( 'LDR' );
        $row->value( $clean->($leader->textContent) );

        push @rows, $row;
    }

    for my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $row_class->new;
        $row->tag( $control->getAttribute( "tag" ) );
        $row->value( $clean->($control->textContent) );

        push @rows, $row;
    }

    for my $datafield ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $datafield;

        my $tag  = $datafield->getAttribute( "tag" );
        my $ind1 = $datafield->getAttribute( "ind1" );
        my $ind2 = $datafield->getAttribute( "ind2" );

        for my $subfield ( @{$datafield->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $row_class->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );
            $row->value( lc( $clean->($subfield->textContent) ) );

            push @rows, $row;
        }
    }

    $log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml");
    return @rows;
}
833
# API method: flatten a MARCXML record into full_rec rows and stream each
# row back to the caller.
#
# $xml may be an XML string (entityized and parsed here) or an already
# parsed document (any reference is passed through untouched).  Rows are
# built as authority rows when the invoked API name contains "authority",
# and as metabib rows otherwise.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    $log->debug("processing [$xml]");

    if (!ref $xml) {
        $xml = $parser->parse_string(
            OpenILS::Application::Ingest::entityize($xml)
        );
    }

    my $type = ($self->api_name =~ /authority/) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row (_marcxml_to_full_rows($xml, $type)) {
        $client->respond($row);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
865
# API method: fetch a stored record by id from open-ils.cstore and stream
# its flattened full_rec rows to the caller.
#
# The record class is 'authority' when the invoked API name contains
# "authority", otherwise 'biblio'.  Nothing is streamed when the record
# cannot be retrieved or carries no MARC.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
        ->gather(1);

    if (!$r || !$r->marc) {
        return undef;
    }

    # Delegate the flattening to the matching *.xml method, then relay
    # (and debug-log) every row it produced.
    my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
    foreach my $row (@rows) {
        $client->respond($row);
        $log->debug(JSON->perl2JSON($row), DEBUG);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
904
905 # --------------------------------------------------------------------------------
906 # Fingerprinting
907
908 package OpenILS::Application::Ingest::Biblio::Fingerprint;
909 use base qw/OpenILS::Application::Ingest/;
910 use Unicode::Normalize;
911 use OpenSRF::EX qw/:try/;
912
# API method: compute the fingerprint of a stored bibliographic record.
#
# Retrieves record $rec from open-ils.cstore and runs its MARC through
# open-ils.ingest.fingerprint.xml.  Returns the fingerprint structure with
# its 'quality' member truncated to an integer, or undef when the record
# cannot be retrieved, has no MARC, or no fingerprint could be generated.
sub biblio_fingerprint_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();

    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and $r->marc);

    my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

    # The fingerprint method yields nothing when its script fails.  Stop here
    # instead of letting the assignment below autovivify a bogus
    # { quality => 0 } result.
    unless (ref $fp) {
        $log->error("No fingerprint generated for record $rec");
        return undef;
    }

    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    $fp->{quality} = int($fp->{quality} || 0);
    return $fp;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.record",
    method    => "biblio_fingerprint_record",
    api_level => 1,
    argc      => 1,
);
938
# Script runner for fingerprinting; built on first use and then reused.
our $fp_script;

# API method: run the configured biblio fingerprint script over a MARCXML
# string.
#
# The XML is entityized, then handed to the script as environment.marc.
# Returns the script's result, or undef (after logging an error) when the
# script produces a false result.
sub biblio_fingerprint {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$fp_script) {
        my @pfx = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio fingerprinting...");

        $fp_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $fp_script->insert('environment' => {marc => $xml} => 1);

    # Bail out explicitly on failure; the early return must not depend on
    # what the logger call happens to return.
    my $res = $fp_script->run;
    unless ($res) {
        $log->error( "Fingerprint script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio fingerprinting completed successfully...");

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.fingerprint.xml",
    method    => "biblio_fingerprint",
    api_level => 1,
    argc      => 1,
);
976
# Script runner for record descriptor extraction; built on first use and
# then reused.
our $rd_script;

# API method: run the configured biblio descriptor script over a MARCXML
# string.
#
# The XML is entityized, then handed to the script as environment.marc.
# Returns the script's result, or undef (after logging an error) when the
# script produces a false result.
sub biblio_descriptor {
    my $self = shift;
    my $client = shift;
    my $xml = OpenILS::Application::Ingest::entityize(shift);

    $log->internal("Got MARC [$xml]");

    if (!$rd_script) {
        my @pfx = ( "apps", "open-ils.ingest", "app_settings" );
        my $conf = OpenSRF::Utils::SettingsClient->new;

        my $libs        = $conf->config_value(@pfx, 'script_path');
        my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');

        # script_path may be configured as a single value or as a list.
        my $script_libs = (ref($libs)) ? $libs : [$libs];

        $log->debug("Loading script $script_file for biblio descriptor extraction...");

        $rd_script = OpenILS::Utils::ScriptRunner->new(
            file        => $script_file,
            paths       => $script_libs,
            reset_count => 100,
        );
    }

    $log->debug("Setting up environment for descriptor extraction script...");
    $rd_script->insert('environment.marc' => $xml => 1);
    $log->debug("Environment building complete...");

    # Bail out explicitly on failure; the early return must not depend on
    # what the logger call happens to return.
    my $res = $rd_script->run;
    unless ($res) {
        $log->error( "Descriptor script died!  $@" );
        return undef;
    }

    $log->debug("Script for biblio descriptor extraction completed successfully");

    return $res;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.ingest.descriptor.xml",
    method    => "biblio_descriptor",
    api_level => 1,
    argc      => 1,
);
1016
1017
1018 1;
1019
1020 __END__
1021
# Report the current storage transaction, i.e. whatever
# open-ils.storage.transaction.current answers (false when there is none).
sub in_transaction {
    OpenILS::Application::Ingest->post_init();

    my $current = __PACKAGE__->storage_req('open-ils.storage.transaction.current');
    return $current;
}
1026
# Start a storage transaction unless one is already open.
#
# A failed BEGIN is rolled back and raised as a PANIC inside the try block;
# the handler only logs it, so the caller must inspect the return value,
# which is the current transaction as reported by storage afterwards.
sub begin_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req('open-ils.storage.transaction.current');

    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

            my $started = __PACKAGE__->storage_req('open-ils.storage.transaction.begin', $client);
            if (!$started) {
                __PACKAGE__->storage_req('open-ils.storage.transaction.rollback');
                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
            }
        }
    } otherwise {
        $log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
    };

    return __PACKAGE__->storage_req('open-ils.storage.transaction.current');
}
1051
# Roll back the current storage transaction, if there is one.
#
# Any error raised while rolling back is re-thrown as a PANIC.  Returns 1
# otherwise, including when no transaction was open.
sub rollback_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req('open-ils.storage.transaction.current');

    try {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            __PACKAGE__->storage_req('open-ils.storage.transaction.rollback');
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
    };

    return 1;
}
1071
# Commit the current storage transaction, if there is one.
#
# A commit that reports failure is rolled back and raised as a PANIC; any
# error raised inside the try block is re-thrown as a PANIC.  Returns 1
# otherwise, including when no transaction was open.
sub commit_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req('open-ils.storage.transaction.current');

    try {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            my $committed = __PACKAGE__->storage_req('open-ils.storage.transaction.commit');
            if (!$committed) {
                __PACKAGE__->storage_req('open-ils.storage.transaction.rollback');
                OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
            }
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
    };

    return 1;
}
1097
# Look up $method on this package, run it with the remaining arguments and
# hand back only the first value it produced.
sub storage_req {
    my ($self, $method, @args) = @_;

    my ($first) = __PACKAGE__->method_lookup($method)->run(@args);
    return $first;
}
1104
# API method: delete the derived rows (full_rec and record_descriptor) of
# authority record $rec.
#
# Opens its own transaction when none is active and then commits or rolls
# it back according to the outcome; inside an existing transaction only the
# savepoint is released or rolled back.  Returns 1 on success, 0 on failure.
sub scrub_authority_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    my $commit = 0;
    unless ($ingest->in_transaction) {
        unless ($ingest->begin_transaction($client)) {
            OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        }
        $commit = 1;
    }

    my $success = 1;
    try {
        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

        $ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
        $ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
    } otherwise {
        my $err = shift;
        $log->debug('Scrubbing failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
        $success = 0;
    };

    # Only finish the transaction if this call was the one that opened it.
    if ($commit) {
        if ($success) {
            $ingest->commit_transaction;
        } else {
            $ingest->rollback_transaction;
        }
    }

    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.scrub.authority",
    method    => "scrub_authority_record",
    api_level => 1,
    argc      => 1,
);
1140
1141
# API method: delete everything derived from bibliographic record(s) $rec.
#
# $rec is a record id, or a hash of search criteria that is first resolved
# through id_list.biblio.record_entry.search_where.  Removes the full_rec,
# source map, descriptor and per-class field entry rows, then repairs
# metarecords whose master was $rec: another mapped source takes over as
# master, or the metarecord is deleted when none is left.
#
# Opens its own transaction when none is active and then commits or rolls
# it back according to the outcome.  Returns 1 on success, 0 on failure.
sub scrub_metabib_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    if ( ref($rec) =~ /HASH/ ) {
        $rec = $ingest->storage_req(
            'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
        );
    }

    my $commit = 0;
    unless ($ingest->in_transaction) {
        unless ($ingest->begin_transaction($client)) {
            OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        }
        $commit = 1;
    }

    # Deletion method => name of the column that points at the record.
    # Processed strictly in this order.
    my @purges = (
        [ 'open-ils.storage.direct.metabib.full_rec.mass_delete',              'record' ],
        [ 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', 'source' ],
        [ 'open-ils.storage.direct.metabib.record_descriptor.mass_delete',     'record' ],
        [ 'open-ils.storage.direct.metabib.title_field_entry.mass_delete',     'source' ],
        [ 'open-ils.storage.direct.metabib.author_field_entry.mass_delete',    'source' ],
        [ 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete',   'source' ],
        [ 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete',   'source' ],
        [ 'open-ils.storage.direct.metabib.series_field_entry.mass_delete',    'source' ],
    );

    my $success = 1;
    try {
        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

        foreach my $purge (@purges) {
            my ($method, $column) = @$purge;
            $ingest->storage_req( $method, { $column => $rec } );
        }

        $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
        my $masters = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

        foreach my $mr (@$masters) {
            $log->debug( "Found metarecord whose master is $rec", DEBUG);
            my $others = $ingest->storage_req(
                'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );

            if (!@$others) {
                # No other source is mapped to this metarecord: drop it.
                warn "Removing metarecord whose master is $rec";
                $log->debug( "Removing metarecord whose master is $rec", DEBUG);
                $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
                warn "Metarecord removed";
                $log->debug( "Metarecord removed", DEBUG);
                next;
            }

            # Promote the first remaining source to master.
            my $heir = $others->[0];
            $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$heir->source, DEBUG);
            $mr->master_record($heir->source);
            $ingest->storage_req(
                'open-ils.storage.direct.metabib.metarecord.remote_update',
                { id => $mr->id },
                { master_record => $heir->source, mods => undef }
            );
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

    } otherwise {
        my $err = shift;
        $log->debug('Scrubbing failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
        $success = 0;
    };

    # Only finish the transaction if this call was the one that opened it.
    if ($commit) {
        if ($success) {
            $ingest->commit_transaction;
        } else {
            $ingest->rollback_transaction;
        }
    }

    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.scrub.biblio",
    method    => "scrub_metabib_record",
    api_level => 1,
    argc      => 1,
);
1215
# API method: re-ingest every bibliographic record mapped to metarecord
# $mrec.
#
# For each source map row one response is streamed: a hash carrying the
# record id, its metarecord id and a success flag, plus the caught error
# under 'error' when ingesting that record threw.  The nomap/noscrub API
# name variants take effect through $self, which is handed on to
# wormize_biblio_record.
sub wormize_biblio_metarecord {
    my $self = shift;
    my $client = shift;
    my $mrec = shift;

    my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

    for my $r (@$recs) {
        my $success = 0;
        try {
            $success = wormize_biblio_record($self => $client => $r->source);
            $client->respond(
                { record     => $r->source,
                  # Report from the map row in hand; no $rec exists here.
                  metarecord => $r->metarecord,
                  success    => $success,
                }
            );
        } catch Error with {
            my $e = shift;
            $client->respond(
                { record     => $r->source,
                  metarecord => $r->metarecord,
                  success    => $success,
                  error      => $e,
                }
            );
        };
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.metarecord",
    method    => "wormize_biblio_metarecord",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.metarecord.nomap",
    method    => "wormize_biblio_metarecord",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.metarecord.noscrub",
    method    => "wormize_biblio_metarecord",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.metarecord.nomap.noscrub",
    method    => "wormize_biblio_metarecord",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1275
1276
# API method: (re)build all derived data for bibliographic record(s) $rec.
#
# $rec is a record id, or a hash of search criteria that is first resolved
# through id_list.biblio.record_entry.search_where.
#
# Steps:
#   1. unless the API name contains "noscrub", scrub the existing derived
#      rows via open-ils.worm.scrub.biblio;
#   2. for each record, under its own savepoint: refresh the stored
#      fingerprint/quality if they changed, then extract full_rec rows, the
#      record descriptor and the per-class field entries, and (unless the
#      API name contains "nomap") find or create the metarecord matching the
#      fingerprint and prepare a source map row;
#   3. under the 'wormize_record' savepoint: create the source maps,
#      re-elect each touched metarecord's master (highest quality wins) and
#      batch-create all extracted rows.
#
# Opens its own transaction when none is active and then commits or rolls
# it back according to the outcome.  Returns 1 on success; 0 when nothing
# could be extracted or any step outside the per-record savepoints failed.
sub wormize_biblio_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    if ( ref($rec) && ref($rec) =~ /HASH/ ) {
        $rec = OpenILS::Application::Ingest->storage_req(
            'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
        );
    }

    my $commit = 0;
    if (!OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client)
            || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;

    # True only while the 'wormize_record' savepoint exists, so the error
    # handler never tries to roll back a savepoint that was never set.
    my $savepoint_set = 0;

    try {
        # clean up the cruft
        unless ($self->api_name =~ /noscrub/) {
            $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec )
                || OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

        my @full_rec = ();
        my @rec_descriptor = ();
        my %field_entry = (
            title   => [],
            author  => [],
            subject => [],
            keyword => [],
            series  => [],
        );
        my %metarecord = ();
        my @source_map = ();

        for my $r (@$bibs) {
            try {
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

                my $xml = $parser->parse_string($r->marc);

                # update the fingerprint
                my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
                OpenILS::Application::Ingest->storage_req(
                    'open-ils.storage.direct.biblio.record_entry.remote_update',
                    { id => $r->id },
                    { fingerprint => $fp->{fingerprint},
                      quality     => int($fp->{quality}) }
                ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);

                # Everything extracted for this record is staged locally and
                # only merged into the shared batches once the whole record
                # has succeeded.  A failure partway through therefore leaves
                # nothing of this record behind for the batch creates below.

                # the full_rec stuff
                my @staged_full_rec;
                for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
                    $fr->record( $r->id );
                    push @staged_full_rec, $fr;
                }

                # the rec_descriptor stuff
                my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
                $rd->record( $r->id );

                # the indexing field entry stuff
                my %staged_field_entry;
                for my $class ( qw/title author subject keyword series/ ) {
                    for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
                        $fe->source( $r->id );
                        push @{ $staged_field_entry{$class} }, $fe;
                    }
                }

                # the metarecord mapping
                my ($mr, $mr_map);
                unless ($self->api_name =~ /nomap/) {
                    $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];

                    unless ($mr) {
                        $mr = Fieldmapper::metabib::metarecord->new;
                        $mr->fingerprint( $fp->{fingerprint} );
                        $mr->master_record( $r->id );
                        $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                    }

                    $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                    $mr_map->metarecord( $mr->id );
                    $mr_map->source( $r->id );
                }

                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );

                # The record made it through: merge its staged rows.
                push @full_rec, @staged_full_rec;
                push @rec_descriptor, $rd;
                for my $class ( keys %staged_field_entry ) {
                    push @{ $field_entry{$class} }, @{ $staged_field_entry{$class} };
                }
                if ($mr_map) {
                    push @source_map, $mr_map;
                    $metarecord{$mr->id} = $mr;
                }
            } otherwise {
                $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
            };
        }

        if (@rec_descriptor) {
            OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
            $savepoint_set = 1;

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
                @source_map
            ) if (@source_map);

            # Re-elect the master of every metarecord touched above: the
            # mapped record with the highest quality wins.
            for my $mr ( values %metarecord ) {
                my $sources = OpenILS::Application::Ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
                    $mr->id
                );

                my $mapped_bibs = OpenILS::Application::Ingest->storage_req(
                    'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
                    [ map { $_->source } @$sources ]
                );

                my $master = ( sort { $b->quality <=> $a->quality } @$mapped_bibs )[0];

                OpenILS::Application::Ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord.remote_update',
                    { id => $mr->id },
                    { master_record => $master->id, mods => undef }
                );
            }

            # @rec_descriptor is known to be non-empty in this branch.
            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.record_descriptor.batch.create',
                @rec_descriptor
            );

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.full_rec.batch.create',
                @full_rec
            ) if (@full_rec);

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.title_field_entry.batch.create',
                @{ $field_entry{title} }
            ) if (@{ $field_entry{title} });

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.author_field_entry.batch.create',
                @{ $field_entry{author} }
            ) if (@{ $field_entry{author} });

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
                @{ $field_entry{subject} }
            ) if (@{ $field_entry{subject} });

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
                @{ $field_entry{keyword} }
            ) if (@{ $field_entry{keyword} });

            OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.metabib.series_field_entry.batch.create',
                @{ $field_entry{series} }
            ) if (@{ $field_entry{series} });

            OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
            $savepoint_set = 0;
        } else {
            # Nothing could be extracted from any record.
            $success = 0;
        }

    } otherwise {
        $log->debug('Wormization failed : '.shift(), ERROR);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' )
            if ($savepoint_set);
        $success = 0;
    };

    # Only finish the transaction if this call was the one that opened it.
    OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
    OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.biblio",
    method    => "wormize_biblio_record",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.biblio.nomap",
    method    => "wormize_biblio_record",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.biblio.noscrub",
    method    => "wormize_biblio_record",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.biblio.nomap.noscrub",
    method    => "wormize_biblio_record",
    api_level => 1,
    argc      => 1,
);
1477
# Rebuild the flattened full_rec rows for authority record(s).
#
# Arguments (after the usual $self / $client pair):
#   $rec - handed unchanged to the scrub method and to the
#          authority.record_entry id search
#
# Returns 1 when every step succeeded and 0 otherwise.
#
# If no transaction is open when this is called, one is begun here and is
# committed (success) or rolled back (failure) before returning.  When the
# caller already owns a transaction, only the local savepoint is used.
sub wormize_authority_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;

	# Only manage the transaction if nobody further up the stack opened one.
	my $commit = 0;
	if (!OpenILS::Application::Ingest->in_transaction) {
		OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
		$commit = 1;
	}

	my $success = 1;

	# Tracks whether the savepoint exists yet, so the error handler never
	# asks the database to roll back to a savepoint that was never set.
	my $savepoint_set = 0;

	try {
		# clean up the cruft (skipped when called through a *.noscrub API name)
		unless ($self->api_name =~ /noscrub/o) {
			$self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
		}

		# now redo 'em
		my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

		my @full_rec = ();
		my @rec_descriptor = ();	# only referenced by the disabled descriptor code below
		for my $r (@$bibs) {
			my $xml = $parser->parse_string($r->marc);

			# the full_rec stuff
			for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
				$fr->record( $r->id );
				push @full_rec, $fr;
			}

			# the rec_descriptor stuff -- XXX What does this mean for authority records?
			#my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
			#$rd->record( $r->id );
			#push @rec_descriptor, $rd;

		}

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
		$savepoint_set = 1;

		#OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

	} otherwise {
		$log->debug('Wormization failed : '.shift(), ERROR);
		# A failure before savepoint.set (scrub, search, parse, flatten)
		# leaves nothing to roll back to at this level.
		OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' ) if ($savepoint_set);
		$success = 0;
	};

	OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
	OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
	return $success;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.authority",
	method		=> "wormize_authority_record",
	api_level	=> 1,
	argc		=> 1,
);
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.wormize.authority.noscrub",
	method		=> "wormize_authority_record",
	api_level	=> 1,
	argc		=> 1,
);
1546
1547
1548 # --------------------------------------------------------------------------------
1549 # MARC index extraction
1550
1551 package OpenILS::Application::Ingest::XPATH;
1552 use base qw/OpenILS::Application::Ingest/;
1553 use Unicode::Normalize;
1554
# Collect the text found by an XPath expression into one space-separated,
# NFD-normalised string.
#
# Arguments:
#   $xml       - element to search (e.g. a MODS documentElement)
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - optional namespace URI to declare on $xml
#   $ns_prefix - optional prefix for $ns_uri; the namespace is only set
#                when both URI and prefix are true
#   $unique    - when true, a child whose text already occurs anywhere in
#                the accumulated string is skipped
#
# Each child of a matched node contributes its text followed by a single
# space; a matched node with no children contributes its own text.
sub _xpath_to_string {
	my $xml = shift;
	my $xpath = shift;
	my $ns_uri = shift;
	my $ns_prefix = shift;
	my $unique = shift;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# grab the set of matching nodes
	my @nodes = $xml->findnodes( $xpath );
	for my $value (@nodes) {

		# grab all children of the node
		my @children = $value->childNodes();
		for my $child (@children) {

			my $content = $child->textContent;

			# uniquify the values.  A literal substring test is used rather
			# than a regex: an empty $content would otherwise produce the
			# empty pattern, which Perl treats as "reuse the last
			# successfully matched regex" instead of "match anything".
			next if ($unique && index($string, $content) >= 0);

			# add the childs content to the growing buffer
			$string .= $content . " ";
		}
		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	return NFD($string);
}
1586
# Stream one field_entry object per configured index definition of a
# search class for the supplied MARC XML.
#
# Arguments (after $self / $client):
#   $xml   - MARC XML, as a string or an already parsed object
#   $class - search class name; selects both the entries of $xpathset to
#            evaluate and the Fieldmapper::metabib::<class>_field_entry
#            class to instantiate
#
# Each non-empty extracted value is normalised (NFD, combining marks and
# control characters removed, trailing non-word characters trimmed, dots
# following a word character dropped, lower-cased) and sent to the client
# via respond().  Nothing is returned directly.
sub class_all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;
	my $class = shift;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	# The MODS transform does not depend on the index type, so run it at
	# most once (and not at all when the class has no definitions) instead
	# of once per loop iteration.
	my $mods_root;

	for my $type ( keys %{ $xpathset->{$class} } ) {
		$mods_root ||= $mods_sheet->transform($xml)->documentElement;

		my $value =  _xpath_to_string(
				$mods_root,
				$xpathset->{$class}->{$type}->{xpath},
				"http://www.loc.gov/mods/",
				"mods",
				1
		);

		next unless $value;

		$value = NFD($value);
		$value =~ s/\pM+//sgo;
		$value =~ s/\pC+//sgo;
		$value =~ s/\W+$//sgo;

		$value =~ s/(\w)\./$1/sgo;
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $xpathset->{$class}->{$type}->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.xml",
	method		=> "class_all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1630
# Record-id flavour of open-ils.worm.field_entry.class.xml: fetch the bib
# record, run the XML method over its MARC, stamp every resulting entry
# with the record id as its source and stream it to the client.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $xml_method = $self->method_lookup("open-ils.worm.field_entry.class.xml");
	my @entries = $xml_method->run($bib->marc, $class);

	for my $entry (@entries) {
		$entry->source($rec);
		$client->respond($entry);
	}

	# everything was delivered through respond()
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.record",
	method		=> "class_all_index_string_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
1653
1654
# Return the index string for a single class/type definition, extracted
# from the MODS rendering of the supplied MARC XML (string or parsed).
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $mods_root = $mods_sheet->transform($xml)->documentElement;
	my $xpath = $xpathset->{$class}->{$type}->{xpath};

	# values are uniquified (last argument)
	return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.xml",
	method		=> "class_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
);
1672
# Record-id flavour of open-ils.worm.class.type.xml: fetch the bib record,
# extract the index string for one class/type from its MARC, log it and
# return it.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $xml_method = $self->method_lookup("open-ils.worm.class.type.xml");
	my ($string) = $xml_method->run($bib->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($string)", DEBUG);
	return $string;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.record",
	method		=> "class_index_string_record",
	api_level	=> 1,
	argc		=> 1,
);
1693
# Evaluate an arbitrary XPath expression against the supplied XML (string
# or parsed document) and return the collected text.  $uri / $prefix
# optionally declare a namespace for the expression; $unique asks for
# repeated values to be dropped.
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $root = $xml->documentElement;
	return _xpath_to_string( $root, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.xml",
	method		=> "xml_xpath",
	api_level	=> 1,
	argc		=> 1,
);
1713
# Record-id flavour of open-ils.worm.xpath.xml: fetch the bib record, run
# the XPath over its MARC, log the outcome and return it.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $xml_method = $self->method_lookup("open-ils.worm.xpath.xml");
	my ($string) = $xml_method->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($string)", DEBUG);
	return $string;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.record",
	method		=> "record_xpath",
	api_level	=> 1,
	argc		=> 1,
);
1736
1737
1738 # --------------------------------------------------------------------------------
1739 # MARC Descriptor
1740
1741 package OpenILS::Application::Ingest::Biblio::Leader;
1742 use base qw/OpenILS::Application::Ingest/;
1743 use Unicode::Normalize;
1744
# Character classes of record-type codes, keyed by a short group name.
# The descriptor extractors test leader position 6 against these.
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# Build a compiled regex that matches a string consisting of exactly one
# type code belonging to any of the named groups.
#
# Arguments: one or more keys of %marc_type_groups.
# Returns:   a qr// object.  Unknown group names are ignored; when no
#            known group is named the result never matches.
sub _type_re {
	# Hash *slice*: $marc_type_groups{@_} would evaluate @_ in scalar
	# context and look up the argument count as the key.
	my @patterns = grep { defined } @marc_type_groups{@_};
	return qr/(?!)/ unless @patterns;

	# Group the alternation so both anchors apply to every branch.
	my $re = '\A(?:' . join('|', @patterns) . ')\z';
	return qr/$re/;
}
1760
# Extractors for each record_descriptor field.  Every value is a code ref
# taking no arguments; it reads the package variables $ldr (leader text)
# and $oo8 (008 control field text), which the caller is expected to have
# set before invoking it.
our %biblio_descriptor_code = do {

	# one character of the leader at a fixed offset
	my $leader_char = sub {
		my $offset = shift;
		return sub { substr($ldr, $offset, 1); };
	};

	# 008 position 33, but only for records whose type is in the named group
	my $oo8_33_when = sub {
		my $group = shift;
		return sub { (substr($ldr,6,1) =~ _type_re($group)) ? substr($oo8,33,1) : undef; };
	};

	(
		item_type	=> $leader_char->(6),
		item_form	=> sub {
			my $rec_type = substr($ldr,6,1);

			# where the form code lives depends on the record type
			return substr($oo8,29,1) if ($rec_type =~ _type_re( qw/MAP VIS/ ));
			return substr($oo8,23,1) if ($rec_type =~ _type_re( qw/BKS SER MIX SCO REC/ ));
			return ' ';
		},
		bib_level	=> $leader_char->(7),
		control_type	=> $leader_char->(8),
		char_encoding	=> $leader_char->(9),
		enc_level	=> $leader_char->(17),
		cat_form	=> $leader_char->(18),
		pub_status	=> $leader_char->(5),
		item_lang	=> sub { substr($oo8,35,3); },
		lit_form	=> $oo8_33_when->('BKS'),
		type_mat	=> $oo8_33_when->('VIS'),
		audience	=> sub { substr($oo8,22,1); },
	);
};
1783
# Build a metabib record_descriptor from a parsed MARC XML document.
#
# The leader and the 008 / 007 control fields are read once into localized
# package variables; each extractor in %biblio_descriptor_code is then run
# and its result stored through the accessor of the same name.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
	local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

	my $descriptor = Fieldmapper::metabib::record_descriptor->new;

	$descriptor->$_( $biblio_descriptor_code{$_}->() )
		for (keys %biblio_descriptor_code);

	return $descriptor;
}
1798
# API wrapper: accept MARC XML as a string or parsed document and return
# the record descriptor built from it.
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	# strings are parsed here; anything that is already a reference is used as-is
	$xml = $parser->parse_string($xml) unless (ref $xml);

	return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.xml",
	method		=> "extract_biblio_desc_xml",
	api_level	=> 1,
	argc		=> 1,
);
1814
# Record-id flavour of open-ils.worm.biblio_leader.xml: fetch the bib
# record, build the descriptor from its MARC, log it as JSON and return it.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();
	my $bib = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );

	my $xml_method = $self->method_lookup("open-ils.worm.biblio_leader.xml");
	my ($descriptor) = $xml_method->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($descriptor), DEBUG);
	return $descriptor;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.record",
	method		=> "extract_biblio_desc_record",
	api_level	=> 1,
	argc		=> 1,
);
1833
1834 # --------------------------------------------------------------------------------
1835 # Flat MARC
1836
1837 package OpenILS::Application::Ingest::FlatMARC;
1838 use base qw/OpenILS::Application::Ingest/;
1839 use Unicode::Normalize;
1840
1841
# Flatten a parsed MARC XML document into a list of full_rec objects.
#
# Arguments:
#   $marcxml - parsed document containing a "record" element
#   $xmltype - Fieldmapper namespace to build rows in (default 'metabib')
#
# Produces one row per leader (tag 'LDR'), one per controlfield, and one
# per subfield of every datafield (carrying tag, both indicators and the
# subfield code).  All values are NFD-normalised with combining marks,
# control characters and trailing non-word characters removed; subfield
# values are additionally lower-cased.
sub _marcxml_to_full_rows {
	my ($marcxml, $xmltype) = @_;
	$xmltype ||= 'metabib';

	my $row_class = "Fieldmapper::${xmltype}::full_rec";

	# shared value clean-up applied to every row
	my $scrub = sub {
		my ($raw) = @_;
		my $clean = NFD($raw);
		$clean =~ s/\pM+//sgo;
		$clean =~ s/\pC+//sgo;
		$clean =~ s/\W+$//sgo;
		return $clean;
	};

	my @rows;

	my ($record) = $marcxml->findnodes('//*[local-name()="record"]');

	# leader
	for my $leader ( @{$record->getChildrenByTagName("leader")} ) {
		next unless $leader;

		my $row = $row_class->new;
		$row->tag( 'LDR' );
		$row->value( $scrub->($leader->textContent) );

		push @rows, $row;
	}

	# control fields
	for my $control ( @{$record->getChildrenByTagName("controlfield")} ) {
		next unless $control;

		my $row = $row_class->new;
		$row->tag( $control->getAttribute( "tag" ) );
		$row->value( $scrub->($control->textContent) );

		push @rows, $row;
	}

	# data fields: one row per subfield
	for my $field ( @{$record->getChildrenByTagName("datafield")} ) {
		next unless $field;

		my $tag = $field->getAttribute( "tag" );
		my $ind1 = $field->getAttribute( "ind1" );
		my $ind2 = $field->getAttribute( "ind2" );

		for my $subfield ( @{$field->getChildrenByTagName('subfield')} ) {
			next unless $subfield;

			my $row = $row_class->new;
			$row->tag( $tag );
			$row->ind1( $ind1 );
			$row->ind2( $ind2 );
			$row->subfield( $subfield->getAttribute( "code" ) );
			$row->value( lc($scrub->($subfield->textContent)) );

			push @rows, $row;
		}
	}

	$log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml", DEBUG);
	return @rows;
}
1915
# Stream the flattened full_rec rows for the supplied MARC XML (string or
# parsed).  Rows are built as authority rows when the API name contains
# "authority" and as metabib rows otherwise.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}
for my $api_name (qw/
	open-ils.worm.flat_marc.authority.xml
	open-ils.worm.flat_marc.biblio.xml
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_xml",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1945
# Record-id flavour of the flat_marc XML methods: fetch the biblio or
# authority record (chosen from the API name), flatten its MARC through
# the matching *.xml method and stream the rows to the client.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();
	my $entry = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );

	my $xml_method = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
	for my $row ($xml_method->run($entry->marc)) {
		$client->respond($row);
	}
	return undef;
}
for my $api_name (qw/
	open-ils.worm.flat_marc.biblio.record_entry
	open-ils.worm.flat_marc.authority.record_entry
/) {
	__PACKAGE__->register_method(
		api_name	=> $api_name,
		method		=> "flat_marc_record",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1974
1975
1976 # --------------------------------------------------------------------------------
1977 # Fingerprinting
1978
1979 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1980 use base qw/OpenILS::Application::Ingest/;
1981 use Unicode::Normalize;
1982 use OpenSRF::EX qw/:try/;
1983
# Fingerprint extraction table, consumed pairwise:
#
#   match xpath => [ part name => { xpath => [...], fixup => sub }, ... ]
#
# A block applies when its match xpath finds nodes in the MODS document.
# Within a block, each part's xpath list is tried in order and the first
# one yielding text wins; the part's fixup then edits the package variable
# $text in place.
my @fp_mods_xpath = do {

	# Turn an ordered list of in-place edits of $text into a fixup routine
	# that logs $text before the first edit and again after every edit.
	my $make_fixup = sub {
		my @steps = @_;
		return sub {
			for my $step (@steps) {
				$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
				$step->();
			}
			$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
		};
	};

	# edits shared by titles and authors
	my @common_steps = (
		sub { $text = NFD($text); },
		sub { $text =~ s/\pM+//gso; },
		sub { $text = lc($text); },
		sub { $text =~ s/\s+/ /sgo; },
		sub { $text =~ s/^\s*(.+)\s*$/$1/sgo; },
	);

	# titles: also drop articles, bracketed text and trailing punctuation
	my $title_fixup = $make_fixup->(
		@common_steps,
		sub { $text =~ s/\b(?:the|an?)\b//sgo; },
		sub { $text =~ s/\[.[^\]]+\]//sgo; },
		sub { $text =~ s/\s*[;\/\.]*$//sgo; },
	);

	# authors: also cut everything from the first whitespace (and a
	# preceding comma) onward
	my $author_fixup = $make_fixup->(
		@common_steps,
		sub { $text =~ s/,?\s+.*$//sgo; },
	);

	# title / author locations on the record itself
	my @record_title_paths = (
		'//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
		'//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
		'//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
		'//mods:mods/mods:titleInfo[mods:title and not(@type)]',
	);
	my @record_author_paths = (
		'//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
		'//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
	);

	(
		'//mods:mods/mods:typeOfResource[text()="text"]' => [
			title	=> {
				xpath	=> [ @record_title_paths ],
				fixup	=> $title_fixup,
			},
			author	=> {
				xpath	=> [ @record_author_paths ],
				fixup	=> $author_fixup,
			},
		],

		# related-item locations are tried before the record's own
		'//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
			title	=> {
				xpath	=> [
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
					@record_title_paths,
				],
				fixup	=> $title_fixup,
			},
			author	=> {
				xpath	=> [
					'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
					'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
					@record_author_paths,
				],
				fixup	=> $author_fixup,
			},
		],
	);
};

# Any record with a titleInfo falls back to the first block's rules.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
2096
# Compute the fingerprint string for a MODS element.
#
# Walks @fp_mods_xpath as (match xpath, block) pairs.  For the first pair
# whose match xpath finds nodes AND whose parts yield any text, the fixed-up
# part texts are concatenated, stripped of all non-word characters, logged
# and returned.  Returns undef when no pair produces text.
sub _fp_mods {
	my ($mods) = @_;
	$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

	my $fp_string = '';

	for (my $idx = 0; $idx < @fp_mods_xpath; $idx += 2) {
		my ($match_xpath, $block) = @fp_mods_xpath[$idx, $idx + 1];
		last unless $match_xpath;

		# list context is required here: an empty node list means "no match"
		my @hits = $mods->findnodes( $match_xpath );
		if (@hits) {

			my @pairs = @{ $block || [] };
			for (my $p = 0; $p < @pairs; $p += 2) {
				my ($part_name, $part) = @pairs[$p, $p + 1];
				last unless $part;

				# the fixup routines edit this package variable in place
				local $text;
				for my $xpath ( @{ $part->{xpath} } ) {
					$text = $mods->findvalue( $xpath );
					last if ($text);
				}

				$log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

				next unless $text;

				$$part{fixup}->();
				$log->debug("Fingerprint text after fixup : [$text]", DEBUG);
				$fp_string .= $text;
			}
		}

		if ($fp_string) {
			$fp_string =~ s/\W+//gso;
			$log->debug("Fingerprint is [$fp_string]", INFO);
			return $fp_string;
		}
	}

	return undef;
}
2141
# Recompute the fingerprint and quality of bibliographic records and, unless
# the method was called under an API name containing "nomap", move each
# changed record to the metarecord that matches its new fingerprint.
#
# Arguments (after $self and $client):
#   $rec - search value handed to biblio.record_entry.search.id.atomic
#
# For every record found:
#   * open-ils.worm.fingerprint.marc is run on the record's MARC;
#   * if fingerprint or quality differ from the stored values, the record is
#     updated, its existing metarecord source maps are deleted, the old
#     metarecord is deleted when no source maps are left for it, and a source
#     map to the metarecord carrying the new fingerprint is created (that
#     metarecord is created, with this record as master, when none exists);
#   * the record id is sent to the client with $client->respond.
#
# If no transaction is open one is started here; it is committed when the
# whole run succeeds and rolled back when anything inside the try block
# throws.  Always returns undef.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

                # named $bib rather than $b so sort's special variable is not shadowed
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # drop every existing map for this record, remembering the
                                        # metarecord of the last one seen
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared unconditionally: "my $x = ... if (...)" has
                                        # undefined behaviour and can leak a value from an
                                        # earlier iteration of the record loop.
                                        my $old_sm;
                                        if ($old_mrid) {
                                                $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );
                                        }

                                        # the old metarecord has no sources left: remove it
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        # only finish a transaction that was started here
        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);

__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
);
2244
2245 =comment
2246
# NOTE: disabled code.  This sub and its registration sit between the
# "=comment" and "=cut" POD markers, so perl never compiles them.
#
# Retrieves record entry $rec from storage, runs
# open-ils.worm.fingerprint.marc on its MARC and returns the result.
sub fingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();
        my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );

        my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        return $fp;

}
# The same API name is registered (at api_level 1) for
# biblio_fingerprint_record further down in this file.
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "fingerprint_bibrec",
        api_level       => 0,
        argc            => 1,
);                      
2266
2267
# NOTE: disabled code.  This sub and its registration sit between the
# "=comment" and "=cut" POD markers, so perl never compiles them.
#
# Parses the XML string $xml and returns _fp_mods() of its document element.
sub fingerprint_mods {
        my $self = shift;
        my $client = shift;
        my $xml = shift;

        OpenILS::Application::Ingest->post_init();
        my $mods = $parser->parse_string($xml)->documentElement;

        return _fp_mods( $mods );
}
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.mods",
        method          => "fingerprint_mods",
        api_level       => 1,
        argc            => 1,
);                      
2284
# NOTE: disabled code.  This sub and its registration sit between the
# "=comment" and "=cut" POD markers, so perl never compiles them.
#
# Takes MARC XML as a string or an already parsed document, transforms it
# with $mods_sheet and returns _fp_mods() of the resulting document element.
sub fingerprint_marc {
        my $self = shift;
        my $client = shift;
        my $xml = shift;

        # a plain string is parsed first; a reference is used as it is
        $xml = $parser->parse_string($xml) unless (ref $xml);

        OpenILS::Application::Ingest->post_init();
        my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
        $log->debug("Returning [$fp] as fingerprint", INFO);
        return $fp;
}
# The same API name is registered for biblio_fingerprint further down in
# this file.
__PACKAGE__->register_method(  
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "fingerprint_marc",
        api_level       => 1,
        argc            => 1,
);                      
2303
2304
2305 =cut
2306
# Fingerprint a stored bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - id handed to biblio.record_entry.retrieve
#
# Retrieves the record, runs open-ils.worm.fingerprint.marc on its MARC and
# returns whatever that method returns.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $marc = OpenILS::Application::Ingest
                        ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
                        ->marc;

        my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);

        # refingerprint_bibrec reads {fingerprint} and {quality} from this
        # result, so it is normally a hash ref; interpolating it directly
        # would log "HASH(0x...)" instead of the fingerprint.
        my $fp_text = (ref($fp) eq 'HASH')
                ? "$fp->{fingerprint} (quality $fp->{quality})"
                : $fp;
        $log->debug("Returning [$fp_text] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
2328
# Script runner for the fingerprint script; created on the first call to
# biblio_fingerprint and reused afterwards.
our $fp_script;

# Fingerprint a MARC record by running the configured fingerprint script.
#
# Arguments (after $self and $client):
#   $marc - MARC XML, either as a string or as an already parsed document
#
# The record is transformed with $mods_sheet; the entityized MARC and MODS
# strings are handed to the script as its 'environment' ({marc, mods}).
# Returns the script's result, or 0 when the script returns a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        # (a leftover "warn $marc" used to dump every record to STDERR here;
        # the record is still available through the internal-level log)
        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be a single value or a list
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};

        $fp_script->insert('environment' => $env);

        $log->debug("Running script for biblio fingerprinting...");

        # Fail explicitly: the old "run || (log && return 0)" form only
        # returned when the logger call happened to return a true value.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
2391
2392 # --------------------------------------------------------------------------------
2393
2394 1;
2395
2396 __END__
# NOTE: these declarations follow the __END__ marker, so perl does not
# compile them as part of the module.
#
# File-level slots for storage method handles.  wormize() fills each one
# with $self->method_lookup(...) the first time it finds the slot empty.

# transaction control
my $in_xact;
my $begin;
my $commit;
my $rollback;
# record entry / metarecord / source map handles
my $lookup;
my $update_entry;
my $mr_lookup;
my $mr_update;
my $mr_create;
my $create_source_map;
my $sm_lookup;
# mass_delete handles for the rows derived from a record
my $rm_old_rd;
my $rm_old_sm;
my $rm_old_fr;
my $rm_old_tr;
my $rm_old_ar;
my $rm_old_sr;
my $rm_old_kr;
my $rm_old_ser;

# batch.create handles; $create is keyed by field class
# (title, author, subject, keyword, series)
my $fr_create;
my $rd_create;
my $create = {};
2420
# Maps record descriptor field names to Perl expressions held as strings.
# wormize() and authority_wormize() string-eval each expression with $ldr
# (text of the leader element) and $oo8 (value of controlfield 008) in scope
# and store the result in the descriptor field of the same name, so those two
# variable names must not change in the callers.
my %descriptor_code = (
        item_type => 'substr($ldr,6,1)',
        # the 008 position read for item_form depends on the leader/06 value
        item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        bib_level => 'substr($ldr,7,1)',
        control_type => 'substr($ldr,8,1)',
        char_encoding => 'substr($ldr,9,1)',
        enc_level => 'substr($ldr,17,1)',
        cat_form => 'substr($ldr,18,1)',
        pub_status => 'substr($ldr,5,1)',
        item_lang => 'substr($oo8,35,3)',
        #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
        audience => 'substr($oo8,22,1)',
);
2434
# Ingest bibliographic records: fingerprint each record, map it to a
# metarecord (unless the API name contains "no_map"), and rebuild its record
# descriptor, full_rec rows and title/author/subject/keyword/series entries.
#
# Arguments (after $self and $client): the ids of the records to ingest.
# Only the first record found is processed unless the API name ends in
# "batch".  Runs inside the caller's transaction when there is one,
# otherwise starts and commits its own.  Returns the number of records
# processed; throws OpenSRF::EX::PANIC when a storage call fails.
sub wormize {

        my ($self, $client, @docids) = @_;

        my $no_map = ($self->api_name =~ /no_map/o) ? 1 : 0;

        # Resolve every storage method once; the handles live in file-level
        # variables and are only looked up while their slot is still empty.
        for my $wanted (
                [ \$in_xact,           'open-ils.storage.transaction.current' ],
                [ \$begin,             'open-ils.storage.transaction.begin' ],
                [ \$commit,            'open-ils.storage.transaction.commit' ],
                [ \$rollback,          'open-ils.storage.transaction.rollback' ],
                [ \$sm_lookup,         'open-ils.storage.direct.metabib.metarecord_source_map.search.source' ],
                [ \$mr_lookup,         'open-ils.storage.direct.metabib.metarecord.search.fingerprint' ],
                [ \$mr_update,         'open-ils.storage.direct.metabib.metarecord.batch.update' ],
                [ \$lookup,            'open-ils.storage.direct.biblio.record_entry.batch.retrieve' ],
                [ \$update_entry,      'open-ils.storage.direct.biblio.record_entry.batch.update' ],
                [ \$rm_old_sm,         'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete' ],
                [ \$rm_old_rd,         'open-ils.storage.direct.metabib.record_descriptor.mass_delete' ],
                [ \$rm_old_fr,         'open-ils.storage.direct.metabib.full_rec.mass_delete' ],
                [ \$rm_old_tr,         'open-ils.storage.direct.metabib.title_field_entry.mass_delete' ],
                [ \$rm_old_ar,         'open-ils.storage.direct.metabib.author_field_entry.mass_delete' ],
                [ \$rm_old_sr,         'open-ils.storage.direct.metabib.subject_field_entry.mass_delete' ],
                [ \$rm_old_kr,         'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete' ],
                [ \$rm_old_ser,        'open-ils.storage.direct.metabib.series_field_entry.mass_delete' ],
                [ \$mr_create,         'open-ils.storage.direct.metabib.metarecord.create' ],
                [ \$create_source_map, 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create' ],
                [ \$rd_create,         'open-ils.storage.direct.metabib.record_descriptor.batch.create' ],
                [ \$fr_create,         'open-ils.storage.direct.metabib.full_rec.batch.create' ],
        ) {
                my ($slot, $api) = @$wanted;
                $$slot ||= $self->method_lookup( $api );
        }
        for my $class ( qw/title author subject keyword series/ ) {
                $create->{$class} ||= $self->method_lookup( "open-ils.storage.direct.metabib.${class}_field_entry.batch.create" );
        }

        # open a transaction of our own only when the caller has none
        my ($outer_xact) = $in_xact->run;
        try {
                if (!$outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($started) = $begin->run($client);
                        if (!$started) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my (@source_maps, @entry_list, @mr_list, @rd_list, @ns_list, @mods_data);
        my $ret = 0;

        for my $entry ( $lookup->run(@docids) ) {
                next unless ($entry);

                # load the MARC-to-MODS stylesheet on first use
                unless ($mods_sheet) {
                        my $xsl_dir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
                        $mods_sheet = $xslt->parse_stylesheet( $parser->parse_file( $xsl_dir . "/MARC21slim2MODS.xsl" ) );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);

                my $mods = $mods_sheet->transform($marcdoc)->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                $entry->fingerprint( fingerprint_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                if (!$no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if ($mr && @$mr) {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        } else {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );
                                my ($new_mr) = $mr_create->run($mr);
                                $mr->id($new_mr);
                                if (!defined $mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                        }

                        my $map = Fieldmapper::metabib::metarecord_source_map->new;
                        $map->metarecord( $mr->id );
                        $map->source( $entry->id );
                        push @source_maps, $map;
                }

                # $ldr and $oo8 are referenced by name from the expression
                # strings in %descriptor_code, so they must keep these names.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $field ( keys %descriptor_code ) {
                        $rd_obj->$field( eval "$descriptor_code{$field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # one full_rec row per MARC element, all tied to this record
                my @rows = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@rows);
                push @ns_list, @rows;

                $ret++;

                # the non-batch API names stop after the first record
                last unless ($self->api_name =~ /batch$/o);
        }

        # clear out everything previously derived from these records
        $_->run( { record => \@docids } ) for ($rm_old_rd, $rm_old_fr);
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $_->run( { source => \@docids } ) for ($rm_old_tr, $rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser);

        if (!$no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                defined $sm
                        or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!");

                my ($mr) = $mr_update->run(@mr_list);
                defined $mr
                        or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!");
        }

        my ($re) = $update_entry->run(@entry_list);
        defined $re
                or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!");

        my ($rd) = $rd_create->run(@rd_list);
        defined $rd
                or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!");

        my ($fr) = $fr_create->run(@ns_list);
        defined $fr
                or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!");

        # insert the new field entries, one batch per class
        for my $class ( qw/title author subject keyword series/ ) {
                my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";

                my @md_list = ();
                for my $doc ( @mods_data ) {
                        # each element of @mods_data is a one-pair hash: record id => values
                        my ($did, $data) = %$doc;

                        for my $row ( keys %{ $data->{$class} } ) {
                                next unless (exists $data->{$class}{$row});
                                next unless ($data->{$class}{$row}{value});

                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $data->{$class}{$row}{value} );
                                $fm_obj->field( $data->{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $create->{$class}->run(@md_list);
                defined $cr
                        or throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!");
        }

        # commit only a transaction that was started here
        if (!$outer_xact) {
                $log->debug("Commiting transaction started by the Ingest.", INFO);
                my ($committed) = $commit->run;
                if (!$committed) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}

# wormize is exposed under four API names; the name decides whether
# metarecord mapping is skipped (no_map) and whether more than one record is
# processed (batch).
for my $api_name (
        "open-ils.worm.wormize",
        "open-ils.worm.wormize.no_map",
        "open-ils.worm.wormize.batch",
        "open-ils.worm.wormize.no_map.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2687
2688
# NOTE: these declarations follow the __END__ marker, so perl does not
# compile them as part of the module.
#
# File-level slots for the storage method handles of authority_wormize(),
# named like the bibliographic ones with an "a" prefix.  As written,
# authority_wormize() only assigns $alookup, $aupdate_entry, $arm_old_rd,
# $arm_old_fr, $ard_create and $afr_create; the remaining variables are
# declared here but not referenced by it.
my $ain_xact;
my $abegin;
my $acommit;
my $arollback;
my $alookup;
my $aupdate_entry;
my $amr_lookup;
my $amr_update;
my $amr_create;
my $acreate_source_map;
my $asm_lookup;
my $arm_old_rd;
my $arm_old_sm;
my $arm_old_fr;
my $arm_old_tr;
my $arm_old_ar;
my $arm_old_sr;
my $arm_old_kr;
my $arm_old_ser;

my $afr_create;
my $ard_create;
my $acreate = {};
2712
2713 sub authority_wormize {
2714
2715         my $self = shift;
2716         my $client = shift;
2717         my @docids = @_;
2718
2719         my $no_map = 0;
2720         if ($self->api_name =~ /no_map/o) {
2721                 $no_map = 1;
2722         }
2723
2724         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2725                 unless ($in_xact);
2726         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2727                 unless ($begin);
2728         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2729                 unless ($commit);
2730         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2731                 unless ($rollback);
2732         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2733                 unless ($alookup);
2734         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2735                 unless ($aupdate_entry);
2736         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2737                 unless ($arm_old_rd);
2738         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2739                 unless ($arm_old_fr);
2740         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2741                 unless ($ard_create);
2742         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2743                 unless ($afr_create);
2744
2745
2746         my ($outer_xact) = $in_xact->run;
2747         try {
2748                 unless ($outer_xact) {
2749                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2750                         my ($r) = $begin->run($client);
2751                         unless (defined $r and $r) {
2752                                 $rollback->run;
2753                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2754                         }
2755                 }
2756         } catch Error with {
2757                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2758         };
2759
2760         my @source_maps;
2761         my @entry_list;
2762         my @mr_list;
2763         my @rd_list;
2764         my @ns_list;
2765         my @mads_data;
2766         my $ret = 0;
2767         for my $entry ( $lookup->run(@docids) ) {
2768                 # step -1: grab the doc from storage
2769                 next unless ($entry);
2770
2771                 #if(!$mads_sheet) {
2772                 #       my $xslt_doc = $parser->parse_file(
2773                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2774                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2775                 #}
2776
2777                 my $xml = $entry->marc;
2778                 my $docid = $entry->id;
2779                 my $marcdoc = $parser->parse_string($xml);
2780                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2781
2782                 #my $mads = $madsdoc->documentElement;
2783                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2784
2785                 push @entry_list, $entry;
2786
2787                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2788                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2789
2790                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2791                 for my $rd_field ( keys %descriptor_code ) {
2792                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2793                 }
2794                 $rd_obj->record( $docid );
2795                 push @rd_list, $rd_obj;
2796
2797                 # step 2: build the KOHA rows
2798                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2799                 $_->record( $docid ) for (@tmp_list);
2800                 push @ns_list, @tmp_list;
2801
2802                 $ret++;
2803
2804                 last unless ($self->api_name =~ /batch$/o);
2805         }
2806
2807         $arm_old_rd->run( { record => \@docids } );
2808         $arm_old_fr->run( { record => \@docids } );
2809
2810         my ($rd) = $ard_create->run(@rd_list);
2811         unless (defined $rd) {
2812                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2813         }
2814
2815         my ($fr) = $fr_create->run(@ns_list);
2816         unless (defined $fr) {
2817                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2818         }
2819
2820         unless ($outer_xact) {
2821                 $log->debug("Commiting transaction started by Ingest.", INFO);
2822                 my ($c) = $commit->run;
2823                 unless (defined $c and $c) {
2824                         $rollback->run;
2825                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2826                 }
2827         }
2828
2829         return $ret;
2830 }
# Single-record entry point for "wormize", published under the correctly
# spelled API name (matching the "...authority.wormize.batch" sibling below).
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Historical misspelling ("authortiy") of the name above.  It stays registered
# so that existing callers using the old spelling keep working.
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authortiy.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Batch entry point for the same method; the api_name ends in "batch".
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize.batch",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
2843
2844
2845 # --------------------------------------------------------------------------------
2846
2847
# Flatten a parsed MARCXML document into one row object per leader,
# controlfield and subfield.
#
# Arguments:
#   $marcxml - parsed document object; only documentElement is called on it
#   $type    - class name used to construct every row object
#              (default: 'Fieldmapper::metabib::full_rec')
#
# Returns a list of row objects: leader rows first (tag 'LDR'), then
# controlfields, then one row per subfield of each datafield.  Only tag,
# ind1, ind2, subfield and value are set here.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        my @ns_list;

        my $root = $marcxml->documentElement;

        # Values are stored in decomposed form (NFD) with every combining
        # mark removed.
        my $strip_marks = sub {
                my $val = NFD(shift);
                $val =~ s/\pM+//g;
                return $val;
        };

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Use the caller-supplied class here too; previously leader
                # rows were always metabib rows regardless of $type.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                # Same as above: honor $type for controlfield rows.
                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                # Visit subfield elements only.  Walking every child node
                # would also hit whitespace text nodes, which have no
                # getAttribute method.
                for my $data ( @{$tagline->getChildrenByTagName("subfield")} ) {
                        next unless $data;

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        # Unlike leader/controlfield values, subfield values
                        # are also lowercased.
                        $ns->value( lc( $strip_marks->($data->textContent) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2908
# Collect the text found under every node matching $xpath into a single
# normalized string.
#
# Arguments:
#   $root  - node to search from; findnodes($xpath) is called on it
#   $xpath - XPath expression selecting the nodes of interest
#
# For each matching node, the text of each child node is appended (followed
# by a space) unless that text already occurs somewhere in the accumulated
# string.  A matching node with no children contributes its own text.
# The result is NFD-decomposed, stripped of combining marks, and lowercased.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # grab the set of matching nodes
        my @nodes = $root->findnodes( $xpath );
        for my $value (@nodes) {

                # grab all children of the node
                my @children = $value->childNodes();
                for my $child (@children) {

                        my $content = $child->textContent;

                        # Uniquify the values with a literal substring test.
                        # A regex match is avoided on purpose: an empty
                        # pattern makes Perl reuse the last successfully
                        # matched regex, so empty child text would give
                        # results that depend on unrelated earlier matches.
                        next if index($string, $content) >= 0;

                        # add the child's content to the growing buffer
                        $string .= $content . " ";
                }
                if( ! @children ) {
                        $string .= $value->textContent . " ";
                }
        }
        $string = NFD($string);
        $string =~ s/\pM//g;
        return lc($string);
}
2936
2937
# Build a result skeleton shaped like the file-level $xpathset:
#
#   { <class> => { <type> => { field_id => <id stored in $xpathset> } } }
#
# $mods is accepted but is not read by this routine.
# Returns a reference to the skeleton hash.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %skeleton;
        for my $class (keys %$xpathset) {

                # one entry per type configured for this class, carrying
                # only the configured field id
                my %by_type = map {
                        ( $_ => { field_id => $xpathset->{$class}->{$_}->{id} } )
                } keys %{ $xpathset->{$class} };

                $skeleton{$class} = \%by_type;
        }

        return \%skeleton;
}
2950
2951
2952 1;
2953
2954