]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
fixing ingest bug affecting records with changing fingerprints
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
# Metadata formats known to the ingest code, keyed by format name.  Each
# entry holds the XML namespace URI ({ns}) for that format.  post_init()
# additionally stores a compiled stylesheet under {xslt} for 'mods' and
# 'mods3'.
our %supported_formats = (

    # MARC and the MODS flavours derived from it
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },

    # Dublin Core flavours
    srw_dc  => { ns => 'info:srw/schema/1/dc-schema' },
    oai_dc  => { ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/' },
    rdf_dc  => { ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },

    # Feed formats that have a namespace
    atom    => { ns => 'http://www.w3.org/2005/Atom' },
    rss091  => { ns => 'http://my.netscape.com/rdf/simple/0.9/' },
    rss10   => { ns => 'http://purl.org/rss/1.0/' },
    rss11   => { ns => 'http://purl.org/net/rss1.1#' },

    # Feed formats registered with an empty namespace
    ( map { $_ => { ns => '' } } qw/rss092 rss093 rss094 rss2/ ),
);
37
38
# Logger class name; logging methods are called on it as class methods.
my $log = 'OpenSRF::Utils::Logger';

# XML parser and XSLT processor shared by every package in this file.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

my ($mods_sheet, $mads_sheet);

# Index definitions filled in by post_init():
#   $xpathset->{field_class}->{name} = { xpath => ..., id => ..., format => ... }
my $xpathset = {};

# Empty stubs; the real setup happens on demand in post_init().
sub initialize {}
sub child_init {}
49
# One-time, on-demand setup.  Does nothing once $xpathset has any keys.
# Otherwise it:
#   * compiles the MODS and MODS v3 stylesheets found in the configured
#     'xsl' directory (each only if not already compiled), and
#   * loads every config.metabib_field row from open-ils.cstore into
#     $xpathset, keyed by field class and then field name.
sub post_init {

    return if (keys %$xpathset);

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # [ format key, log message, stylesheet file ] in load order
    my @stylesheets = (
        [ mods  => "Loading MODS XSLT",    "/MARC21slim2MODS.xsl"  ],
        [ mods3 => "Loading MODS v3 XSLT", "/MARC21slim2MODS3.xsl" ],
    );

    for my $sheet (@stylesheets) {
        my $format  = $sheet->[0];
        my $message = $sheet->[1];
        my $file    = $sheet->[2];

        next if ($supported_formats{$format}{xslt});

        $log->debug($message, DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
        ->gather(1);

    return unless (ref $fields and @$fields);

    for my $field (@$fields) {
        my $class = $field->field_class;
        my $name  = $field->name;
        my $xpath = $field->xpath;

        my $slot = $xpathset->{$class}->{$name} ||= {};
        $slot->{xpath}  = $xpath;
        $slot->{id}     = $field->id;
        $slot->{format} = $field->format;

        $log->debug("Loaded XPath from DB: ".$class." => ".$name." : ".$xpath, DEBUG);
    }
}
85
# Normalizes a string and replaces every non-ASCII character with a
# hexadecimal numeric character reference (&#xNNNN;).
#
#   $stuff - the text to convert
#   $form  - 'D' to normalize to NFD first; anything else (including no
#            value at all) normalizes to NFC
#
# Returns the converted string.
sub entityize {
    my $stuff = shift;
    my $form = shift;

    if (defined $form && $form eq 'D') {
        $stuff = NFD($stuff);
    } else {
        $stuff = NFC($stuff);
    }

    # U+0080..U+FFFD plus the supplementary planes.  U+FFFE and U+FFFF
    # stay excluded, as before.
    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
99
100 # --------------------------------------------------------------------------------
101 # Biblio ingest
102
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
106
# Full read/write ingest of one bibliographic record object.
#
# Runs the read-only ingest for $bib, then, inside one open-ils.cstore
# transaction:
#   * replaces the record's metabib.full_rec rows,
#   * replaces its metabib.record_descriptor row,
#   * replaces its title/author/subject/keyword/series field entries,
#   * re-maps the record to the metarecord matching its (possibly changed)
#     fingerprint, creating that metarecord if needed, deleting old
#     metarecords that no longer have any sources, and re-targeting
#     metarecord-level ('M') holds from the old metarecords,
#   * updates the bib record itself with the new fingerprint and quality.
#
# Params:  $bib - the biblio record entry object to ingest
# Returns: the bib id on success; undef if the read-only ingest returned
#          nothing or the transaction commit did not return a true value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry => metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # (declared separately: "my $x = ... if COND" has undefined behaviour)
    my $old_mrs;
    $old_mrs = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
    )->gather(1) if (@$tmp);

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # nothing else maps to this MR any more, so remove it
            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # flagged so that holds on this MR get re-targeted below
            $m->isdeleted(1);
        }
    }

    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            $holds = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1);
            $holds = [] if (!ref($holds));
        }

        if ($mr) {
            for my $h (@$holds) {
                # targets are matched against metarecord ids in the search
                # above, so store the id rather than the object
                $h->target($mr->id);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        # no MR carries this fingerprint yet: create one led by this bib
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            $h->target($mr->id);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        # existing MR: pick the master record by quality
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select'  => { bre => [ qw/id quality/ ] },
                  order_by  => { bre => "quality desc" },
                  limit     => 1,
                }
            )->gather(1);

            # $best is checked first in case the search returned nothing
            if ($best && $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # release the connected session whether or not the commit succeeded
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object",
    method      => "rw_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
275
# Read/write ingest by record id: retrieves the biblio record entry for
# $rec from open-ils.cstore and passes it to
# open-ils.ingest.full.biblio.object.
#
# Returns the first result of that method, or undef when the record could
# not be retrieved.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    # The retrieve is wrapped in a transaction that is rolled back straight
    # after, and the session is closed before ingesting.
    my $session = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $session->request('open-ils.cstore.transaction.begin')->gather(1);

    my $entry = $session->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

    $session->request('open-ils.cstore.transaction.rollback')->gather(1);
    $session->disconnect;

    if (!$entry or !@$entry) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ($ingest->run($entry))[0];
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record",
    method      => "rw_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
300
# Read-only ingest of a biblio record object.  Nothing is written to the
# database; the derived objects are stamped with the bib's id and returned.
#
# Returns a hashref:
#   full_rec      => arrayref of flattened MARC rows
#   field_entries => arrayref of extracted index field entries
#   fingerprint   => result of open-ils.ingest.fingerprint.xml
#   descriptor    => result of open-ils.ingest.descriptor.xml (may be absent)
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # The first two methods take the parsed document, the last two the
    # entityized XML string.
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    for my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    for my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object.readonly",
    method      => "ro_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
326
# Read-only ingest of a raw MARCXML string.  Same result layout as the
# object variant, but the returned objects carry no record id because none
# is known here.
#
# Returns a hashref with the keys full_rec, field_entries, fingerprint and
# descriptor.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml.readonly",
    method      => "ro_biblio_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
347
# Read-only ingest by record id: retrieves the biblio record entry for
# $rec, runs open-ils.ingest.full.biblio.xml.readonly on its MARC and
# stamps $rec onto every returned object.
#
# Returns the result hashref (full_rec, field_entries, fingerprint,
# descriptor), or undef when the record could not be retrieved.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # The descriptor may be missing; the object variant of this method
    # guards the same way.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record.readonly",
    method      => "ro_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
375
# Streaming read-only ingest by record id.  Receives one value at a time
# from the client (5 second timeout per receive), treats each as a record
# id, and responds with that record's read-only ingest result.  Stops when
# nothing more arrives or a message has no defined content.
#
# Always returns undef; results go out through $client->respond.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly");
        my ($res) = $ingest->run($rec);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($rec);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_stream.readonly",
    method      => "ro_biblio_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
406
# Streaming read-only ingest of raw MARCXML.  Receives one value at a time
# from the client (5 second timeout per receive), treats each as an XML
# string, and responds with its read-only ingest result.  Stops when
# nothing more arrives or a message has no defined content.
#
# Always returns undef; results go out through $client->respond.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method      => "ro_biblio_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
434
# Streaming ingest of biblio record objects.  Receives one object at a time
# from the client (5 second timeout per receive), runs the read-only XML
# ingest on its MARC, stamps the object's id onto the field entries and
# full_rec rows, and responds with the result.  Nothing in this method
# writes to the database.
#
# Always returns undef; results go out through $client->respond.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly");
        my ($res) = $ingest->run($bib->marc);

        for my $entry (@{$res->{field_entries}}) {
            $entry->source($bib->id);
        }
        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.bib_stream.import",
    method      => "rw_biblio_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
465
466
467 # --------------------------------------------------------------------------------
468 # Authority ingest
469
470 package OpenILS::Application::Ingest::Authority;
471 use base qw/OpenILS::Application::Ingest/;
472 use Unicode::Normalize;
473
# Read-only ingest of an authority record object: flattens its MARC into
# full_rec rows and stamps each row with the object's id.
#
# Returns a hashref: { full_rec => arrayref of rows }.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    for my $row (@mfr) {
        $row->record($bib->id);
    }

    my %result = ( full_rec => \@mfr );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.object.readonly",
    method      => "ro_authority_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
494
# Read-only ingest of a raw authority MARCXML string: flattens it into
# full_rec rows.  The rows carry no record id because none is known here.
#
# Returns a hashref: { full_rec => arrayref of rows }.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my $flatten = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml");
    my @mfr = $flatten->run($document);

    my %result = ( full_rec => \@mfr );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml.readonly",
    method      => "ro_authority_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
512
# Read-only ingest by authority record id: retrieves the authority record
# entry for $rec, runs open-ils.ingest.full.authority.xml.readonly on its
# MARC and stamps $rec onto every full_rec row.
#
# Returns the result hashref, or undef when the record could not be
# retrieved.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);

    $_->record($rec) for (@{$res->{full_rec}});

    # The authority XML ingest returns only full_rec, so an unconditional
    # call here would be a method call on undef.  Stamp a descriptor only
    # if one is ever supplied.
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record.readonly",
    method      => "ro_authority_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
539
# Streaming read-only ingest by authority record id.  Receives one value at
# a time from the client (5 second timeout per receive), treats each as a
# record id, and responds with that record's read-only ingest result.
# Stops when nothing more arrives or a message has no defined content.
#
# Always returns undef; results go out through $client->respond.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.record.readonly");
        my ($res) = $ingest->run($rec);

        for my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record_stream.readonly",
    method      => "ro_authority_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
569
# Streaming read-only ingest of raw authority MARCXML.  Receives one value
# at a time from the client (5 second timeout per receive), treats each as
# an XML string, and responds with its read-only ingest result.  Stops when
# nothing more arrives or a message has no defined content.
#
# Always returns undef; results go out through $client->respond.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml_stream.readonly",
    method      => "ro_authority_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
597
# Streaming ingest of authority record objects.  Receives one object at a
# time from the client (5 second timeout per receive), runs the read-only
# authority XML ingest on its MARC, stamps the object's id onto the
# full_rec rows, and responds with the result.  Nothing in this method
# writes to the database.
#
# Always returns undef; results go out through $client->respond.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # Created here but not referenced again in this method.
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly");
        my ($res) = $ingest->run($bib->marc);

        for my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.bib_stream.import",
    method      => "rw_authority_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
627
628
629 # --------------------------------------------------------------------------------
630 # MARC index extraction
631
632 package OpenILS::Application::Ingest::XPATH;
633 use base qw/OpenILS::Application::Ingest/;
634 use Unicode::Normalize;
635
# Collects the text matched by an XPath expression into one string.
#
#   $xml       - node to search (callers pass an XML documentElement)
#   $xpath     - XPath expression to evaluate against $xml
#   $ns_uri    - optional namespace URI; registered on $xml together with
#   $ns_prefix - its prefix, but only when both are true
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string built so far (substring test)
#
# For each matching node the text of every child is appended, followed by
# a space; a node without children contributes its own text instead.
# Returns the result in NFD form.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    my @nodes = $xml->findnodes( $xpath );
    for my $value (@nodes) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            my $content = $child->textContent;
            $content = '' unless (defined $content);

            # uniquify the values.  index() is used rather than a regex
            # because an empty pattern makes Perl reuse the last successful
            # regex, so empty text gave unpredictable results.  Empty text
            # is now always treated as already present.
            next if ($unique && index($string, $content) >= 0);

            # add the child's content to the growing buffer
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
667
# Extracts index strings from a MARCXML document for the given field
# classes and streams one metabib field entry object per index definition
# that yields a value.
#
#   $xml     - parsed document, or an XML string (entityized and parsed here)
#   @classes - field class names to extract (keys of $xpathset)
#
# Each value is normalized: NFD, mark and control characters removed,
# trailing non-word characters removed, runs of dots between word
# boundaries removed, then lowercased.  Always returns undef; entries go
# out through $client->respond.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # format name => transformed document, so each stylesheet runs once
    my %transformed;

    for my $class (@classes) {
        my $entry_class = "Fieldmapper::metabib::${class}_field_entry";

        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            # Formats with a stylesheet are searched in transformed form;
            # everything else is searched in the original document.
            my $document = $xml;
            if ($sf->{xslt}) {
                $transformed{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transformed{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement,
                $def->{xpath},
                $sf->{ns},
                $format,
                1
            );

            if (!$value) {
                next;
            }

            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $entry = $entry_class->new;
            $entry->value( $value );
            $entry->field( $def->{id} );
            $client->respond($entry);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.xml",
    method      => "class_index_string_xml",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
724
# Extracts index strings for the given field classes from a stored record
# and streams the resulting field entries, each stamped with $rec as its
# source.
#
#   $rec     - record id to retrieve
#   @classes - one or more field class names
#
# Always returns undef (also when the record cannot be retrieved); entries
# go out through $client->respond.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    # Take every remaining argument, as class_index_string_xml does.  A
    # plain "shift" here kept only the first class and dropped the rest.
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();

    # NOTE(review): this retrieves an authority record, yet the entries
    # produced are metabib field entries and all_index_string_record reads
    # biblio.record_entry.  Looks like a copy/paste slip; confirm before
    # changing.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.record",
    method      => "class_index_string_record",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
752
# Extracts index strings for every known field class from a MARCXML
# document (string or parsed) and streams the resulting field entries.
#
# Always returns undef; entries go out through $client->respond.
sub all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    # The class list comes from $xpathset, which is empty until post_init
    # has run.  Without this call the first request on a fresh process
    # would pass no classes and produce no entries.
    OpenILS::Application::Ingest->post_init();

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.xml",
    method      => "all_index_string_xml",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
770
# Extracts index strings for every known field class from a stored biblio
# record and streams the resulting field entries, each stamped with $rec as
# its source.
#
# Always returns undef (also when the record cannot be retrieved); entries
# go out through $client->respond.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $entry = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$entry or !@$entry) {
        return undef;
    }

    my $extract = $self->method_lookup("open-ils.ingest.field_entry.class.xml");
    for my $fm ($extract->run($entry->marc, keys(%$xpathset))) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.record",
    method      => "all_index_string_record",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
797
798 # --------------------------------------------------------------------------------
799 # Flat MARC
800
801 package OpenILS::Application::Ingest::FlatMARC;
802 use base qw/OpenILS::Application::Ingest/;
803 use Unicode::Normalize;
804
805
# Normalizes one MARC value for the flattened rows: decomposes it to NFD, drops
# combining marks (\pM) and characters in the Unicode "Other" category (\pC),
# then trims trailing non-word characters.
sub _flat_marc_clean_value {
        my $val = shift;
        $val = NFD($val);
        $val =~ s/\pM+//sg;
        $val =~ s/\pC+//sg;
        $val =~ s/\W+$//sg;
        return $val;
}

# Flattens a parsed MARCXML document into a list of full_rec Fieldmapper
# objects: one for each leader (tag 'LDR'), one for each controlfield, and one
# for each subfield of each datafield.
#
# Params:  $marcxml - parsed XML document (must support findnodes)
#          $xmltype - Fieldmapper namespace to build rows in; defaults to
#                     'metabib'
# Returns: the list of row objects, in document order within each group.
# Dies:    when the document contains no element whose local name is "record".
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $xmltype = shift || 'metabib';

        my $type = "Fieldmapper::${xmltype}::full_rec";

        my @ns_list;

        my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

        # Fail with a message that names the problem rather than with a method
        # call on an undefined value a few lines further down.
        die "No record element found in $xmltype xml\n" unless $root;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( _flat_marc_clean_value($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( _flat_marc_clean_value($tagline->textContent) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
                        next unless $data;

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );

                        # Only subfield values are lowercased; leader and
                        # controlfield values keep their case.
                        $ns->value( lc(_flat_marc_clean_value($data->textContent)) );

                        push @ns_list, $ns;
                }
        }

        $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
        return @ns_list;
}
879
# Streams the flattened rows of one MARCXML record.
#
# Params:  $xml - the record, either as a string (entity-encoded and parsed
#                 here) or as a reference, which is used as-is
# Returns: undef; each row from _marcxml_to_full_rows is sent through
#          $client->respond.  Rows are built as 'authority' rows when the
#          invoked API name contains "authority", otherwise as 'metabib' rows.
sub flat_marc_xml {
        my ($self, $client, $xml) = @_;

        $log->debug("processing [$xml]");

        if (!ref $xml) {
                $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
        }

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

        OpenILS::Application::Ingest->post_init();

        for my $row (_marcxml_to_full_rows($xml, $type)) {
                $client->respond($row);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.authority.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.biblio.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
911
# Streams the flattened rows of one stored record.
#
# Params:  $rec - id passed to the cstore record_entry retrieve call
# Returns: undef; each row is sent through $client->respond and logged as JSON.
#          Nothing is sent when the record cannot be retrieved or has no MARC.
#
# The record class is 'authority' when the invoked API name contains
# "authority" and 'biblio' otherwise; it selects both the cstore retrieve call
# and the flat_marc.*.xml method that does the flattening.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();
        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
                        ->gather(1);

        unless ($r and $r->marc) {
                return undef;
        }

        my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
        foreach my $row ($flattener->run($r->marc)) {
                $client->respond($row);
                $log->debug(JSON->perl2JSON($row), DEBUG);
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.biblio.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.authority.record_entry",
        method          => "flat_marc_record",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
950
951 # --------------------------------------------------------------------------------
952 # Fingerprinting
953
954 package OpenILS::Application::Ingest::Biblio::Fingerprint;
955 use base qw/OpenILS::Application::Ingest/;
956 use Unicode::Normalize;
957 use OpenSRF::EX qw/:try/;
958
# Computes the fingerprint of one stored bibliographic record.
#
# Params:  $rec - id passed to
#                 open-ils.cstore.direct.biblio.record_entry.retrieve
# Returns: the result of open-ils.ingest.fingerprint.xml for the record's MARC,
#          with its 'quality' entry truncated to an integer; undef when the
#          record cannot be retrieved, has no MARC, or no fingerprint result
#          came back.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and $r->marc);

        my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

        # The fingerprint method returns undef when its script fails.  Without
        # this guard the quality assignment below would autovivify an empty
        # hash and hand back { quality => 0 } as if it were a real result.
        unless ($fp) {
                $log->error("No fingerprint was produced for record $rec");
                return undef;
        }

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        $fp->{quality} = int($fp->{quality});
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
984
# Script runner for the fingerprint script; built on first use and then reused.
our $fp_script;

# Runs the configured biblio fingerprint script against one MARC record.
#
# Params:  the record XML (run through entityize before use)
# Returns: whatever the script run returns, or undef when the run yields a
#          false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');

                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 100,
                );
        }

        $fp_script->insert('environment' => {marc => $xml} => 1);

        # Handle failure explicitly: the early return must not depend on what
        # the logger call happens to return.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.xml",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
1022
# Script runner for the descriptor script; built on first use and then reused.
our $rd_script;

# Runs the configured biblio descriptor script against one MARC record.
#
# Params:  the record XML (run through entityize before use)
# Returns: whatever the script run returns, or undef when the run yields a
#          false value.
sub biblio_descriptor {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$rd_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');

                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio descriptor extraction...");

                $rd_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 100,
                );
        }

        $log->debug("Setting up environment for descriptor extraction script...");
        $rd_script->insert('environment.marc' => $xml => 1);
        $log->debug("Environment building complete...");

        # Handle failure explicitly: the early return must not depend on what
        # the logger call happens to return.
        my $res = $rd_script->run;
        unless ($res) {
                $log->error( "Descriptor script died!  $@" );
                return undef;
        }
        $log->debug("Script for biblio descriptor extraction completed successfully");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.descriptor.xml",
        method          => "biblio_descriptor",
        api_level       => 1,
        argc            => 1,
);
1062
1063
1064 1;
1065
1066 __END__
1067
# Reports the current storage transaction, as answered by
# open-ils.storage.transaction.current (callers treat a false value as "not in
# a transaction").
sub in_transaction {
        OpenILS::Application::Ingest->post_init();

        my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
        return $current;
}
1072
# Starts a storage transaction unless one is already current.
#
# Params:  $client - passed on to open-ils.storage.transaction.begin
# Returns: the answer of open-ils.storage.transaction.current after the
#          attempt.  A failure to begin is rolled back and logged here, not
#          propagated: the otherwise block absorbs the PANIC.
sub begin_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        #__PACKAGE__->st_sess->connect;
                        my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
                        if (!$started) {
                                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                                #__PACKAGE__->st_sess->disconnect;
                                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                        }
                }
        } otherwise {
                $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
        };

        return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
1097
# Rolls back the current storage transaction, if there is one; otherwise only
# logs that there is nothing to roll back.
#
# Returns: 1.
# Throws:  OpenSRF::EX::PANIC when the rollback request raises an Error.
sub rollback_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                if (!$outer_xact) {
                        $log->debug("Ingest isn't inside a transaction.", INFO);
                } else {
                        __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                }
        } catch Error with {
                OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
        };

        return 1;
}
1117
# Commits the current storage transaction, if there is one; otherwise only
# logs that there is nothing to commit.  A commit that answers false is rolled
# back before the failure is raised.
#
# Returns: 1.
# Throws:  OpenSRF::EX::PANIC when the commit fails or raises an Error.
sub commit_transaction {
        my ($self, $client) = @_;

        OpenILS::Application::Ingest->post_init();
        my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

        try {
                #if (__PACKAGE__->st_sess->connected && $outer_xact) {
                if (!$outer_xact) {
                        $log->debug("Ingest isn't inside a transaction.", INFO);
                } else {
                        my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
                        if (!$committed) {
                                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                                OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
                        }
                        #__PACKAGE__->st_sess->disconnect;
                }
        } catch Error with {
                OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
        };

        return 1;
}
1143
# Looks up the named method, runs it with the remaining arguments and hands
# back only the first value of its result list (undef when the list is empty).
sub storage_req {
        my $self = shift;
        my $method = shift;

        my ($first) = __PACKAGE__->method_lookup( $method )->run( @_ );
        return $first;
}
1150
# Deletes the derived rows (full_rec and record_descriptor) of one authority
# record, inside a savepoint.
#
# Params:  $rec - value matched against the 'record' column of both tables
# Returns: 1 on success, 0 when any step raised an exception.
#
# When no transaction is current, one is begun here and then committed on
# success or rolled back on failure; an enclosing transaction is left for the
# caller to finish.
sub scrub_authority_record {
        my ($self, $client, $rec) = @_;

        my $commit = 0;
        unless (OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client)
                        or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

                for my $table (qw/full_rec record_descriptor/) {
                        OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.authority.$table.mass_delete", { record => $rec } );
                }

                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
        } otherwise {
                my $err = shift;
                $log->debug('Scrubbing failed : '.$err, ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
                $success = 0;
        };

        if ($commit) {
                if ($success) {
                        OpenILS::Application::Ingest->commit_transaction;
                } else {
                        OpenILS::Application::Ingest->rollback_transaction;
                }
        }
        return $success;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.scrub.authority",
        method          => "scrub_authority_record",
        api_level       => 1,
        argc            => 1,
);
1186
1187
# Deletes the derived metabib rows of a bibliographic record and repairs the
# metarecords that used it as their master, all inside a savepoint.
#
# Params:  $rec - a record id, or a hash of search terms that is first
#                 resolved through
#                 open-ils.storage.id_list.biblio.record_entry.search_where
# Returns: 1 on success, 0 when any step raised an exception.
#
# A metarecord whose master is $rec is pointed at the source of its first
# remaining source-map row, or deleted when no such row remains.  When no
# transaction is current, one is begun here and then committed on success or
# rolled back on failure.
sub scrub_metabib_record {
        my ($self, $client, $rec) = @_;

        if ( ref($rec) =~ /HASH/o ) {
                $rec = OpenILS::Application::Ingest->storage_req(
                        'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
                );
        }

        my $commit = 0;
        unless (OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client)
                        or OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        # Table name and the column that identifies the record, in the order
        # the deletes are issued.
        my @derived = (
                [ full_rec              => 'record' ],
                [ metarecord_source_map => 'source' ],
                [ record_descriptor     => 'record' ],
                [ title_field_entry     => 'source' ],
                [ author_field_entry    => 'source' ],
                [ subject_field_entry   => 'source' ],
                [ keyword_field_entry   => 'source' ],
                [ series_field_entry    => 'source' ],
        );

        my $success = 1;
        try {
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

                for my $spec (@derived) {
                        my ($table, $column) = @$spec;
                        OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.metabib.$table.mass_delete", { $column => $rec } );
                }

                $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
                my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

                foreach my $mr (@$masters) {
                        $log->debug( "Found metarecord whose master is $rec", DEBUG);
                        my $others = OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );

                        if (!@$others) {
                                warn "Removing metarecord whose master is $rec";
                                $log->debug( "Removing metarecord whose master is $rec", DEBUG);
                                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
                                warn "Metarecord removed";
                                $log->debug( "Metarecord removed", DEBUG);
                        } else {
                                $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
                                $mr->master_record($others->[0]->source);
                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.metabib.metarecord.remote_update',
                                        { id => $mr->id },
                                        { master_record => $others->[0]->source, mods => undef }
                                );
                        }
                }

                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

        } otherwise {
                my $err = shift;
                $log->debug('Scrubbing failed : '.$err, ERROR);
                OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
                $success = 0;
        };

        if ($commit) {
                if ($success) {
                        OpenILS::Application::Ingest->commit_transaction;
                } else {
                        OpenILS::Application::Ingest->rollback_transaction;
                }
        }
        return $success;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.scrub.biblio",
        method          => "scrub_metabib_record",
        api_level       => 1,
        argc            => 1,
);
1261
# Re-ingests every bibliographic record mapped to one metarecord and streams a
# status hash for each of them.
#
# Params:  $mrec - metarecord id used to search metarecord_source_map
# Returns: undef; for each mapped record a hash with the keys record,
#          metarecord and success (plus error when an Error was caught) is
#          sent through $client->respond.
#
# $self is passed straight on to wormize_biblio_record, so the "nomap" and
# "noscrub" parts of the invoked API name are seen by its api_name checks.
sub wormize_biblio_metarecord {
        my $self = shift;
        my $client = shift;
        my $mrec = shift;

        my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

        for my $r (@$recs) {
                my $success = 0;
                try {
                        $success = wormize_biblio_record($self => $client => $r->source);

                        # The map row is $r; no variable named $rec exists in
                        # this sub, so the metarecord must be read from $r.
                        $client->respond(
                                { record  => $r->source,
                                  metarecord => $r->metarecord,
                                  success => $success,
                                }
                        );
                } catch Error with {
                        my $e = shift;
                        $client->respond(
                                { record  => $r->source,
                                  metarecord => $r->metarecord,
                                  success => $success,
                                  error   => $e,
                                }
                        );
                };
        }
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.nomap",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.noscrub",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
        method          => "wormize_biblio_metarecord",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
1321
1322
1323 sub wormize_biblio_record {
1324         my $self = shift;
1325         my $client = shift;
1326         my $rec = shift;
1327
1328         if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1329                 $rec = OpenILS::Application::Ingest->storage_req(
1330                         'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1331                 );
1332         }
1333
1334
1335         my $commit = 0;
1336         if (!OpenILS::Application::Ingest->in_transaction) {
1337                 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1338                 $commit = 1;
1339         }
1340
1341         my $success = 1;
1342         try {
1343                 # clean up the cruft
1344                 unless ($self->api_name =~ /noscrub/o) {
1345                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1346                 }
1347
1348                 # now redo 'em
1349                 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1350
1351                 my @full_rec = ();
1352                 my @rec_descriptor = ();
1353                 my %field_entry = (
1354                         title   => [],
1355                         author  => [],
1356                         subject => [],
1357                         keyword => [],
1358                         series  => [],
1359                 );
1360                 my %metarecord = ();
1361                 my @source_map = ();
1362                 for my $r (@$bibs) {
1363                         try {
1364                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1365
1366                                 my $xml = $parser->parse_string($r->marc);
1367
1368                                 #update the fingerprint
1369                                 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1370                                 OpenILS::Application::Ingest->storage_req(
1371                                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1372                                         { id => $r->id },
1373                                         { fingerprint => $fp->{fingerprint},
1374                                           quality     => int($fp->{quality}) }
1375                                 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1376
1377                                 # the full_rec stuff
1378                                 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1379                                         $fr->record( $r->id );
1380                                         push @full_rec, $fr;
1381                                 }
1382
1383                                 # the rec_descriptor stuff
1384                                 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1385                                 $rd->record( $r->id );
1386                                 push @rec_descriptor, $rd;
1387                         
1388                                 # the indexing field entry stuff
1389                                 for my $class ( qw/title author subject keyword series/ ) {
1390                                         for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1391                                                 $fe->source( $r->id );
1392                                                 push @{$field_entry{$class}}, $fe;
1393                                         }
1394                                 }
1395
1396                                 unless ($self->api_name =~ /nomap/o) {
1397                                         my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint}  )->[0];
1398                                 
1399                                         unless ($mr) {
1400                                                 $mr = Fieldmapper::metabib::metarecord->new;
1401                                                 $mr->fingerprint( $fp->{fingerprint} );
1402                                                 $mr->master_record( $r->id );
1403                                                 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1404                                         }
1405
1406                                         my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1407                                         $mr_map->metarecord( $mr->id );
1408                                         $mr_map->source( $r->id );
1409                                         push @source_map, $mr_map;
1410
1411                                         $metarecord{$mr->id} = $mr;
1412                                 }
1413                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1414                         } otherwise {
1415                                 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1416                                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1417                         };
1418                 }
1419                 
1420
1421                 if (@rec_descriptor) {
1422                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1423
1424                         OpenILS::Application::Ingest->storage_req(
1425                                 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1426                                 @source_map
1427                         ) if (@source_map);
1428
1429                         for my $mr ( values %metarecord ) {
1430                                 my $sources = OpenILS::Application::Ingest->storage_req(
1431                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1432                                         $mr->id
1433                                 );
1434
1435                                 my $bibs = OpenILS::Application::Ingest->storage_req(
1436                                         'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1437                                         [ map { $_->source } @$sources ]
1438                                 );
1439
1440                                 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1441
1442                                 OpenILS::Application::Ingest->storage_req(
1443                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
1444                                         { id => $mr->id },
1445                                         { master_record => $master->id, mods => undef }
1446                                 );
1447                         }
1448
1449                         OpenILS::Application::Ingest->storage_req(
1450                                 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1451                                 @rec_descriptor
1452                         ) if (@rec_descriptor);
1453
1454                         OpenILS::Application::Ingest->storage_req(
1455                                 'open-ils.storage.direct.metabib.full_rec.batch.create',
1456                                 @full_rec
1457                         ) if (@full_rec);
1458
1459                         OpenILS::Application::Ingest->storage_req(
1460                                 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1461                                 @{ $field_entry{title} }
1462                         ) if (@{ $field_entry{title} });
1463
1464                         OpenILS::Application::Ingest->storage_req(
1465                                 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1466                                 @{ $field_entry{author} }
1467                         ) if (@{ $field_entry{author} });
1468                         
1469                         OpenILS::Application::Ingest->storage_req(
1470                                 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1471                                 @{ $field_entry{subject} }
1472                         ) if (@{ $field_entry{subject} });
1473
1474                         OpenILS::Application::Ingest->storage_req(
1475                                 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1476                                 @{ $field_entry{keyword} }
1477                         ) if (@{ $field_entry{keyword} });
1478
1479                         OpenILS::Application::Ingest->storage_req(
1480                                 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1481                                 @{ $field_entry{series} }
1482                         ) if (@{ $field_entry{series} });
1483
1484                         OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1485                 } else {
1486                         $success = 0;
1487                 }
1488
1489         } otherwise {
1490                 $log->debug('Wormization failed : '.shift(), ERROR);
1491                 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1492                 $success = 0;
1493         };
1494
1495         OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1496         OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1497         return $success;
1498 }
# Publish wormize_biblio_record under four API names.  Every variant maps to
# the same handler with the same api_level and argc; only the name differs.
# NOTE(review): presumably the handler inspects its own api_name for the
# "nomap" / "noscrub" markers -- confirm in wormize_biblio_record.
for my $api_name (
    "open-ils.worm.wormize.biblio",
    "open-ils.worm.wormize.biblio.nomap",
    "open-ils.worm.wormize.biblio.noscrub",
    "open-ils.worm.wormize.biblio.nomap.noscrub",
) {
    __PACKAGE__->register_method(
        api_name  => $api_name,
        method    => "wormize_biblio_record",
        api_level => 1,
        argc      => 1,
    );
}
1523
# Rebuild the flattened MARC rows (authority.full_rec) for authority records.
#
# Params (after the OpenSRF $self / $client pair):
#   $rec - handed unchanged to open-ils.worm.scrub.authority and to
#          authority.record_entry.search.id.atomic
#          (presumably a record id or list of ids -- confirm against callers)
#
# Steps:
#   1. If Ingest is not already inside a transaction, begin one.  Only a
#      transaction begun here is committed / rolled back here.
#   2. Unless the API name contains "noscrub", run the authority scrub method.
#   3. Parse each record's MARC and collect its flat rows.
#   4. Under the "wormize_authority_record" savepoint, batch-create the rows.
#
# Returns 1 on success, 0 if anything inside the try block threw.
sub wormize_authority_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    my $commit = 0;
    if (!OpenILS::Application::Ingest->in_transaction) {
        OpenILS::Application::Ingest->begin_transaction($client)
            || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;

    # The savepoint is created only after scrubbing and parsing succeeded.
    # Track whether it exists so the error handler never asks the storage
    # layer to roll back to a savepoint that was never set.
    my $savepoint_set = 0;

    try {
        # clean up the cruft
        unless ($self->api_name =~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec )
                || OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $bibs = OpenILS::Application::Ingest->storage_req(
            'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec
        );

        my @full_rec = ();
        my @rec_descriptor = ();
        for my $r (@$bibs) {
            my $xml = $parser->parse_string($r->marc);

            # the full_rec stuff
            for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
                $fr->record( $r->id );
                push @full_rec, $fr;
            }

            # the rec_descriptor stuff -- XXX What does this mean for authority records?
            #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
            #$rd->record( $r->id );
            #push @rec_descriptor, $rd;
        }

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
        $savepoint_set = 1;

        #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);

        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
        $savepoint_set = 0;

    } otherwise {
        # shift() here yields the exception object passed to the handler
        $log->debug('Wormization failed : '.shift(), ERROR);
        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' )
            if ($savepoint_set);
        $success = 0;
    };

    OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
    OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
    return $success;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.authority",
    method    => "wormize_authority_record",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.authority.noscrub",
    method    => "wormize_authority_record",
    api_level => 1,
    argc      => 1,
);
1592
1593
1594 # --------------------------------------------------------------------------------
1595 # MARC index extraction
1596
1597 package OpenILS::Application::Ingest::XPATH;
1598 use base qw/OpenILS::Application::Ingest/;
1599 use Unicode::Normalize;
1600
# Concatenate the text found under every node matching an XPath expression.
#
#   $xml        - element to search (callers in this file pass a documentElement)
#   $xpath      - XPath expression handed to findnodes()
#   $ns_uri,
#   $ns_prefix  - optional; when both are true the namespace is applied to $xml
#                 with setNamespace(..., 1) before searching
#   $unique     - when true, a child's text is skipped if it already occurs
#                 anywhere in the text gathered so far (substring test, so
#                 "art" is skipped once "party" has been collected)
#
# For a matching node with children, the text of each child is appended; for a
# node without children the node's own text is appended.  Every piece is
# followed by one space.  Returns the NFD-normalized result ('' if no match).
sub _xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # walk the set of matching nodes
    for my $value ( $xml->findnodes( $xpath ) ) {

        my @children = $value->childNodes();

        if ( !@children ) {
            $string .= $value->textContent . " ";
            next;
        }

        for my $child (@children) {
            my $content = $child->textContent;

            # Literal substring test.  A regex built from the text would turn
            # into the empty pattern for empty text, and Perl then silently
            # re-uses the last successfully matched regex instead.
            next if ($unique && index($string, $content) >= 0);

            $string .= $content . " ";
        }
    }
    return NFD($string);
}
1632
# Stream one field-entry object per index type configured for a search class.
#
# Params (after the OpenSRF $self / $client pair):
#   $xml   - MARC XML, either a string (parsed here) or an already parsed
#            reference
#   $class - index class name; selects $xpathset->{$class} and the
#            Fieldmapper::metabib::<class>_field_entry constructor
#
# For every type under $xpathset->{$class} the configured XPath is evaluated
# against the MODS transform of the record (unique-text mode).  Types that
# yield no text are skipped.  The text is normalized (NFD, strip \pM and \pC,
# strip trailing non-word characters, drop a period that follows a word
# character, lowercase) and sent to the client with value() and field() set.
# Always returns undef.
sub class_all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    # The MODS transform does not depend on the index type, so it is produced
    # at most once (on first use) instead of once per type.
    my $mods_doc;

    my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
    for my $type ( keys %{ $xpathset->{$class} } ) {
        $mods_doc ||= $mods_sheet->transform($xml);

        my $value = _xpath_to_string(
            $mods_doc->documentElement,
            $xpathset->{$class}->{$type}->{xpath},
            "http://www.loc.gov/mods/",
            "mods",
            1
        );

        next unless $value;

        $value = NFD($value);
        $value =~ s/\pM+//sgo;
        $value =~ s/\pC+//sgo;
        $value =~ s/\W+$//sgo;

        $value =~ s/(\w)\./$1/sgo;
        $value = lc($value);

        my $fm = $class_constructor->new;
        $fm->value( $value );
        $fm->field( $xpathset->{$class}->{$type}->{id} );
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.field_entry.class.xml",
    method    => "class_all_index_string_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1676
# Record-id flavour of open-ils.worm.field_entry.class.xml.
#
# Retrieves biblio record $rec from storage, runs the XML variant on its MARC
# for index class $class, stamps every resulting entry with source($rec) and
# streams it to the client.  Always returns undef.
sub class_all_index_string_record {
    my ($self, $client, $rec, $class) = @_;

    OpenILS::Application::Ingest->post_init();

    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve", $rec
    );

    my $extractor = $self->method_lookup("open-ils.worm.field_entry.class.xml");
    my @entries = $extractor->run($bib->marc, $class);

    for my $entry (@entries) {
        $entry->source($rec);
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.field_entry.class.record",
    method    => "class_all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1699
1700
# Return the index string for a single class/type pair from MARC XML.
#
# $xml may be a string (parsed here) or an already parsed reference.  The
# record is run through the MODS stylesheet and the XPath configured at
# $xpathset->{$class}->{$type}->{xpath} is evaluated against the result in
# unique-text mode.
sub class_index_string_xml {
    my ($self, $client, $xml, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();

    if (!ref($xml)) {
        $xml = $parser->parse_string($xml);
    }

    my $mods_doc = $mods_sheet->transform($xml);
    my $mods_root = $mods_doc->documentElement;
    my $xpath = $xpathset->{$class}->{$type}->{xpath};

    return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.class.type.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 1,
);
1718
# Record-id flavour of open-ils.worm.class.type.xml.
#
# Retrieves biblio record $rec from storage, asks the XML variant for the
# $class / $type index string of its MARC, logs the first result and returns it.
sub class_index_string_record {
    my ($self, $client, $rec, $class, $type) = @_;

    OpenILS::Application::Ingest->post_init();

    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve", $rec
    );

    my $extractor = $self->method_lookup("open-ils.worm.class.type.xml");
    my ($d) = $extractor->run($bib->marc, $class, $type);

    $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.class.type.record",
    method    => "class_index_string_record",
    api_level => 1,
    argc      => 1,
);
1739
# Evaluate an arbitrary XPath against an XML document and return its text.
#
#   $xml     - XML string (parsed here) or an already parsed reference
#   $xpath   - expression to evaluate against the document element
#   $uri,
#   $prefix  - optional namespace binding, passed through to _xpath_to_string
#   $unique  - passed through; enables the duplicate-text filter
sub xml_xpath {
    my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();

    my $doc = ref($xml) ? $xml : $parser->parse_string($xml);

    return _xpath_to_string( $doc->documentElement, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.xml",
    method    => "xml_xpath",
    api_level => 1,
    argc      => 1,
);
1759
# Record-id flavour of open-ils.worm.xpath.xml.
#
# Retrieves biblio record $rec from storage and evaluates $xpath (with the
# optional $uri / $prefix namespace binding and $unique flag) against its
# MARC via the XML variant.  Logs the first result and returns it.
sub record_xpath {
    my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

    OpenILS::Application::Ingest->post_init();

    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve", $rec
    );

    my $evaluator = $self->method_lookup("open-ils.worm.xpath.xml");
    my ($d) = $evaluator->run( $bib->marc, $xpath, $uri, $prefix, $unique );

    $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.record",
    method    => "record_xpath",
    api_level => 1,
    argc      => 1,
);
1782
1783
1784 # --------------------------------------------------------------------------------
1785 # MARC Descriptor
1786
1787 package OpenILS::Application::Ingest::Biblio::Leader;
1788 use base qw/OpenILS::Application::Ingest/;
1789 use Unicode::Normalize;
1790
# Record-type groups.  Each value is a regex fragment matching exactly one
# character; the descriptor extractors in this package test it against
# position 6 of the MARC leader.
our %marc_type_groups = (
    BKS => q/[at]{1}/,
    SER => q/[a]{1}/,
    VIS => q/[gkro]{1}/,
    MIX => q/[p]{1}/,
    MAP => q/[ef]{1}/,
    SCO => q/[cd]{1}/,
    REC => q/[ij]{1}/,
    COM => q/[m]{1}/,
);

# Build an anchored regex that matches a single type code belonging to ANY of
# the named groups, e.g. _type_re(qw/MAP VIS/).
#
# The groups are fetched with a hash slice (@marc_type_groups{...}); a scalar
# element lookup keyed by @_ would use the argument COUNT as the key and find
# nothing.  The alternation is wrapped in (?:...) so that both anchors apply
# to every alternative.  Unknown group names are ignored; with no known group
# the result only matches the empty string.
sub _type_re {
    my @fragments = grep { defined } @marc_type_groups{@_};
    my $re = '^(?:' . join('|', @fragments) . ')$';
    return qr/$re/;
}
1806
# Dispatch table: record_descriptor field name => extractor.
#
# The extractors take no arguments.  They read the package variables $ldr
# (leader text) and $oo8 (008 control field text), which
# _extract_biblio_descriptors sets with local() before calling them.  Each
# one returns a fixed-position substring; the conditional ones fall back to
# ' ' (item_form) or undef (lit_form, type_mat) when the record type at
# leader position 6 is not in the relevant group.
our %biblio_descriptor_code = (
    item_type => sub { return substr($ldr, 6, 1); },

    item_form => sub {
        my $rec_type = substr($ldr, 6, 1);

        return substr($oo8, 29, 1)
            if $rec_type =~ _type_re( qw/MAP VIS/ );

        return substr($oo8, 23, 1)
            if $rec_type =~ _type_re( qw/BKS SER MIX SCO REC/ );

        return ' ';
    },

    bib_level     => sub { return substr($ldr, 7, 1); },
    control_type  => sub { return substr($ldr, 8, 1); },
    char_encoding => sub { return substr($ldr, 9, 1); },
    enc_level     => sub { return substr($ldr, 17, 1); },
    cat_form      => sub { return substr($ldr, 18, 1); },
    pub_status    => sub { return substr($ldr, 5, 1); },
    item_lang     => sub { return substr($oo8, 35, 3); },

    lit_form => sub {
        return undef unless substr($ldr, 6, 1) =~ _type_re('BKS');
        return substr($oo8, 33, 1);
    },

    type_mat => sub {
        return undef unless substr($ldr, 6, 1) =~ _type_re('VIS');
        return substr($oo8, 33, 1);
    },

    audience => sub { return substr($oo8, 22, 1); },
);
1829
# Build a Fieldmapper::metabib::record_descriptor from a parsed MARC XML node.
#
# The leader and the 008 / 007 control field text are placed in the package
# variables $ldr, $oo8 and $oo7 with local(), so they are visible to the
# extractors in %biblio_descriptor_code for the duration of this call only.
# Every key of that table is then used as a setter on the descriptor object.
sub _extract_biblio_descriptors {
    my ($xml) = @_;

    local $ldr = $xml->findvalue('//*[local-name()="leader"]');
    local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
    local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

    my $descriptor = Fieldmapper::metabib::record_descriptor->new;

    for my $field ( keys %biblio_descriptor_code ) {
        my $extract = $biblio_descriptor_code{$field};
        $descriptor->$field( $extract->() );
    }

    return $descriptor;
}
1844
# API wrapper: compute the record descriptor for MARC XML.
#
# $xml may be an XML string (parsed here) or an already parsed reference.
# Returns whatever _extract_biblio_descriptors returns for it.
sub extract_biblio_desc_xml {
    my ($self, $client, $xml) = @_;

    my $doc = ref($xml) ? $xml : $parser->parse_string($xml);

    return _extract_biblio_descriptors( $doc );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.xml",
    method    => "extract_biblio_desc_xml",
    api_level => 1,
    argc      => 1,
);
1860
# Record-id flavour of open-ils.worm.biblio_leader.xml.
#
# Retrieves biblio record $rec from storage, has the XML variant compute the
# descriptor from its MARC, logs the first result as JSON and returns it.
sub extract_biblio_desc_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $bib = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.biblio.record_entry.retrieve", $rec
    );

    my $describer = $self->method_lookup("open-ils.worm.biblio_leader.xml");
    my ($d) = $describer->run($bib->marc);

    $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.record",
    method    => "extract_biblio_desc_record",
    api_level => 1,
    argc      => 1,
);
1879
1880 # --------------------------------------------------------------------------------
1881 # Flat MARC
1882
1883 package OpenILS::Application::Ingest::FlatMARC;
1884 use base qw/OpenILS::Application::Ingest/;
1885 use Unicode::Normalize;
1886
1887
# Flatten a MARC XML document into a list of full_rec row objects.
#
#   $marcxml - parsed XML; the first element whose local name is "record"
#              is used as the root
#   $xmltype - Fieldmapper namespace for the rows ("metabib" when false);
#              rows are built as Fieldmapper::<xmltype>::full_rec
#
# One row is produced per leader (tag "LDR"), per controlfield, and per
# subfield of every datafield (carrying the field's tag and indicators plus
# the subfield code).  Every value is NFD-normalized, stripped of \pM and \pC
# characters and of trailing non-word characters; subfield values are
# additionally lowercased.  Returns the list of rows.
sub _marcxml_to_full_rows {
    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $row_class = "Fieldmapper::${xmltype}::full_rec";

    # value clean-up shared by all three kinds of row
    my $scrub = sub {
        my ($raw) = @_;
        my $val = NFD($raw);
        $val =~ s/\pM+//sgo;
        $val =~ s/\pC+//sgo;
        $val =~ s/\W+$//sgo;
        return $val;
    };

    my @rows;

    my ($record) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $leader ( @{ $record->getChildrenByTagName("leader") } ) {
        next unless $leader;

        my $row = $row_class->new;
        $row->tag( 'LDR' );
        $row->value( $scrub->( $leader->textContent ) );

        push @rows, $row;
    }

    for my $control ( @{ $record->getChildrenByTagName("controlfield") } ) {
        next unless $control;

        my $row = $row_class->new;
        $row->tag( $control->getAttribute( "tag" ) );
        $row->value( $scrub->( $control->textContent ) );

        push @rows, $row;
    }

    for my $field ( @{ $record->getChildrenByTagName("datafield") } ) {
        next unless $field;

        my $tag  = $field->getAttribute( "tag" );
        my $ind1 = $field->getAttribute( "ind1" );
        my $ind2 = $field->getAttribute( "ind2" );

        for my $sub ( @{ $field->getChildrenByTagName('subfield') } ) {
            next unless $sub;

            my $row = $row_class->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $sub->getAttribute( "code" ) );
            $row->value( lc( $scrub->( $sub->textContent ) ) );

            push @rows, $row;
        }
    }

    $log->debug("Returning ".scalar(@rows)." Fieldmapper nodes from $xmltype xml", DEBUG);
    return @rows;
}
1961
# API wrapper: stream the flattened rows of a MARC XML document.
#
# $xml may be an XML string (parsed here) or an already parsed reference.
# Rows are built as "authority" rows when the invoked API name contains
# "authority", otherwise as "metabib" rows.  Always returns undef.
sub flat_marc_xml {
    my ($self, $client, $xml) = @_;

    if (!ref($xml)) {
        $xml = $parser->parse_string($xml);
    }

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

    OpenILS::Application::Ingest->post_init();

    for my $row ( _marcxml_to_full_rows($xml, $type) ) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
1991
# Record-id flavour of the flat_marc XML methods.
#
# Chooses "authority" when the invoked API name contains "authority",
# otherwise "biblio"; retrieves record $rec of that kind from storage and
# streams every row produced by the matching open-ils.worm.flat_marc.<type>.xml
# method for its MARC.  Always returns undef.
sub flat_marc_record {
    my ($self, $client, $rec) = @_;

    my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

    OpenILS::Application::Ingest->post_init();

    my $entry = OpenILS::Application::Ingest->storage_req(
        "open-ils.storage.direct.${type}.record_entry.retrieve", $rec
    );

    my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");

    for my $row ( $flattener->run($entry->marc) ) {
        $client->respond($row);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
2020
2021
2022 # --------------------------------------------------------------------------------
2023 # Fingerprinting
2024
2025 package OpenILS::Application::Ingest::Biblio::Fingerprint;
2026 use base qw/OpenILS::Application::Ingest/;
2027 use Unicode::Normalize;
2028 use OpenSRF::EX qw/:try/;
2029
2030 my @fp_mods_xpath = (
2031         '//mods:mods/mods:typeOfResource[text()="text"]' => [
2032                         title   => {
2033                                         xpath   => [
2034                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2035                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2036                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2037                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2038                                         ],
2039                                         fixup   => sub {
2040                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2041                                                         $text = NFD($text);
2042                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2043                                                         $text =~ s/\pM+//gso;
2044                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2045                                                         $text = lc($text);
2046                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2047                                                         $text =~ s/\s+/ /sgo;
2048                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2050                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2052                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053                                                         $text =~ s/\[.[^\]]+\]//sgo;
2054                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2056                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2057                                                 },
2058                         },
2059                         author  => {
2060                                         xpath   => [
2061                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2062                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2063                                         ],
2064                                         fixup   => sub {
2065                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2066                                                         $text = NFD($text);
2067                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2068                                                         $text =~ s/\pM+//gso;
2069                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2070                                                         $text = lc($text);
2071                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2072                                                         $text =~ s/\s+/ /sgo;
2073                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2074                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2075                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076                                                         $text =~ s/,?\s+.*$//sgo;
2077                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2078                                                 },
2079                         },
2080         ],
2081
2082         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
2083                         title   => {
2084                                         xpath   => [
2085                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2086                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2087                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2088                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2089                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2090                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2091                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2092                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2093                                         ],
2094                                         fixup   => sub {
2095                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2096                                                         $text = NFD($text);
2097                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2098                                                         $text =~ s/\pM+//gso;
2099                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2100                                                         $text = lc($text);
2101                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2102                                                         $text =~ s/\s+/ /sgo;
2103                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2104                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2105                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2106                                                         $text =~ s/\b(?:the|an?)\b//sgo;
2107                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2108                                                         $text =~ s/\[.[^\]]+\]//sgo;
2109                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2110                                                         $text =~ s/\s*[;\/\.]*$//sgo;
2111                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2112                                                 },
2113                         },
2114                         author  => {
2115                                         xpath   => [
2116                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2117                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2118                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2119                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2120                                         ],
2121                                         fixup   => sub {
2122                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2123                                                         $text = NFD($text);
2124                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2125                                                         $text =~ s/\pM+//gso;
2126                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2127                                                         $text = lc($text);
2128                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2129                                                         $text =~ s/\s+/ /sgo;
2130                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2131                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
2132                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2133                                                         $text =~ s/,?\s+.*$//sgo;
2134                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2135                                                 },
2136                         },
2137         ],
2138
2139 );
2140
# Append one more (match xpath, block) pair: any record that has a
# mods:titleInfo element is tried against the block stored at index 1.
push(
        @fp_mods_xpath,
        '//mods:mods/mods:titleInfo',
        $fp_mods_xpath[1],
);
2142
# Build a fingerprint string for a MODS element.
#
# @fp_mods_xpath is read as a flat list of pairs: a "match" xpath at each
# even index, followed by a block that is itself a flat list of
# (name, part) pairs.  Each part supplies a list of candidate xpaths and a
# fixup closure that edits the package variable $text in place.
#
# Pairs are tried in order.  As soon as one of them has produced a
# non-empty string, non-word characters are stripped from it and it is
# returned.  Returns undef when no pair produced any text.
sub _fp_mods {
        my $mods = shift;
        $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

        my $fp_string = '';

        my $pair_offset = 0;
        while ( my $match_xpath = $fp_mods_xpath[$pair_offset] ) {

                my @nodes = $mods->findnodes( $match_xpath );
                if (@nodes) {
                        my $block = $fp_mods_xpath[ $pair_offset + 1 ];

                        my $slot = 0;
                        while ( my $part = $$block[ $slot + 1 ] ) {
                                # the fixup closures read and write $text directly,
                                # so give each part its own localized copy
                                local $text;
                                my $part_name = $$block[$slot];

                                # first candidate xpath yielding a true value wins
                                for my $candidate ( @{ $part->{xpath} } ) {
                                        $text = $mods->findvalue( $candidate );
                                        last if ($text);
                                }

                                $log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

                                if ($text) {
                                        $part->{fixup}->();
                                        $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                                        $fp_string .= $text;
                                }

                                $slot += 2;
                        }
                }

                if ($fp_string) {
                        $fp_string =~ s/\W+//gso;
                        $log->debug("Fingerprint is [$fp_string]", INFO);
                        return $fp_string;
                }

                $pair_offset += 2;
        }

        return undef;
}
2187
# Recompute the fingerprint and quality of bibliographic records and, unless
# the method was invoked under an api_name containing "nomap", move each
# changed record to the metarecord that matches its new fingerprint.
#
# Params:
#   $self   - the registered method object (used for api_name/method_lookup)
#   $client - responder; each processed record id is sent via respond()
#   $rec    - passed unchanged to the record_entry "search.id.atomic" lookup
#
# Returns undef; per-record results are delivered through $client->respond.
#
# If no transaction is active one is started here; it is committed when the
# whole run succeeds and rolled back when anything inside the try block
# throws.  A transaction that was already open is left for the caller.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        # The fingerprint method hands back a false, non-reference
                        # value when its script fails.  Dereferencing that would
                        # yield undef fields and overwrite the stored fingerprint,
                        # so treat it as a failure of the whole run instead.
                        die "no fingerprint could be generated for record ".$bib->id."\n"
                                unless (ref $fp);

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # Drop every existing map row for this record,
                                        # remembering the metarecord it pointed at.
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Declared unconditionally on purpose: the form
                                        # "my $x = ... if COND" leaves $x in an unspecified
                                        # state when COND is false, and inside this loop
                                        # that could expose a previous record's result.
                                        my $old_sm;
                                        if ($old_mrid) {
                                                $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );
                                        }

                                        # The old metarecord has no sources left: remove it.
                                        if (ref($old_sm) and @$old_sm == 0) {
                                                OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.delete',
                                                        $old_mrid
                                                );
                                        }

                                        # Find the metarecord for the new fingerprint,
                                        # creating it (with this record as master) if needed.
                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        # Only finish a transaction that was started in this call.
        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
# Streaming entry point for refingerprint_bibrec.
__PACKAGE__->register_method(
        method          => "refingerprint_bibrec",
        api_name        => "open-ils.worm.fingerprint.record.update",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);

# Same handler under a ".nomap" name; the handler checks its api_name for
# "nomap" and, when present, leaves the metarecord source maps untouched.
__PACKAGE__->register_method(
        method          => "refingerprint_bibrec",
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        api_level       => 1,
        argc            => 1,
);
2290
2291 =comment
2292
2293 sub fingerprint_bibrec {
2294         my $self = shift;
2295         my $client = shift;
2296         my $rec = shift;
2297
2298         OpenILS::Application::Ingest->post_init();
2299         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2300
2301         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2302         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2303         return $fp;
2304
2305 }
2306 __PACKAGE__->register_method(  
2307         api_name        => "open-ils.worm.fingerprint.record",
2308         method          => "fingerprint_bibrec",
2309         api_level       => 0,
2310         argc            => 1,
2311 );                      
2312
2313
2314 sub fingerprint_mods {
2315         my $self = shift;
2316         my $client = shift;
2317         my $xml = shift;
2318
2319         OpenILS::Application::Ingest->post_init();
2320         my $mods = $parser->parse_string($xml)->documentElement;
2321
2322         return _fp_mods( $mods );
2323 }
2324 __PACKAGE__->register_method(  
2325         api_name        => "open-ils.worm.fingerprint.mods",
2326         method          => "fingerprint_mods",
2327         api_level       => 1,
2328         argc            => 1,
2329 );                      
2330
2331 sub fingerprint_marc {
2332         my $self = shift;
2333         my $client = shift;
2334         my $xml = shift;
2335
2336         $xml = $parser->parse_string($xml) unless (ref $xml);
2337
2338         OpenILS::Application::Ingest->post_init();
2339         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2340         $log->debug("Returning [$fp] as fingerprint", INFO);
2341         return $fp;
2342 }
2343 __PACKAGE__->register_method(  
2344         api_name        => "open-ils.worm.fingerprint.marc",
2345         method          => "fingerprint_marc",
2346         api_level       => 1,
2347         argc            => 1,
2348 );                      
2349
2350
2351 =cut
2352
# Fingerprint a stored bibliographic record.
#
# Retrieves the record entry for $rec from storage, runs the
# "open-ils.worm.fingerprint.marc" method on its MARC and returns the first
# value that method produces.
sub biblio_fingerprint_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec
        );
        my $marc = $entry->marc;

        my $fp_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
        my ($fp) = $fp_method->run($marc);

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        return $fp;
}
# Expose biblio_fingerprint_record: fingerprint a record by its id.
__PACKAGE__->register_method(
        method          => "biblio_fingerprint_record",
        api_name        => "open-ils.worm.fingerprint.record",
        api_level       => 1,
        argc            => 1,
);
2374
# ScriptRunner for the fingerprinting script; created on the first call to
# biblio_fingerprint() and reused afterwards.
our $fp_script;

# Fingerprint a MARC record by running the configured fingerprint script.
#
# Params:
#   $self   - the registered method object (unused here)
#   $client - responder (unused here)
#   $marc   - MARCXML, either as a string or as an already parsed document
#
# The record is transformed to MODS with $mods_sheet; both the MARC and the
# MODS are serialized through entityize() and handed to the script as its
# 'environment' (keys "marc" and "mods").
#
# Returns whatever the script run produces, or 0 when the run yields a
# false value (the failure is logged as an error).
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        # The record is available at "internal" log level; it is deliberately
        # not echoed to STDERR on every call.
        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be a single value or a list
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};

        $fp_script->insert('environment' => $env);

        $log->debug("Running script for biblio fingerprinting...");

        # Bail out explicitly on failure, independent of what the logger
        # call itself returns.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
# Expose biblio_fingerprint: fingerprint a MARC record passed in directly.
__PACKAGE__->register_method(
        method          => "biblio_fingerprint",
        api_name        => "open-ils.worm.fingerprint.marc",
        api_level       => 1,
        argc            => 1,
);
2437
2438 # --------------------------------------------------------------------------------
2439
2440 1;
2441
2442 __END__
# Cached storage method handles.  wormize() fills each of these in the first
# time it runs; authority_wormize() also assigns the four transaction handles.
my ( $in_xact, $begin, $commit, $rollback );
my ( $lookup, $update_entry );
my ( $mr_lookup, $mr_update, $mr_create );
my ( $create_source_map, $sm_lookup );
my ( $rm_old_rd, $rm_old_sm, $rm_old_fr, $rm_old_tr );
my ( $rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser );

my ( $fr_create, $rd_create );

# per-class "<class>_field_entry.batch.create" handles, keyed by class name
my $create = {};

# Record descriptor field => Perl expression producing its value.  The
# expressions are run through string eval where lexicals named $ldr and $oo8
# are in scope, so those two variable names must not change.
my %descriptor_code = (
        item_type     => 'substr($ldr,6,1)',
        item_form     => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        bib_level     => 'substr($ldr,7,1)',
        control_type  => 'substr($ldr,8,1)',
        char_encoding => 'substr($ldr,9,1)',
        enc_level     => 'substr($ldr,17,1)',
        cat_form      => 'substr($ldr,18,1)',
        pub_status    => 'substr($ldr,5,1)',
        item_lang     => 'substr($oo8,35,3)',
        #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
        audience      => 'substr($oo8,22,1)',
);
2480
# Ingest ("wormize") bibliographic records: recompute the fingerprint,
# record descriptor, full-record rows and per-class field entries for the
# given record ids and replace what is stored for them.
#
# NOTE: this sub sits after the file's __END__ marker, so Perl does not
# compile it as the file stands.
#
# Params:
#   $self   - the registered method object (api_name selects the variant)
#   $client - passed to the transaction "begin" method
#   @docids - record entry ids to process
#
# Variants, selected by api_name:
#   *no_map* - metarecords and metarecord source maps are left alone
#   *batch   - every retrieved record is processed; otherwise only the first
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC when a
# storage call fails.  A transaction is started and committed here only if
# none was already active.
sub wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        my $no_map = ($self->api_name =~ /no_map/o) ? 1 : 0;

        # Look each storage method up once and keep the handle for later calls.
        $in_xact           ||= $self->method_lookup( 'open-ils.storage.transaction.current');
        $begin             ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
        $commit            ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
        $rollback          ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
        $sm_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
        $mr_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
        $mr_update         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
        $lookup            ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
        $update_entry      ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
        $rm_old_sm         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
        $rm_old_rd         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete');
        $rm_old_fr         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete');
        $rm_old_tr         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete');
        $rm_old_ar         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete');
        $rm_old_sr         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
        $rm_old_kr         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
        $rm_old_ser        ||= $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete');
        $mr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
        $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
        $rd_create         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create');
        $fr_create         ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create');
        for my $class ( qw/title author subject keyword series/ ) {
                $$create{$class} ||= $self->method_lookup( "open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
        }

        # Start a transaction unless the caller already has one open.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @source_maps;
        my @entry_list;
        my @mr_list;
        my @rd_list;
        my @ns_list;
        my @mods_data;
        my $ret = 0;
        for my $entry ( $lookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # compile the MARC -> MODS stylesheet on first use
                if(!$mods_sheet) {
                        my $xslt_doc = $parser->parse_file(
                                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                my $modsdoc = $mods_sheet->transform($marcdoc);

                my $mods = $modsdoc->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                $entry->fingerprint( fingerprint_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                unless ($no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if (!$mr || !@$mr) {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );
                                my ($new_mr) = $mr_create->run($mr);
                                # Check the result of the create call itself; $mr
                                # is the freshly built object and is always defined.
                                unless (defined $new_mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                                $mr->id($new_mr);
                        } else {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        }

                        my $sm = Fieldmapper::metabib::metarecord_source_map->new;
                        $sm->metarecord( $mr->id );
                        $sm->source( $entry->id );
                        push @source_maps, $sm;
                }

                # $ldr and $oo8 are referenced by name from the string-eval'd
                # expressions in %descriptor_code -- do not rename them.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                # non-batch variants stop after the first record
                last unless ($self->api_name =~ /batch$/o);
        }

        # Clear out what is currently stored for the requested ids ...
        $rm_old_rd->run( { record => \@docids } );
        $rm_old_fr->run( { record => \@docids } );
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@docids } );
        $rm_old_ar->run( { source => \@docids } );
        $rm_old_sr->run( { source => \@docids } );
        $rm_old_kr->run( { source => \@docids } );
        $rm_old_ser->run( { source => \@docids } );

        # ... and write the freshly computed rows.
        unless ($no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                unless (defined $sm) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
                }
                my ($mr) = $mr_update->run(@mr_list);
                unless (defined $mr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
                }
        }

        my ($re) = $update_entry->run(@entry_list);
        unless (defined $re) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
        }

        my ($rd) = $rd_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
        }

        my ($fr) = $fr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
        }

        # step 5: insert the new metadata
        for my $class ( qw/title author subject keyword series/ ) {
                my @md_list = ();
                for my $doc ( @mods_data ) {
                        # each element is a single-pair hash: { docid => values }
                        my ($did) = keys %$doc;
                        my ($data) = values %$doc;

                        my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
                        for my $row ( keys %{ $$data{$class} } ) {
                                next unless (exists $$data{$class}{$row});
                                next unless ($$data{$class}{$row}{value});
                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $$data{$class}{$row}{value} );
                                $fm_obj->field( $$data{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $$create{$class}->run(@md_list);
                unless (defined $cr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
                }
        }

        # Commit only a transaction that was started in this call.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# Register wormize under its four API names.  The handler inspects its own
# api_name: "no_map" turns off metarecord mapping and a trailing "batch"
# lets it process more than one record.
for my $variant ( '', '.no_map', '.batch', '.no_map.batch' ) {
        __PACKAGE__->register_method(
                api_name        => "open-ils.worm.wormize" . $variant,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
2733
2734
# File-scoped slots for the authority ingest's storage method handles,
# named like the bibliographic ones with an "a" prefix.
my ( $ain_xact, $abegin, $acommit, $arollback );
my ( $alookup, $aupdate_entry );
my ( $amr_lookup, $amr_update, $amr_create );
my ( $acreate_source_map, $asm_lookup );
my ( $arm_old_rd, $arm_old_sm, $arm_old_fr, $arm_old_tr );
my ( $arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser );

my ( $afr_create, $ard_create );
my $acreate = {};
2758
2759 sub authority_wormize {
2760
2761         my $self = shift;
2762         my $client = shift;
2763         my @docids = @_;
2764
2765         my $no_map = 0;
2766         if ($self->api_name =~ /no_map/o) {
2767                 $no_map = 1;
2768         }
2769
2770         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2771                 unless ($in_xact);
2772         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2773                 unless ($begin);
2774         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2775                 unless ($commit);
2776         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2777                 unless ($rollback);
2778         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2779                 unless ($alookup);
2780         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2781                 unless ($aupdate_entry);
2782         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2783                 unless ($arm_old_rd);
2784         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2785                 unless ($arm_old_fr);
2786         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2787                 unless ($ard_create);
2788         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2789                 unless ($afr_create);
2790
2791
2792         my ($outer_xact) = $in_xact->run;
2793         try {
2794                 unless ($outer_xact) {
2795                         $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2796                         my ($r) = $begin->run($client);
2797                         unless (defined $r and $r) {
2798                                 $rollback->run;
2799                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2800                         }
2801                 }
2802         } catch Error with {
2803                 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2804         };
2805
2806         my @source_maps;
2807         my @entry_list;
2808         my @mr_list;
2809         my @rd_list;
2810         my @ns_list;
2811         my @mads_data;
2812         my $ret = 0;
2813         for my $entry ( $lookup->run(@docids) ) {
2814                 # step -1: grab the doc from storage
2815                 next unless ($entry);
2816
2817                 #if(!$mads_sheet) {
2818                 #       my $xslt_doc = $parser->parse_file(
2819                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
2820                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2821                 #}
2822
2823                 my $xml = $entry->marc;
2824                 my $docid = $entry->id;
2825                 my $marcdoc = $parser->parse_string($xml);
2826                 #my $madsdoc = $mads_sheet->transform($marcdoc);
2827
2828                 #my $mads = $madsdoc->documentElement;
2829                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2830
2831                 push @entry_list, $entry;
2832
2833                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2834                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2835
2836                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2837                 for my $rd_field ( keys %descriptor_code ) {
2838                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2839                 }
2840                 $rd_obj->record( $docid );
2841                 push @rd_list, $rd_obj;
2842
2843                 # step 2: build the KOHA rows
2844                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2845                 $_->record( $docid ) for (@tmp_list);
2846                 push @ns_list, @tmp_list;
2847
2848                 $ret++;
2849
2850                 last unless ($self->api_name =~ /batch$/o);
2851         }
2852
2853         $arm_old_rd->run( { record => \@docids } );
2854         $arm_old_fr->run( { record => \@docids } );
2855
2856         my ($rd) = $ard_create->run(@rd_list);
2857         unless (defined $rd) {
2858                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
2859         }
2860
2861         my ($fr) = $fr_create->run(@ns_list);
2862         unless (defined $fr) {
2863                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2864         }
2865
2866         unless ($outer_xact) {
2867                 $log->debug("Commiting transaction started by Ingest.", INFO);
2868                 my ($c) = $commit->run;
2869                 unless (defined $c and $c) {
2870                         $rollback->run;
2871                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2872                 }
2873         }
2874
2875         return $ret;
2876 }
# Publish the "wormize" method for authority records under its
# single-record API name.  The batch variant is registered further down;
# the method itself inspects its api_name (/batch$/) to decide whether to
# keep going after the first record.
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authority.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
# Legacy alias: this name was originally registered with "authority"
# misspelled.  It is kept so that any caller still using the misspelled
# name continues to work.
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authortiy.wormize",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method( 
        api_name        => "open-ils.worm.authority.wormize.batch",
        method          => "wormize",
        api_level       => 1,
        argc            => 1,
);
2889
2890
2891 # --------------------------------------------------------------------------------
2892
2893
# Flatten a parsed MARCXML document into a list of "full_rec" row objects.
#
# Arguments:
#   $marcxml - parsed document; must provide documentElement()
#   $type    - class name used to construct every row
#              (defaults to 'Fieldmapper::metabib::full_rec')
#
# Returns a list of row objects in this order:
#   * one row per <leader>        (tag 'LDR', value)
#   * one row per <controlfield>  (tag, value)
#   * one row per element child of each <datafield>
#                                 (tag, ind1, ind2, subfield, lowercased value)
#
# All values are decomposed to NFD and have their combining marks removed.
# The caller is responsible for setting ->record on the returned rows.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        # Decompose to NFD, then drop every combining mark, so that stored
        # values are free of diacritics.
        my $strip_marks = sub {
                my ($text) = @_;
                my $val = NFD($text);
                $val =~ s/\pM+//g;
                return $val;
        };

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build the row with the requested class; previously this was
                # hard-coded to the metabib class regardless of $type.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $strip_marks->( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;

                        # Only element children (the subfields) carry a "code"
                        # attribute.  Whitespace text nodes or comments between
                        # subfields have no getAttribute method and would die.
                        next unless $data->isa('XML::LibXML::Element');

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        # Unlike leader/controlfield values, subfield values
                        # are stored lowercased.
                        $ns->value( lc( $strip_marks->( $data->textContent ) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2954
# Collect the text found at $xpath under $root into one normalized string.
#
# Arguments:
#   $root  - node object providing findnodes()
#   $xpath - XPath expression selecting the nodes of interest
#
# For every matching node, the text of each child is appended followed by
# a single space, unless that exact text already occurs somewhere in the
# string built so far.  NOTE: this "uniquify" test is a literal substring
# test, so a value that is contained in an earlier value is dropped too.
# A matching node with no children contributes its own text instead.
#
# Returns the result NFD-decomposed, with combining marks removed, and
# lowercased.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # walk the set of matching nodes
        for my $value ( $root->findnodes( $xpath ) ) {

                # grab all children of the node
                my @children = $value->childNodes();
                for my $child (@children) {

                        my $content = $child->textContent;
                        $content = "" unless defined $content;

                        # uniquify the values.  index() is used instead of a
                        # regex match: an interpolated pattern that turns out
                        # empty makes perl re-use the last successfully matched
                        # regex, so empty children gave unpredictable results.
                        next if (index($string, $content) >= 0);

                        # add the child's content to the growing buffer
                        $string .= $content . " ";
                }
                if( ! @children ) {
                        $string .= $value->textContent . " ";
                }
        }
        $string = NFD($string);
        $string =~ s/\pM//g;
        return lc($string);
}
2982
2983
# Produce the class => type => { field_id => ... } skeleton described by
# the file-level $xpathset table.  Each field_id is copied from the "id"
# entry of the corresponding $xpathset->{class}->{type} hash.
#
# $mods is accepted as the second argument but is not consulted here.
# Returns a hash reference.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %skeleton = map {;
                my $class = $_;

                # one { field_id => id } entry per type known for this class
                my %by_type = map {;
                        ( $_ => +{ field_id => $xpathset->{$class}->{$_}->{id} } )
                } keys %{$xpathset->{$class}};

                ( $class => \%by_type );
        } keys %$xpathset;

        return \%skeleton;
}
2996
2997
2998 1;
2999
3000