]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Ingest.pm
new non-saving worm stuff
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Ingest.pm
1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
3
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
9
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use OpenILS::Utils::Fieldmapper;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
# Metadata formats known to the ingest code, keyed by format name.  Every
# entry carries the XML namespace URI under 'ns'; post_init() additionally
# stores a compiled stylesheet under 'xslt' for the mods and mods3 entries.
our %supported_formats = (
    mods3   => { ns => 'http://www.loc.gov/mods/v3' },
    mods    => { ns => 'http://www.loc.gov/mods/' },
    marcxml => { ns => 'http://www.loc.gov/MARC21/slim' },
    srw_dc  => { ns => '' },
    oai_dc  => { ns => '' },
    rdf_dc  => { ns => '' },
);

# Logger class name; used as the invocant of class-method calls such as
# $log->debug(...).
our $log = 'OpenSRF::Utils::Logger';

# Shared XML parser and XSLT processor instances.
our $parser = XML::LibXML->new();
our $xslt   = XML::LibXSLT->new();

# Stylesheet slots; nothing in this part of the file assigns them.
our $mods_sheet;
our $mads_sheet;

# field_class => field name => { xpath, id, format }; filled by post_init().
our $xpathset = {};

# Intentionally empty hooks.
sub initialize {}
sub child_init {}
40
# Lazily load the shared ingest configuration.
#
# On the first call this compiles the MARC-to-MODS stylesheets found in the
# configured 'xsl' directory into $supported_formats{mods}{xslt} and
# $supported_formats{mods3}{xslt}, then fills $xpathset with one entry per
# config.metabib_field row (xpath, id and format, keyed by field_class and
# name).  Later calls are no-ops once $xpathset has at least one class.
#
# Takes no meaningful arguments (it is invoked as a class method) and its
# return value is not used by the callers in this file.
sub post_init {

    # A populated $xpathset means an earlier call already did the work.
    return if keys %$xpathset;

    $log->debug("Running post_init", DEBUG);

    my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');

    # format key, label used in the log message, stylesheet file name
    for my $sheet (
        [ mods  => 'MODS',    'MARC21slim2MODS.xsl'  ],
        [ mods3 => 'MODS v3', 'MARC21slim2MODS3.xsl' ],
    ) {
        my ($format, $label, $file) = @$sheet;
        next if $supported_formats{$format}{xslt};

        $log->debug("Loading $label XSLT", DEBUG);
        my $xslt_doc = $parser->parse_file( $xsldir . "/" . $file );
        $supported_formats{$format}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
    }

    # Fetch the field definitions straight from the cstore service.  The
    # storage_req() helper this used to go through is only defined below
    # __END__ in this file, so it is not compiled into the live module; the
    # AppSession request/gather pattern is the one class_index_string_record
    # already uses.
    my $fields = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request(
            'open-ils.cstore.direct.config.metabib_field.search.atomic',
            { id => { '!=' => undef } }
        )
        ->gather(1);

    # gather() may hand back nothing if the request failed; treat that as
    # "no definitions" so the next call retries instead of dying here.
    for my $f (@{ $fields || [] }) {
        my $slot = $xpathset->{ $f->field_class }->{ $f->name } ||= {};
        $slot->{xpath}  = $f->xpath;
        $slot->{id}     = $f->id;
        $slot->{format} = $f->format;
        $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
    }

    return;
}
70
# Normalize a string and replace every character in the range
# U+0080..U+FFFD with a hexadecimal numeric character reference.
#
#   $stuff - the text to convert
#   $form  - 'D' selects NFD normalization; anything else selects NFC
#
# Returns the converted string.
sub entityize {
    my ($stuff, $form) = @_;

    $stuff = ($form eq 'D') ? NFD($stuff) : NFC($stuff);

    $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
    return $stuff;
}
84
85 # --------------------------------------------------------------------------------
86 # MARC index extraction
87
88 package OpenILS::Application::Ingest::XPATH;
89 use base qw/OpenILS::Application::Ingest/;
90 use Unicode::Normalize;
91
# Evaluate an XPath expression against a document element and flatten the
# text of the matching nodes into a single space-separated string.
#
#   $xml       - element to search; must support setNamespace() and findnodes()
#   $xpath     - XPath expression to evaluate
#   $ns_uri    - namespace URI to activate on $xml (optional)
#   $ns_prefix - prefix to bind $ns_uri to (optional; both must be true for
#                the namespace to be set)
#   $unique    - when true, a child whose text was already emitted is skipped
#
# For each matching node the text of every child node is appended, followed
# by a single space; a node without children contributes its own text.
# Returns the accumulated string in NFD form.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # Exact values emitted so far.  The earlier implementation tested each
    # value as a regex against the whole buffer, which dropped any value that
    # was merely a substring of earlier text ("art" after "martial arts") and,
    # for an empty value, produced an empty pattern that Perl interprets as
    # "reuse the last successful pattern".
    my %seen;

    # grab the set of matching nodes
    for my $value ($xml->findnodes( $xpath )) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {
            my $content = $child->textContent;
            $content = '' unless defined $content;

            next if ($unique && $seen{$content}++);  # uniquify the values
            $string .= $content . " ";
        }

        # childless nodes are always emitted, but still count as seen
        if ( !@children ) {
            my $content = $value->textContent;
            $content = '' unless defined $content;
            $seen{$content}++;
            $string .= $content . " ";
        }
    }
    return NFD($string);
}
123
# Streaming API method: build index field entries for a record.
#
#   $self    - method object
#   $client  - client handle; one Fieldmapper::metabib::<class>_field_entry
#              object is sent through $client->respond() per non-empty value
#   $xml     - the record, either as an XML string or an already parsed
#              document
#   @classes - field classes to index (keys of $xpathset)
#
# For every definition in $xpathset for the requested classes, the record is
# transformed with the stylesheet registered for the definition's format, the
# definition's XPath is evaluated, and the resulting text is stripped of mark
# (\pM) and other (\pC) characters, has a period following a word character
# removed, and is lower-cased.  Always returns undef; results are delivered
# through $client.
sub class_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    for my $class (@classes) {
        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
        for my $type ( keys %{ $xpathset->{$class} } ) {

            my $def = $xpathset->{$class}->{$type};

            # Use the stylesheet post_init() compiled for this definition's
            # format.  The previous code called transform() on $mods_sheet,
            # which nothing in this file ever assigns.
            my $sheet = $supported_formats{ $def->{format} }{xslt};
            unless ($sheet) {
                warn "No XSLT loaded for format '".$def->{format}."'; skipping $class => $type";
                next;
            }

            my $value = xpath_to_string(
                $sheet->transform($xml)->documentElement,
                $def->{xpath},
                $supported_formats{ $def->{format} }{ns},
                $def->{format},
                1
            );

            next unless $value;

            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;

            $value =~ s/(\w)\./$1/sgo;
            $value = lc($value);

            my $fm = $class_constructor->new;
            $fm->value( $value );
            $fm->field( $def->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.ingest.field_entry.class.xml",
    method          => "class_index_string_xml",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
170
# Streaming API method: build index field entries for a stored bib record.
#
#   $self    - method object
#   $client  - client handle; each field entry is sent via $client->respond()
#   $rec     - id of the biblio.record_entry to index
#   @classes - field classes to index
#
# Retrieves the record from open-ils.cstore, runs its MARC through
# open-ils.ingest.field_entry.class.xml, stamps every resulting entry with
# $rec as its source and streams it to the client.  Always returns undef.
#
# NOTE(review): this method is registered under the "open-ils.worm." prefix
# while the method it delegates to uses "open-ils.ingest." -- confirm which
# name callers are expected to use.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    my @classes = @_;   # every remaining argument is a class, not just the first

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
        ->create('open-ils.cstore')
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    # Nothing to index when the record could not be retrieved.
    return undef unless $r;

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.field_entry.class.record",
    method          => "class_index_string_record",
    api_level       => 1,
    argc            => 2,
    stream          => 1,
);
193
194
195 1;
196
197 __END__
198
# Report the current storage transaction, as returned by
# open-ils.storage.transaction.current (false when there is none).
sub in_transaction {
    OpenILS::Application::Ingest->post_init();

    my $current = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
    return $current;
}
203
# Start a storage transaction unless one is already open.
#
#   $self   - invocant
#   $client - passed through to open-ils.storage.transaction.begin
#
# A failed BEGIN triggers a rollback and a PANIC, which the surrounding
# handler only logs.  Returns whatever open-ils.storage.transaction.current
# reports afterwards, so callers can test the result for truth.
sub begin_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);

            my $started = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
            if (!$started) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
            }
        }
    } otherwise {
        $log->debug("Ingest Couldn't BEGIN transaction!", ERROR);
    };

    return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
}
228
# Roll back the current storage transaction, if there is one.
#
# When no transaction is open this only logs.  Any Error raised while rolling
# back is converted into an OpenSRF::EX::PANIC.  Returns 1.
sub rollback_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't ROLLBACK transaction!");
    };

    return 1;
}
248
# Commit the current storage transaction, if there is one.
#
# When no transaction is open this only logs.  A false result from the commit
# call triggers a rollback and a PANIC; any Error raised inside the block is
# re-raised as an OpenSRF::EX::PANIC.  Returns 1.
sub commit_transaction {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();
    my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );

    try {
        if (!$outer_xact) {
            $log->debug("Ingest isn't inside a transaction.", INFO);
        } else {
            my $committed = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
            if (!$committed) {
                __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
                OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
            }
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't COMMIT transaction!");
    };

    return 1;
}
274
# Look up a method by API name, run it with the remaining arguments and
# return the first value it produced (undef when it produced nothing).
sub storage_req {
    my ($self, $method) = (shift, shift);

    my ($first) = __PACKAGE__->method_lookup( $method )->run( @_ );
    return $first;
}
281
# API method: delete the derived rows (full_rec and record_descriptor) of an
# authority record.
#
#   $rec - value matched against the 'record' column of both tables
#
# Opens a transaction when none is active and commits or rolls it back
# depending on the outcome; inside, the deletes are wrapped in the
# 'scrub_authority_record' savepoint.  Returns 1 on success, 0 on failure.
sub scrub_authority_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );

        for my $table (qw/full_rec record_descriptor/) {
            $ingest->storage_req( "open-ils.storage.direct.authority.$table.mass_delete", { record => $rec } );
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
    } otherwise {
        my $err = shift;
        $log->debug('Scrubbing failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
        $success = 0;
    };

    if ($commit) {
        $ingest->commit_transaction   if $success;
        $ingest->rollback_transaction if !$success;
    }
    return $success;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.scrub.authority",
    method          => "scrub_authority_record",
    api_level       => 1,
    argc            => 1,
);
317
318
# API method: delete everything derived from a bib record and repair the
# metarecords that used it as their master.
#
#   $rec - a record id, or a hash reference that is first resolved through
#          open-ils.storage.id_list.biblio.record_entry.search_where
#
# Removes the full_rec, metarecord_source_map, record_descriptor and the five
# *_field_entry rows for the record.  Each metarecord whose master is $rec is
# then either re-pointed at the first remaining mapped source (with its mods
# cleared) or deleted when no source is left.  Runs inside the
# 'scrub_metabib_record' savepoint and opens/commits/rolls back a transaction
# when the caller has none.  Returns 1 on success, 0 on failure.
sub scrub_metabib_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    if ( ref($rec) && ref($rec) =~ /HASH/o ) {
        $rec = $ingest->storage_req(
            'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
        );
    }

    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    # table => column that references the bib record, in deletion order
    my @derived = (
        [ full_rec              => 'record' ],
        [ metarecord_source_map => 'source' ],
        [ record_descriptor     => 'record' ],
        [ title_field_entry     => 'source' ],
        [ author_field_entry    => 'source' ],
        [ subject_field_entry   => 'source' ],
        [ keyword_field_entry   => 'source' ],
        [ series_field_entry    => 'source' ],
    );

    my $success = 1;
    try {
        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );

        for my $pair (@derived) {
            my ($table, $column) = @$pair;
            $ingest->storage_req( "open-ils.storage.direct.metabib.$table.mass_delete", { $column => $rec } );
        }

        $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
        my $masters = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );

        for my $mr (@$masters) {
            $log->debug( "Found metarecord whose master is $rec", DEBUG);
            my $others = $ingest->storage_req(
                'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id
            );

            if (!@$others) {
                # no source left: the metarecord itself goes away
                warn "Removing metarecord whose master is $rec";
                $log->debug( "Removing metarecord whose master is $rec", DEBUG);
                $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
                warn "Metarecord removed";
                $log->debug( "Metarecord removed", DEBUG);
            } else {
                # promote the first remaining source to master
                $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
                $mr->master_record($others->[0]->source);
                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord.remote_update',
                    { id => $mr->id },
                    { master_record => $others->[0]->source, mods => undef }
                );
            }
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );

    } otherwise {
        my $err = shift;
        $log->debug('Scrubbing failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
        $success = 0;
    };

    if ($commit) {
        $ingest->commit_transaction   if $success;
        $ingest->rollback_transaction if !$success;
    }
    return $success;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.scrub.biblio",
    method          => "scrub_metabib_record",
    api_level       => 1,
    argc            => 1,
);
392
# Streaming API method: re-ingest every bib record mapped to a metarecord.
#
#   $self   - method object (its api_name selects nomap/noscrub behaviour in
#             wormize_biblio_record)
#   $client - client handle; one status hash is sent per source record
#   $mrec   - metarecord id
#
# Each response is { record, metarecord, success } and additionally carries
# 'error' when wormize_biblio_record raised an Error.  Always returns undef.
sub wormize_biblio_metarecord {
    my $self = shift;
    my $client = shift;
    my $mrec = shift;

    my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );

    for my $r (@$recs) {
        my $success = 0;
        try {
            $success = wormize_biblio_record($self => $client => $r->source);
            $client->respond(
                { record     => $r->source,
                  # report the metarecord we were asked about; the earlier
                  # code called ->metarecord on $rec, a variable that is
                  # never declared in this sub
                  metarecord => $mrec,
                  success    => $success,
                }
            );
        } catch Error with {
            my $e = shift;
            $client->respond(
                { record     => $r->source,
                  metarecord => $mrec,
                  success    => $success,
                  error      => $e,
                }
            );
        };
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.nomap",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.noscrub",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.metarecord.nomap.noscrub",
    method          => "wormize_biblio_metarecord",
    api_level       => 1,
    argc            => 1,
    stream          => 1,
);
452
453
# API method: (re)build all derived data for one or more bib records.
#
#   $self   - method object; an api_name containing "noscrub" skips the
#             initial scrub, one containing "nomap" skips metarecord mapping
#   $client - passed to begin_transaction when a transaction must be opened
#   $rec    - record id(s), or a hash reference that is first resolved through
#             open-ils.storage.id_list.biblio.record_entry.search_where
#
# Phase one extracts, per record and inside its own 'extract_data<id>'
# savepoint: the fingerprint (updating the record when fingerprint or quality
# changed), the flattened MARC rows, the record descriptor, the index field
# entries for the five classes and, unless nomap, a metarecord mapping
# (creating the metarecord when none matches the fingerprint).  A failing
# record is logged and rolled back to its savepoint.
#
# Phase two runs only when at least one descriptor was produced and stores
# everything inside the 'wormize_record' savepoint, re-electing each touched
# metarecord's master as the mapped record sorted first by descending
# quality.
#
# Returns 1 on success, 0 on failure or when nothing was extracted.
sub wormize_biblio_record {
    my ($self, $client, $rec) = @_;

    my $ingest  = 'OpenILS::Application::Ingest';
    my @classes = qw/title author subject keyword series/;

    if ( ref($rec) && ref($rec) =~ /HASH/o ) {
        $rec = $ingest->storage_req(
            'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
        );
    }

    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        # clean up the cruft
        if ($self->api_name !~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $bibs = $ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

        my @full_rec       = ();
        my @rec_descriptor = ();
        my %field_entry    = map { ($_ => []) } @classes;
        my %metarecord     = ();
        my @source_map     = ();

        for my $r (@$bibs) {
            try {
                $ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );

                my $xml = $parser->parse_string($r->marc);

                # update the fingerprint
                my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
                if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality) {
                    $ingest->storage_req(
                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                        { id => $r->id },
                        { fingerprint => $fp->{fingerprint},
                          quality     => int($fp->{quality}) }
                    );
                }

                # the full_rec stuff
                for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
                    $fr->record( $r->id );
                    push @full_rec, $fr;
                }

                # the rec_descriptor stuff
                my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
                $rd->record( $r->id );
                push @rec_descriptor, $rd;

                # the indexing field entry stuff
                for my $class (@classes) {
                    for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
                        $fe->source( $r->id );
                        push @{ $field_entry{$class} }, $fe;
                    }
                }

                if ($self->api_name !~ /nomap/o) {
                    my $mr = $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];

                    if (!$mr) {
                        $mr = Fieldmapper::metabib::metarecord->new;
                        $mr->fingerprint( $fp->{fingerprint} );
                        $mr->master_record( $r->id );
                        $mr->id( $ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                    }

                    my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                    $mr_map->metarecord( $mr->id );
                    $mr_map->source( $r->id );
                    push @source_map, $mr_map;

                    $metarecord{$mr->id} = $mr;
                }
                $ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
            } otherwise {
                my $err = shift;
                $log->debug('Data extraction failed for record '.$r->id.': '.$err, ERROR);
                $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
            };
        }

        if (!@rec_descriptor) {
            # nothing could be extracted from any record
            $success = 0;
        } else {
            $ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );

            if (@source_map) {
                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
                    @source_map
                );
            }

            for my $mr ( values %metarecord ) {
                my $sources = $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
                    $mr->id
                );

                my $candidates = $ingest->storage_req(
                    'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
                    [ map { $_->source } @$sources ]
                );

                my ($master) = sort { $b->quality <=> $a->quality } @$candidates;

                $ingest->storage_req(
                    'open-ils.storage.direct.metabib.metarecord.remote_update',
                    { id => $mr->id },
                    { master_record => $master->id, mods => undef }
                );
            }

            # table => rows, stored in this order; empty sets are skipped
            my @batches = (
                [ record_descriptor => \@rec_descriptor ],
                [ full_rec          => \@full_rec ],
                ( map { [ "${_}_field_entry" => $field_entry{$_} ] } @classes ),
            );
            for my $batch (@batches) {
                my ($table, $rows) = @$batch;
                next unless @$rows;
                $ingest->storage_req(
                    "open-ils.storage.direct.metabib.$table.batch.create",
                    @$rows
                );
            }

            $ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
        }

    } otherwise {
        my $err = shift;
        $log->debug('Wormization failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
        $success = 0;
    };

    if ($commit) {
        $ingest->commit_transaction   if $success;
        $ingest->rollback_transaction if !$success;
    }
    return $success;
}
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.biblio",
    method          => "wormize_biblio_record",
    api_level       => 1,
    argc            => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.biblio.nomap",
    method          => "wormize_biblio_record",
    api_level       => 1,
    argc            => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.biblio.noscrub",
    method          => "wormize_biblio_record",
    api_level       => 1,
    argc            => 1,
);
__PACKAGE__->register_method(
    api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
    method          => "wormize_biblio_record",
    api_level       => 1,
    argc            => 1,
);
654
# API method: (re)build the flattened MARC rows for authority record(s).
#
#   $self   - method object; an api_name containing "noscrub" skips the
#             initial scrub
#   $client - passed to begin_transaction when a transaction must be opened
#   $rec    - value handed to the authority record_entry id search
#
# Scrubs the record (unless noscrub), flattens each matching record's MARC
# through open-ils.worm.flat_marc.authority.xml and stores the rows inside
# the 'wormize_authority_record' savepoint.  Opens/commits/rolls back a
# transaction when the caller has none.  Returns 1 on success, 0 on failure.
sub wormize_authority_record {
    my ($self, $client, $rec) = @_;

    my $ingest = 'OpenILS::Application::Ingest';

    my $commit = 0;
    unless ($ingest->in_transaction) {
        $ingest->begin_transaction($client) || OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
        $commit = 1;
    }

    my $success = 1;
    try {
        # clean up the cruft
        if ($self->api_name !~ /noscrub/o) {
            $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || OpenSRF::EX::PANIC->throw("Couldn't scrub record $rec!");
        }

        # now redo 'em
        my $auths = $ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );

        my @full_rec = ();
        for my $r (@$auths) {
            my $xml = $parser->parse_string($r->marc);

            # the full_rec stuff
            for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
                $fr->record( $r->id );
                push @full_rec, $fr;
            }

            # XXX record descriptors are not generated for authority records;
            # what they would mean here is an open question.
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );

        if (@full_rec) {
            $ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec );
        }

        $ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );

    } otherwise {
        my $err = shift;
        $log->debug('Wormization failed : '.$err, ERROR);
        $ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
        $success = 0;
    };

    if ($commit) {
        $ingest->commit_transaction   if $success;
        $ingest->rollback_transaction if !$success;
    }
    return $success;
}
711 __PACKAGE__->register_method(
712         api_name        => "open-ils.worm.wormize.authority",
713         method          => "wormize_authority_record",
714         api_level       => 1,
715         argc            => 1,
716 );
717 __PACKAGE__->register_method(
718         api_name        => "open-ils.worm.wormize.authority.noscrub",
719         method          => "wormize_authority_record",
720         api_level       => 1,
721         argc            => 1,
722 );
723
724
725 # --------------------------------------------------------------------------------
726 # MARC index extraction
727
728 package OpenILS::Application::Ingest::XPATH;
729 use base qw/OpenILS::Application::Ingest/;
730 use Unicode::Normalize;
731
# Evaluate an XPath expression against an XML element and return the text of
# the matches as a single NFD-normalized string.
#
#   $xml       - element to search (callers in this file pass a
#                documentElement)
#   $xpath     - XPath expression handed to findnodes()
#   $ns_uri    - optional namespace URI; when both this and $ns_prefix are
#   $ns_prefix - true, the pair is declared and activated on $xml first
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string built so far (substring test)
#
# For each matched node, the text of every child node is appended followed
# by one space.  A matched node with no children contributes its own text
# instead (this path is not subject to the $unique test).
sub _xpath_to_string {
	my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	# walk the set of matching nodes
	for my $value ($xml->findnodes( $xpath )) {

		my @children = $value->childNodes();
		for my $child (@children) {
			my $content = $child->textContent;

			# Literal substring test.  index() is used instead of a regex
			# match because empty child text would produce an empty
			# pattern, which Perl interprets as "reuse the last pattern
			# that matched successfully" rather than "match anything".
			next if ($unique && index($string, $content) >= 0);

			$string .= $content . " ";
		}

		if( ! @children ) {
			$string .= $value->textContent . " ";
		}
	}
	return NFD($string);
}
763
# Stream one metabib <class>_field_entry object per index type configured in
# $xpathset for the given class.
#
# Arguments (after $self and $client):
#   $xml   - MARCXML, either as a string (parsed here) or an already parsed
#            document reference
#   $class - index class name; selects both the $xpathset subtree and the
#            Fieldmapper::metabib::<class>_field_entry constructor
#
# Each value is the XPath extraction from the MODS rendering of the record,
# with combining marks (\pM) and "Other" category characters (\pC) removed,
# a period directly following a word character dropped, and the result
# lowercased.  Types whose extraction is false are skipped.
# Results go out through $client->respond; the sub itself returns undef.
sub class_all_index_string_xml {
	my $self = shift;
	my $client = shift;
	my $xml = shift;
	my $class = shift;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

	my @types = keys %{ $xpathset->{$class} };
	return undef unless @types;

	# The MODS rendering does not depend on the index type, so run the XSLT
	# once for the record rather than once per type.
	my $mods = $mods_sheet->transform($xml)->documentElement;

	for my $type (@types) {
		my $value = _xpath_to_string(
				$mods,
				$xpathset->{$class}->{$type}->{xpath},
				"http://www.loc.gov/mods/",
				"mods",
				1
		);

		next unless $value;

		$value =~ s/\pM+//sgo;
		$value =~ s/\pC+//sgo;

		$value =~ s/(\w)\./$1/sgo;
		$value = lc($value);

		my $fm = $class_constructor->new;
		$fm->value( $value );
		$fm->field( $xpathset->{$class}->{$type}->{id} );
		$client->respond($fm);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.xml",
	method		=> "class_all_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
806
# Record-id front end for "open-ils.worm.field_entry.class.xml": retrieves
# the biblio record entry for $rec, runs the class extraction on its MARC,
# stamps each resulting field entry with $rec as its source and streams it
# back through $client.  Returns undef.
sub class_all_index_string_record {
	my ($self, $client, $rec, $class) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $extractor = $self->method_lookup("open-ils.worm.field_entry.class.xml");
	for my $entry ($extractor->run($bib->marc, $class)) {
		$entry->source($rec);
		$client->respond($entry);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.field_entry.class.record",
	method		=> "class_all_index_string_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
829
830
# Return the extracted index string for one ($class, $type) pair.
#
# $xml may be a MARCXML string (parsed here) or an already parsed document.
# The record is transformed with $mods_sheet and the XPath configured at
# $xpathset->{$class}{$type}{xpath} is evaluated against the result under
# the "mods" prefix, with the uniqueness filter switched on.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();
	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $mods_root  = $mods_sheet->transform($xml)->documentElement;
	my $type_xpath = $xpathset->{$class}->{$type}->{xpath};

	return _xpath_to_string( $mods_root, $type_xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.xml",
	method		=> "class_index_string_xml",
	api_level	=> 1,
	argc		=> 1,
);
848
# Record-id front end for "open-ils.worm.class.type.xml": retrieves the
# biblio record entry for $rec, extracts the ($class, $type) index string
# from its MARC, logs the result at DEBUG level and returns it.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $extractor = $self->method_lookup("open-ils.worm.class.type.xml");
	my ($found) = $extractor->run($bib->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($found)", DEBUG);
	return $found;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.class.type.record",
	method		=> "class_index_string_record",
	api_level	=> 1,
	argc		=> 1,
);
869
# Evaluate an arbitrary XPath against an XML document and return the
# matching text as one string.
#
#   $xml     - XML string (parsed here) or already parsed document
#   $xpath   - expression to evaluate against the document element
#   $uri     - optional namespace URI   (passed through to _xpath_to_string)
#   $prefix  - optional namespace prefix (passed through likewise)
#   $unique  - passed through as the uniqueness flag
sub xml_xpath {
	my ($self, $client, $xml, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	$xml = $parser->parse_string($xml) unless (ref $xml);
	my $root = $xml->documentElement;

	return _xpath_to_string( $root, $xpath, $uri, $prefix, $unique );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.xml",
	method		=> "xml_xpath",
	api_level	=> 1,
	argc		=> 1,
);
889
# Record-id front end for "open-ils.worm.xpath.xml": retrieves the biblio
# record entry for $rec, evaluates $xpath against its MARC (forwarding the
# namespace URI, prefix and uniqueness flag unchanged), logs the result at
# DEBUG level and returns it.
sub record_xpath {
	my ($self, $client, $rec, $xpath, $uri, $prefix, $unique) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $evaluator = $self->method_lookup("open-ils.worm.xpath.xml");
	my ($found) = $evaluator->run($bib->marc, $xpath, $uri, $prefix, $unique );

	$log->debug("XPath [$xpath] bib rec $rec returns ($found)", DEBUG);
	return $found;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.record",
	method		=> "record_xpath",
	api_level	=> 1,
	argc		=> 1,
);
912
913
914 # --------------------------------------------------------------------------------
915 # MARC Descriptor
916
917 package OpenILS::Application::Ingest::Biblio::Leader;
918 use base qw/OpenILS::Application::Ingest/;
919 use Unicode::Normalize;
920
# Named groups of single-character record type codes, each written as a
# regex character class.  The descriptor extractors in this package test
# leader position 6 against these groups via _type_re().
our %marc_type_groups = (
	BKS => q/[at]{1}/,
	SER => q/[a]{1}/,
	VIS => q/[gkro]{1}/,
	MIX => q/[p]{1}/,
	MAP => q/[ef]{1}/,
	SCO => q/[cd]{1}/,
	REC => q/[ij]{1}/,
	COM => q/[m]{1}/,
);

# Build an anchored regex matching any one of the named type groups.
#
#   @_ - one or more keys of %marc_type_groups; unknown names are ignored
#
# Returns a compiled regex of the form ^(?:group1|group2|...)$.
sub _type_re {
	# Hash *slice*: look up every requested group.  (A scalar element
	# lookup on @_ would use the argument count as the key.)
	my @classes = grep { defined } @marc_type_groups{@_};

	# Group the alternation so both anchors apply to every alternative.
	my $re = '^(?:' . join('|', @classes) . ')$';
	return qr/$re/;
}
936
# Dispatch table: record descriptor field name => extractor closure.
#
# Every extractor reads the package variables $ldr (leader text) and/or
# $oo8 (008 control field text); _extract_biblio_descriptors() localizes
# both before calling these.  All offsets are zero-based substr() positions.
our %biblio_descriptor_code = (

	# --- single characters taken straight from the leader ---
	pub_status	=> sub { return substr($ldr, 5, 1); },
	item_type	=> sub { return substr($ldr, 6, 1); },
	bib_level	=> sub { return substr($ldr, 7, 1); },
	control_type	=> sub { return substr($ldr, 8, 1); },
	char_encoding	=> sub { return substr($ldr, 9, 1); },
	enc_level	=> sub { return substr($ldr, 17, 1); },
	cat_form	=> sub { return substr($ldr, 18, 1); },

	# --- fixed positions in the 008 ---
	audience	=> sub { return substr($oo8, 22, 1); },
	item_lang	=> sub { return substr($oo8, 35, 3); },

	# --- 008 positions that depend on the record type (leader position 6) ---
	item_form	=> sub {
		# maps and visual materials keep this at 008/29 ...
		return substr($oo8, 29, 1)
			if (substr($ldr, 6, 1) =~ _type_re( qw/MAP VIS/ ));

		# ... these groups keep it at 008/23 ...
		return substr($oo8, 23, 1)
			if (substr($ldr, 6, 1) =~ _type_re( qw/BKS SER MIX SCO REC/ ));

		# ... and anything else gets a blank.
		return ' ';
	},
	lit_form	=> sub {
		if (substr($ldr, 6, 1) =~ _type_re('BKS')) {
			return substr($oo8, 33, 1);
		}
		return undef;
	},
	type_mat	=> sub {
		if (substr($ldr, 6, 1) =~ _type_re('VIS')) {
			return substr($oo8, 33, 1);
		}
		return undef;
	},
);
959
# Build a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.
#
# The leader and the 008 / 007 control field values are pulled out (matching
# on local-name, so any namespace prefix is tolerated) into the localized
# package variables $ldr, $oo8 and $oo7; every extractor in
# %biblio_descriptor_code is then run and its result stored through the
# descriptor accessor of the same name.
sub _extract_biblio_descriptors {
	my ($xml) = @_;

	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
	local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');

	my $rd_obj = Fieldmapper::metabib::record_descriptor->new;

	for my $rd_field ( keys %biblio_descriptor_code ) {
		my $extractor = $biblio_descriptor_code{$rd_field};
		$rd_obj->$rd_field( $extractor->() );
	}

	return $rd_obj;
}
974
# API wrapper around _extract_biblio_descriptors(): accepts MARCXML as a
# string (parsed here) or as an already parsed document, and returns the
# resulting record descriptor object.
sub extract_biblio_desc_xml {
	my ($self, $client, $xml) = @_;

	unless (ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _extract_biblio_descriptors( $xml );
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.xml",
	method		=> "extract_biblio_desc_xml",
	api_level	=> 1,
	argc		=> 1,
);
990
# Record-id front end for "open-ils.worm.biblio_leader.xml": retrieves the
# biblio record entry for $rec, builds the record descriptor from its MARC,
# logs a JSON dump of it at DEBUG level and returns it.
sub extract_biblio_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::Ingest->post_init();

	my $bib = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.biblio.record_entry.retrieve" => $rec
	);

	my $describer = $self->method_lookup("open-ils.worm.biblio_leader.xml");
	my ($descriptor) = $describer->run($bib->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($descriptor), DEBUG);
	return $descriptor;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.biblio_leader.record",
	method		=> "extract_biblio_desc_record",
	api_level	=> 1,
	argc		=> 1,
);
1009
1010 # --------------------------------------------------------------------------------
1011 # Flat MARC
1012
1013 package OpenILS::Application::Ingest::FlatMARC;
1014 use base qw/OpenILS::Application::Ingest/;
1015 use Unicode::Normalize;
1016
1017
# Flatten a parsed MARCXML document into a list of full_rec Fieldmapper
# objects.
#
#   $marcxml - parsed XML document containing a MARC "record" element
#   $xmltype - Fieldmapper namespace to build rows in
#              (Fieldmapper::<xmltype>::full_rec); defaults to 'metabib'
#
# One row is produced for the leader (tag 'LDR'), one per controlfield and
# one per datafield subfield (carrying tag, ind1, ind2 and subfield code).
# Every value is NFD-decomposed with combining marks removed; subfield
# values are additionally lowercased.
#
# Elements are matched on local-name throughout, so MARCXML written with a
# namespace prefix (e.g. marc:leader) is handled the same as unprefixed
# MARCXML.  If the document has no record element, an error is logged and an
# empty list is returned.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	# shared value cleanup: decompose, then drop the combining marks
	my $strip_marks = sub {
		my $val = NFD(shift);
		$val =~ s/\pM+//gs;
		return $val;
	};

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
	unless ($root) {
		$log->debug("No record element found in $xmltype xml, returning no rows", ERROR);
		return @ns_list;
	}

	for my $tagline ( $root->findnodes('*[local-name()="leader"]') ) {
		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( $root->findnodes('*[local-name()="controlfield"]') ) {
		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( $root->findnodes('*[local-name()="datafield"]') ) {
		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		for my $data ( $tagline->findnodes('*[local-name()="subfield"]') ) {
			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc( $strip_marks->( $data->textContent ) ) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
	return @ns_list;
}
1085
# Stream the flattened full_rec rows for a MARCXML document.
#
# $xml may be a string (parsed here) or an already parsed document.  Rows
# are built in the 'authority' Fieldmapper namespace when the method was
# invoked through an api_name containing "authority", otherwise in
# 'metabib'.  Each row is sent with $client->respond; returns undef.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	$xml = $parser->parse_string($xml) unless (ref $xml);

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'metabib';

	OpenILS::Application::Ingest->post_init();

	for my $row (_marcxml_to_full_rows($xml, $type)) {
		$client->respond($row);
	}
	return undef;
}
for my $flavor (qw/authority biblio/) {
	__PACKAGE__->register_method(
		api_name	=> "open-ils.worm.flat_marc.$flavor.xml",
		method		=> "flat_marc_xml",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1115
# Record-id front end for the flat_marc "xml" methods.
#
# Works on authority records when invoked through an api_name containing
# "authority", otherwise on biblio records: retrieves the matching
# record_entry for $rec, runs "open-ils.worm.flat_marc.<type>.xml" on its
# MARC and streams every resulting row through $client.  Returns undef.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

	OpenILS::Application::Ingest->post_init();

	my $entry = OpenILS::Application::Ingest->storage_req(
		"open-ils.storage.direct.${type}.record_entry.retrieve" => $rec
	);

	my $flattener = $self->method_lookup("open-ils.worm.flat_marc.$type.xml");
	for my $row ($flattener->run($entry->marc)) {
		$client->respond($row);
	}
	return undef;
}
for my $flavor (qw/biblio authority/) {
	__PACKAGE__->register_method(
		api_name	=> "open-ils.worm.flat_marc.$flavor.record_entry",
		method		=> "flat_marc_record",
		api_level	=> 1,
		argc		=> 1,
		stream		=> 1,
	);
}
1144
1145
1146 # --------------------------------------------------------------------------------
1147 # Fingerprinting
1148
1149 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1150 use base qw/OpenILS::Application::Ingest/;
1151 use Unicode::Normalize;
1152 use OpenSRF::EX qw/:try/;
1153
# Fingerprint extraction rules, as a flat list of
#
#     match-xpath => [ part-name => { xpath => [...], fixup => sub }, ... ]
#
# pairs.  _fp_mods() walks the pairs in order; for the first match-xpath
# that yields a fingerprint it takes, for each part, the first xpath giving
# a value and then runs that part's fixup.
#
# The fixups operate on the package variable $text (localized by
# _fp_mods()) and log its value before the first step, between steps and
# after the last step.
my @fp_mods_xpath = do {

	# Wrap a list of in-place edits of $text into one fixup closure that
	# logs $text ahead of every step and once more when all are done.
	my $make_fixup = sub {
		my @steps = @_;
		return sub {
			for my $step (@steps) {
				$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
				$step->();
			}
			$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
		};
	};

	# Steps common to titles and authors: decompose, drop combining marks,
	# lowercase, collapse whitespace runs, strip leading whitespace.
	my @common_steps = (
		sub { $text = NFD($text) },
		sub { $text =~ s/\pM+//gso },
		sub { $text = lc($text) },
		sub { $text =~ s/\s+/ /sgo },
		sub { $text =~ s/^\s*(.+)\s*$/$1/sgo },
	);

	# Titles additionally lose the articles the/a/an, bracketed text and
	# trailing ; / . punctuation.
	my $title_fixup = $make_fixup->(
		@common_steps,
		sub { $text =~ s/\b(?:the|an?)\b//sgo },
		sub { $text =~ s/\[.[^\]]+\]//sgo },
		sub { $text =~ s/\s*[;\/\.]*$//sgo },
	);

	# Authors are cut at the first whitespace (and a comma just before it).
	my $author_fixup = $make_fixup->(
		@common_steps,
		sub { $text =~ s/,?\s+.*$//sgo },
	);

	# Top-level title candidates, in order of preference.
	my @record_titles = (
		'//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
		'//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
		'//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
		'//mods:mods/mods:titleInfo[mods:title and not(@type)]',
	);

	# Top-level creator candidates, in order of preference.
	my @record_authors = (
		'//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
		'//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
	);

	(
		'//mods:mods/mods:typeOfResource[text()="text"]' => [
			title	=> {
				xpath	=> [ @record_titles ],
				fixup	=> $title_fixup,
			},
			author	=> {
				xpath	=> [ @record_authors ],
				fixup	=> $author_fixup,
			},
		],

		# relatedItem candidates are tried first, then the top-level ones
		'//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
			title	=> {
				xpath	=> [
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
					'//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
					@record_titles,
				],
				fixup	=> $title_fixup,
			},
			author	=> {
				xpath	=> [
					'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
					'//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
					@record_authors,
				],
				fixup	=> $author_fixup,
			},
		],
	);
};

# Last resort: any record with a titleInfo reuses the rules of the first block.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1266
# Compute a fingerprint string from a MODS element.
#
# The "mods" prefix is declared and activated on $mods, then the
# (match-xpath => block) pairs of @fp_mods_xpath are tried in order.  When a
# match-xpath finds nodes, each part of its block contributes the value of
# the first part xpath that yields one, passed through the part's fixup
# (which edits the localized package variable $text in place).  As soon as a
# pair produces a non-empty string, every run of non-word characters is
# removed from it and the result is returned.  Returns undef if no pair
# produces anything.
sub _fp_mods {
	my $mods = shift;
	$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

	my $fp_string = '';

	# even slots hold the match xpath, the following odd slot its block
	for (my $pair = 0; $fp_mods_xpath[$pair]; $pair += 2) {
		my $match_xpath = $fp_mods_xpath[$pair];

		my @hits = $mods->findnodes( $match_xpath );
		if (@hits) {
			my $block = $fp_mods_xpath[$pair + 1];

			# same layout inside a block: part name, then part definition
			for (my $slot = 0; $$block[$slot + 1]; $slot += 2) {
				my $part_name = $$block[$slot];
				my $part      = $$block[$slot + 1];

				local $text;
				for my $xpath ( @{ $part->{xpath} } ) {
					$text = $mods->findvalue( $xpath );
					last if ($text);
				}

				$log->debug("Found fingerprint text using $part_name : [$text]", DEBUG);

				next unless ($text);

				$$part{fixup}->();
				$log->debug("Fingerprint text after fixup : [$text]", DEBUG);
				$fp_string .= $text;
			}
		}

		if ($fp_string) {
			$fp_string =~ s/\W+//gso;
			$log->debug("Fingerprint is [$fp_string]", INFO);
			return $fp_string;
		}
	}

	return undef;
}
1311
# Recompute the fingerprint and quality of bib records and store them whenever
# they differ from what is currently on the record.
#
# Unless the method was invoked through an API name containing "nomap", a
# changed record is also detached from its current metarecord (which is deleted
# once no source maps point at it any more) and attached to the metarecord that
# matches the new fingerprint, creating that metarecord if none exists.
#
# Params:  $rec - handed unchanged to biblio.record_entry.search.id.atomic
# Streams: the id of every record that was examined, via $client->respond
# Returns: undef, always.  Failures inside the update are logged; when this
#          call opened the transaction itself the transaction is rolled back.
sub refingerprint_bibrec {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        # Open a transaction only if the caller is not already inside one, and
        # remember that it is then ours to commit or roll back.
        my $commit = 0;
        if (!OpenILS::Application::Ingest->in_transaction) {
                OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
                $commit = 1;
        }

        my $success = 1;
        try {
                my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );

                # The loop variable is $bib, not $b, so sort's $b is never shadowed.
                for my $bib (@$bibs) {
                        my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $bib->marc );

                        # The fingerprint method returns 0 when its script fails.
                        # Stop here rather than overwrite the stored fingerprint
                        # and quality with undefined values.
                        die "No fingerprint could be generated for record ".$bib->id."\n"
                                unless (ref $fp);

                        if ($bib->fingerprint ne $fp->{fingerprint} || $bib->quality != $fp->{quality}) {

                                $log->debug("Updating ".$bib->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);

                                OpenILS::Application::Ingest->storage_req(
                                        'open-ils.storage.direct.biblio.record_entry.remote_update',
                                        { id => $bib->id },
                                        { fingerprint => $fp->{fingerprint},
                                          quality     => $fp->{quality} }
                                );

                                if ($self->api_name !~ /nomap/o) {
                                        my $old_source_map = OpenILS::Application::Ingest->storage_req(
                                                'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
                                                $bib->id
                                        );

                                        # Drop every existing mapping for this record,
                                        # remembering the metarecord it belonged to.
                                        my $old_mrid;
                                        if (ref($old_source_map) and @$old_source_map) {
                                                for my $m (@$old_source_map) {
                                                        $old_mrid = $m->metarecord;
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord_source_map.delete',
                                                                $m->id
                                                        );
                                                }
                                        }

                                        # Written as a plain block: "my $x = ... if COND"
                                        # has undefined behaviour in Perl.
                                        if ($old_mrid) {
                                                my $old_sm = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
                                                        { metarecord => $old_mrid }
                                                );

                                                # Nothing maps to the old metarecord any more.
                                                if (ref($old_sm) and @$old_sm == 0) {
                                                        OpenILS::Application::Ingest->storage_req(
                                                                'open-ils.storage.direct.metabib.metarecord.delete',
                                                                $old_mrid
                                                        );
                                                }
                                        }

                                        my $mr = OpenILS::Application::Ingest->storage_req(
                                                        'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
                                                        { fingerprint => $fp->{fingerprint} }
                                        )->[0];

                                        unless ($mr) {
                                                $mr = Fieldmapper::metabib::metarecord->new;
                                                $mr->fingerprint( $fp->{fingerprint} );
                                                $mr->master_record( $bib->id );
                                                $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
                                        }

                                        my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
                                        $mr_map->metarecord( $mr->id );
                                        $mr_map->source( $bib->id );
                                        OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );

                                }
                        }
                        $client->respond($bib->id);
                }

        } otherwise {
                $log->debug('Fingerprinting failed : '.shift(), ERROR);
                $success = 0;
        };

        OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
        OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);

__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record.update.nomap",
        method          => "refingerprint_bibrec",
        api_level       => 1,
        argc            => 1,
);
1414
1415 =comment
1416
1417 sub fingerprint_bibrec {
1418         my $self = shift;
1419         my $client = shift;
1420         my $rec = shift;
1421
1422         OpenILS::Application::Ingest->post_init();
1423         my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1424
1425         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1426         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1427         return $fp;
1428
1429 }
1430 __PACKAGE__->register_method(  
1431         api_name        => "open-ils.worm.fingerprint.record",
1432         method          => "fingerprint_bibrec",
1433         api_level       => 0,
1434         argc            => 1,
1435 );                      
1436
1437
1438 sub fingerprint_mods {
1439         my $self = shift;
1440         my $client = shift;
1441         my $xml = shift;
1442
1443         OpenILS::Application::Ingest->post_init();
1444         my $mods = $parser->parse_string($xml)->documentElement;
1445
1446         return _fp_mods( $mods );
1447 }
1448 __PACKAGE__->register_method(  
1449         api_name        => "open-ils.worm.fingerprint.mods",
1450         method          => "fingerprint_mods",
1451         api_level       => 1,
1452         argc            => 1,
1453 );                      
1454
1455 sub fingerprint_marc {
1456         my $self = shift;
1457         my $client = shift;
1458         my $xml = shift;
1459
1460         $xml = $parser->parse_string($xml) unless (ref $xml);
1461
1462         OpenILS::Application::Ingest->post_init();
1463         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1464         $log->debug("Returning [$fp] as fingerprint", INFO);
1465         return $fp;
1466 }
1467 __PACKAGE__->register_method(  
1468         api_name        => "open-ils.worm.fingerprint.marc",
1469         method          => "fingerprint_marc",
1470         api_level       => 1,
1471         argc            => 1,
1472 );                      
1473
1474
1475 =cut
1476
# Fingerprint a stored bib record.
#
# Retrieves the record entry identified by $rec from storage, feeds its MARC
# to the "open-ils.worm.fingerprint.marc" method and returns the first value
# that method produces.
sub biblio_fingerprint_record {
        my ($self, $client, $rec) = @_;

        OpenILS::Application::Ingest->post_init();

        my $entry = OpenILS::Application::Ingest->storage_req(
                'open-ils.storage.direct.biblio.record_entry.retrieve',
                $rec
        );
        my $marc = $entry->marc;

        my $fingerprinter = $self->method_lookup('open-ils.worm.fingerprint.marc');
        my ($fp) = $fingerprinter->run($marc);

        $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
1498
# Script runner for the fingerprinting script; built on the first call to
# biblio_fingerprint and reused afterwards.
our $fp_script;

# Fingerprint a MARC record by running the configured biblio_fingerprint script.
#
# Params:
#   $marc - MARCXML, either as a string or as an already-parsed document
#           (anything that is a reference is used as-is).
# Returns: whatever the script run returns, or 0 when the run yields a false
#          value (the failure is logged first).
#
# The script sees an 'environment' object holding the entityized MARC and the
# entityized MODS produced from it by $mods_sheet.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $marc = shift;

        OpenILS::Application::Ingest->post_init();

        $marc = $parser->parse_string($marc) unless (ref $marc);

        my $mods = OpenILS::Application::Ingest::entityize(
                $mods_sheet
                        ->transform( $marc )
                        ->documentElement
                        ->toString,
                'D'
        );

        $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );

        # The record is only written to the internal log; it is no longer
        # echoed to STDERR on every call.
        $log->internal("Got MARC [$marc]");
        $log->internal("Created MODS [$mods]");

        if (!$fp_script) {
                my @pfx = ( "apps", "open-ils.storage", "app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file            => $script_file,
                        paths           => $script_libs,
                        reset_count     => 1000,
                );
        }

        $log->debug("Applying environment for biblio fingerprinting...");

        my $env = {marc => $marc, mods => $mods};
        $fp_script->insert('environment' => $env);

        $log->debug("Running script for biblio fingerprinting...");

        # Bail out explicitly on failure; the early return must not depend on
        # the return value of the logging call.
        my $res = $fp_script->run;
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return 0;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.fingerprint.marc",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
1561
1562 # --------------------------------------------------------------------------------
1563
1564 1;
1565
1566 __END__
# Handles for the storage methods used by wormize.  Each one is resolved with
# method_lookup the first time it is needed and then reused.
my ($in_xact, $begin, $commit, $rollback);
my ($lookup, $update_entry);
my ($mr_lookup, $mr_update, $mr_create);
my ($create_source_map, $sm_lookup);
my ($rm_old_rd, $rm_old_sm, $rm_old_fr, $rm_old_tr);
my ($rm_old_ar, $rm_old_sr, $rm_old_kr, $rm_old_ser);

my ($fr_create, $rd_create);
my $create = {};

# Perl expressions, keyed by record_descriptor field name, that pull
# fixed-position values out of the MARC leader and the 008 field.  They are run
# through string eval inside the ingest loops, so lexicals named exactly $ldr
# and $oo8 must be in scope at the point of the eval.
my %descriptor_code = (
        item_type => 'substr($ldr,6,1)',
        item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        bib_level => 'substr($ldr,7,1)',
        control_type => 'substr($ldr,8,1)',
        char_encoding => 'substr($ldr,9,1)',
        enc_level => 'substr($ldr,17,1)',
        cat_form => 'substr($ldr,18,1)',
        pub_status => 'substr($ldr,5,1)',
        item_lang => 'substr($oo8,35,3)',
        #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
        audience => 'substr($oo8,22,1)',
);

# Ingest bib records: fingerprint each one, rebuild its record descriptor,
# full_rec rows and per-class field entries, and (unless the API name contains
# "no_map") map it to a metarecord.
#
# Params:  @docids - record ids handed to biblio.record_entry.batch.retrieve.
#          Only the first retrieved record is processed unless the API name
#          ends in "batch".
# Returns: the number of records processed.
# Throws:  OpenSRF::EX::PANIC when the transaction cannot be started or
#          committed, or when a storage create/update yields no result.
sub wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        my $no_map = ($self->api_name =~ /no_map/o) ? 1 : 0;

        # Resolve every storage method once and keep the handle.
        $in_xact           ||= $self->method_lookup('open-ils.storage.transaction.current');
        $begin             ||= $self->method_lookup('open-ils.storage.transaction.begin');
        $commit            ||= $self->method_lookup('open-ils.storage.transaction.commit');
        $rollback          ||= $self->method_lookup('open-ils.storage.transaction.rollback');
        $sm_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
        $mr_lookup         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
        $mr_update         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
        $lookup            ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
        $update_entry      ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
        $rm_old_sm         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
        $rm_old_rd         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.mass_delete');
        $rm_old_fr         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.mass_delete');
        $rm_old_tr         ||= $self->method_lookup('open-ils.storage.direct.metabib.title_field_entry.mass_delete');
        $rm_old_ar         ||= $self->method_lookup('open-ils.storage.direct.metabib.author_field_entry.mass_delete');
        $rm_old_sr         ||= $self->method_lookup('open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
        $rm_old_kr         ||= $self->method_lookup('open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
        $rm_old_ser        ||= $self->method_lookup('open-ils.storage.direct.metabib.series_field_entry.mass_delete');
        $mr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
        $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
        $rd_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.record_descriptor.batch.create');
        $fr_create         ||= $self->method_lookup('open-ils.storage.direct.metabib.full_rec.batch.create');
        for my $class ( qw/title author subject keyword series/ ) {
                $$create{$class} ||= $self->method_lookup("open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
        }

        # Start a transaction unless the caller already has one open.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @source_maps;
        my @entry_list;
        my @mr_list;
        my @rd_list;
        my @ns_list;
        my @mods_data;
        my $ret = 0;
        for my $entry ( $lookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                if (!$mods_sheet) {
                        my $xslt_doc = $parser->parse_file(
                                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                my $modsdoc = $mods_sheet->transform($marcdoc);

                my $mods = $modsdoc->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                # NOTE(review): fingerprint_mods is called here as a plain
                # function with the MODS element as its only argument; confirm
                # that matches its current signature.
                $entry->fingerprint( fingerprint_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                unless ($no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if (!$mr || !@$mr) {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );
                                my ($new_mr) = $mr_create->run($mr);
                                # Test the create result, not the object we just
                                # built (which is always defined).
                                unless (defined $new_mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                                $mr->id($new_mr);
                        } else {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        }

                        my $sm = Fieldmapper::metabib::metarecord_source_map->new;
                        $sm->metarecord( $mr->id );
                        $sm->source( $entry->id );
                        push @source_maps, $sm;
                }

                # $ldr and $oo8 are read by name from the %descriptor_code
                # snippets evaluated below; do not rename them.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                last unless ($self->api_name =~ /batch$/o);
        }

        # Clear out everything previously derived from these records.
        $rm_old_rd->run( { record => \@docids } );
        $rm_old_fr->run( { record => \@docids } );
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@docids } );
        $rm_old_ar->run( { source => \@docids } );
        $rm_old_sr->run( { source => \@docids } );
        $rm_old_kr->run( { source => \@docids } );
        $rm_old_ser->run( { source => \@docids } );

        unless ($no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                unless (defined $sm) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
                }
                my ($mr) = $mr_update->run(@mr_list);
                unless (defined $mr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
                }
        }

        my ($re) = $update_entry->run(@entry_list);
        unless (defined $re) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
        }

        my ($rd) = $rd_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
        }

        my ($fr) = $fr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
        }

        # step 5: insert the new metadata
        for my $class ( qw/title author subject keyword series/ ) {
                my @md_list = ();
                my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";

                for my $doc ( @mods_data ) {
                        # Each element is a one-pair hash: record id => extracted values.
                        my ($did) = keys %$doc;
                        my ($data) = values %$doc;

                        for my $row ( keys %{ $$data{$class} } ) {
                                next unless (exists $$data{$class}{$row});
                                next unless ($$data{$class}{$row}{value});
                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $$data{$class}{$row}{value} );
                                $fm_obj->field( $$data{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $$create{$class}->run(@md_list);
                unless (defined $cr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
                }
        }

        # Commit only a transaction that this call started.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# The four API names share one implementation; wormize inspects its own
# api_name for "no_map" and a trailing "batch".
for my $wormize_api ( qw/
        open-ils.worm.wormize
        open-ils.worm.wormize.no_map
        open-ils.worm.wormize.batch
        open-ils.worm.wormize.no_map.batch
/ ) {
        __PACKAGE__->register_method(
                api_name        => $wormize_api,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
1857
1858
# Handles reserved for the authority ingest.  Only $alookup, $aupdate_entry,
# $arm_old_rd, $arm_old_fr, $ard_create and $afr_create are populated by
# authority_wormize below.
my ($ain_xact, $abegin, $acommit, $arollback);
my ($alookup, $aupdate_entry);
my ($amr_lookup, $amr_update, $amr_create);
my ($acreate_source_map, $asm_lookup);
my ($arm_old_rd, $arm_old_sm, $arm_old_fr, $arm_old_tr);
my ($arm_old_ar, $arm_old_sr, $arm_old_kr, $arm_old_ser);

my ($afr_create, $ard_create);
my $acreate = {};

# Ingest authority records: rebuild the record descriptor and the full_rec rows
# of each record from its MARC.
#
# Params:  @docids - record ids handed to authority.record_entry.batch.retrieve.
#          Only the first retrieved record is processed unless the API name
#          ends in "batch".
# Returns: the number of records processed.
# Throws:  OpenSRF::EX::PANIC when the transaction cannot be started or
#          committed, or when a storage create yields no result.
#
# Transaction control goes through the $in_xact/$begin/$commit/$rollback
# handles declared earlier in the file.
sub authority_wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        $in_xact       ||= $self->method_lookup('open-ils.storage.transaction.current');
        $begin         ||= $self->method_lookup('open-ils.storage.transaction.begin');
        $commit        ||= $self->method_lookup('open-ils.storage.transaction.commit');
        $rollback      ||= $self->method_lookup('open-ils.storage.transaction.rollback');
        $alookup       ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
        $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
        $arm_old_rd    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.mass_delete');
        $arm_old_fr    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.mass_delete');
        $ard_create    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.batch.create');
        $afr_create    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.batch.create');

        # Start a transaction unless the caller already has one open.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
        };

        my @rd_list;
        my @ns_list;
        my $ret = 0;

        # Read from the authority retrieve handle, not the biblio one.
        for my $entry ( $alookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                #if(!$mads_sheet) {
                #       my $xslt_doc = $parser->parse_file(
                #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
                #}

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                #my $madsdoc = $mads_sheet->transform($marcdoc);

                #my $mads = $madsdoc->documentElement;
                #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

                # $ldr and $oo8 are read by name from the %descriptor_code
                # snippets evaluated below; do not rename them.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::authority::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                last unless ($self->api_name =~ /batch$/o);
        }

        # Clear out everything previously derived from these records.
        $arm_old_rd->run( { record => \@docids } );
        $arm_old_fr->run( { record => \@docids } );

        my ($rd) = $ard_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
        }

        # Write through the authority full_rec handle, matching the error text.
        my ($fr) = $afr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
        }

        # Commit only a transaction that this call started.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by Ingest.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize",
        method          => "authority_wormize",
        api_level       => 1,
        argc            => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.worm.authority.wormize.batch",
        method          => "authority_wormize",
        api_level       => 1,
        argc            => 1,
);
2013
2014
2015 # --------------------------------------------------------------------------------
2016
2017
# Flatten a parsed MARCXML document into full_rec-style rows: one per leader,
# one per controlfield and one per element child of every datafield.
#
# Params:
#   $marcxml - parsed document; its documentElement is the MARC record
#   $type    - class used to build every row
#              (default 'Fieldmapper::metabib::full_rec')
# Returns: the list of row objects.  The record id is not set here; callers
#          fill it in.
#
# Every value is NFD-normalised and has its \pM (mark) characters removed;
# datafield values are additionally lower-cased.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        # Text of a node, decomposed and stripped of marks.
        my $plain_text = sub {
                my $val = NFD($_[0]->textContent);
                $val =~ s/(\pM+)//gso;
                return $val;
        };

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Honour $type here as well, so non-default callers get rows
                # of one class throughout.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( $plain_text->($tagline) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( $plain_text->($tagline) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;
                        # childNodes also yields text and comment nodes, which
                        # carry no "code" attribute; only elements become rows.
                        next unless $data->isa('XML::LibXML::Element');

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc($plain_text->($data)) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
2078
# Collect the text found at $xpath beneath $root into one normalized string.
#
# Parameters:
#   $root  - node on which findnodes() is called
#   $xpath - XPath expression selecting the nodes of interest
#
# For every matching node the text of each child node is appended,
# followed by a single space.  A child is skipped when its text already
# occurs anywhere in the buffer built so far (a literal substring test,
# so "art" is dropped once "martial arts" has been added).  A matching
# node with no children contributes its own text instead.
#
# Returns the buffer NFD-decomposed, stripped of combining marks and
# lowercased.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # grab the set of matching nodes
        for my $value ( $root->findnodes( $xpath ) ) {

                # grab all children of the node
                my @children = $value->childNodes();

                if( ! @children ) {
                        $string .= $value->textContent . " ";
                        next;
                }

                for my $child (@children) {
                        my $content = $child->textContent;

                        # Uniquify with a literal substring test.  index() is
                        # used rather than a pattern match because an empty
                        # pattern makes Perl reuse the last successfully
                        # matched regex, so an empty child could append a
                        # stray space depending on unrelated earlier matches.
                        next if index( $string, $content ) >= 0;

                        $string .= $content . " ";
                }
        }

        $string = NFD($string);
        $string =~ s/\pM//gs;
        return lc($string);
}
2106
2107
# Build an empty result skeleton mirroring the file-level $xpathset.
#
# For every class and type key present in $xpathset the returned hashref
# holds { field_id => <the id stored for that class/type> }.  $self and
# $mods are accepted but not consulted; the result depends only on
# $xpathset.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;

        my %skeleton;
        foreach my $class (keys %$xpathset) {
                my %by_type;
                foreach my $type (keys %{ $xpathset->{$class} }) {
                        $by_type{$type} = {
                                field_id => $xpathset->{$class}->{$type}->{id},
                        };
                }
                $skeleton{$class} = \%by_type;
        }

        return \%skeleton;
}
2120
2121
2122 1;
2123
2124