292fbe00c3c16cb2e729896ef8e73b66d13a15ff
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / WoRM.pm
1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
3 use open qw/:utf8/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21
22 our $log = 'OpenSRF::Utils::Logger';
23 our $xml_util = OpenILS::Utils::FlatXML->new();
24
25 our $parser = XML::LibXML->new();
26 our $xslt = XML::LibXSLT->new();
27 our $mods_sheet;
28 our $mads_sheet;
29
30 our $st_sess;
31 sub st_sess {
32         my $self = shift;
33         my $sess = shift;
34         $st_sess = $sess if ($sess);
35         return $st_sess;
36 }
37
38 our $xpathset = {};
39
40 sub initialize {}
41 sub child_init {}
42
43 sub post_init {
44         $log->debug("Running post_init", DEBUG);
45
46         unless ($mods_sheet) {
47                 $log->debug("Loading MODS XSLT", DEBUG);
48                 my $xslt_doc = $parser->parse_file(
49                         OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
50                 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
51         }
52
53         #if (!__PACKAGE__->st_sess()) {
54         #       $log->debug("Creating cached storage server session", DEBUG);
55         #       __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
56         #}
57
58         unless (keys %$xpathset) {
59                 my $req = __PACKAGE__->storage_req('open-ils.storage.direct.config.metabib_field.retrieve.all.atomic');
60                 for my $f (@$req) {
61                         $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
62                         $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
63                         $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
64                 }
65         }
66 }
67
68
69 sub in_transaction {
70         OpenILS::Application::WoRM->post_init();
71         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
72 }
73
74 sub begin_transaction {
75         my $self = shift;
76         my $client = shift;
77         
78         OpenILS::Application::WoRM->post_init();
79         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
80         
81         try {
82                 if (!$outer_xact) {
83                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
84                         #__PACKAGE__->st_sess->connect;
85                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
86                         unless (defined $r and $r) {
87                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
88                                 #__PACKAGE__->st_sess->disconnect;
89                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
90                         }
91                 }
92         } otherwise {
93                 $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
94         };
95
96         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
97 }
98
99 sub rollback_transaction {
100         my $self = shift;
101         my $client = shift;
102
103         OpenILS::Application::WoRM->post_init();
104         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
105
106         try {
107                 if ($outer_xact) {
108                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
109                 } else {
110                         $log->debug("WoRM isn't inside a transaction.", INFO);
111                 }
112         } catch Error with {
113                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
114         };
115
116         return 1;
117 }
118
119 sub commit_transaction {
120         my $self = shift;
121         my $client = shift;
122
123         OpenILS::Application::WoRM->post_init();
124         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
125
126         try {
127                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
128                 if ($outer_xact) {
129                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
130                         unless (defined $r and $r) {
131                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
132                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
133                         }
134                         #__PACKAGE__->st_sess->disconnect;
135                 } else {
136                         $log->debug("WoRM isn't inside a transaction.", INFO);
137                 }
138         } catch Error with {
139                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
140         };
141
142         return 1;
143 }
144
145 sub storage_req {
146         my $self = shift;
147         my $method = shift;
148         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
149         return shift( @res );
150 }
151
152 sub scrub_authority_record {
153         my $self = shift;
154         my $client = shift;
155         my $rec = shift;
156
157         my $commit = 0;
158         if (!OpenILS::Application::WoRM->in_transaction) {
159                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
160                 $commit = 1;
161         }
162
163         my $success = 1;
164         try {
165                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
166
167                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
168                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
169
170                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
171         } otherwise {
172                 $log->debug('Scrubbing failed : '.shift(), ERROR);
173                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
174                 $success = 0;
175         };
176
177         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
178         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
179         return $success;
180 }
181 __PACKAGE__->register_method(  
182         api_name        => "open-ils.worm.scrub.authority",
183         method          => "scrub_authority_record",
184         api_level       => 1,
185         argc            => 1,
186 );                      
187
188
189 sub scrub_metabib_record {
190         my $self = shift;
191         my $client = shift;
192         my $rec = shift;
193
194         my $commit = 0;
195         if (!OpenILS::Application::WoRM->in_transaction) {
196                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
197                 $commit = 1;
198         }
199
200         my $success = 1;
201         try {
202                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
203                 
204                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
205                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
206                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
207                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
208                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
209                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
210                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
211                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
212
213                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
214                 my $masters = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
215
216                 for my $mr (@$masters) {
217                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
218                         my $others = OpenILS::Application::WoRM->storage_req(
219                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
220
221                         if (@$others) {
222                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
223                                 $mr->master_record($others->[0]->source);
224                                 OpenILS::Application::WoRM->storage_req(
225                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
226                                         { id => $mr->id },
227                                         { master_record => $others->[0]->source }
228                                 );
229                         } else {
230                                 warn "Removing metarecord whose master is $rec";
231                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
232                                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
233                                 warn "Metarecord removed";
234                                 $log->debug( "Metarecord removed", DEBUG);
235                         }
236                 }
237
238                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
239
240         } otherwise {
241                 $log->debug('Scrubbing failed : '.shift(), ERROR);
242                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
243                 $success = 0;
244         };
245
246         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
247         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
248         return $success;
249 }
250 __PACKAGE__->register_method(  
251         api_name        => "open-ils.worm.scrub.biblio",
252         method          => "scrub_metabib_record",
253         api_level       => 1,
254         argc            => 1,
255 );                      
256
257 sub wormize_biblio_record {
258         my $self = shift;
259         my $client = shift;
260         my $rec = shift;
261
262         my $commit = 0;
263         if (!OpenILS::Application::WoRM->in_transaction) {
264                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
265                 $commit = 1;
266         }
267
268         my $success = 1;
269         try {
270                 # clean up the cruft
271                 unless ($self->api_name =~ /noscrub/o) {
272                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
273                 }
274
275                 # now redo 'em
276                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
277
278                 my @full_rec = ();
279                 my @rec_descriptor = ();
280                 my %field_entry = (
281                         title   => [],
282                         author  => [],
283                         subject => [],
284                         keyword => [],
285                         series  => [],
286                 );
287                 my @metarecord = ();
288                 my @source_map = ();
289                 for my $r (@$bibs) {
290                         my $xml = $parser->parse_string($r->marc);
291
292                         # the full_rec stuff
293                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
294                                 $fr->record( $r->id );
295                                 push @full_rec, $fr;
296                         }
297
298                         # the rec_descriptor stuff
299                         my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
300                         $rd->record( $r->id );
301                         push @rec_descriptor, $rd;
302                         
303                         # the indexing field entry stuff
304                         for my $class ( qw/title author subject keyword series/ ) {
305                                 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
306                                         $fe->source( $r->id );
307                                         push @{$field_entry{$class}}, $fe;
308                                 }
309                         }
310
311                         #update the fingerprint
312                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
313                         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.remote_update', { id => $r->id }, { fingerprint => $fp } );
314
315                         unless ($self->api_name =~ /nomap/o) {
316                                 my $mr = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp  )->[0];
317                                 
318                                 unless ($mr) {
319                                         $mr = Fieldmapper::metabib::metarecord->new;
320                                         $mr->fingerprint( $fp );
321                                         $mr->master_record( $r->id );
322                                         $mr->id( OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
323                                 }
324
325                                 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
326                                 $mr_map->metarecord( $mr->id );
327                                 $mr_map->source( $r->id );
328                                 push @source_map, $mr_map;
329                         }
330
331                 }
332
333                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
334
335                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create', @source_map ) if (@source_map);
336                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
337                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.batch.create', @full_rec ) if (@full_rec);
338                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.batch.create', @{ $field_entry{title} } ) if (@{ $field_entry{title} });
339                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.batch.create', @{ $field_entry{author} } ) if (@{ $field_entry{author} });
340                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create', @{ $field_entry{subject} } ) if (@{ $field_entry{subject} });
341                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create', @{ $field_entry{keyword} } ) if (@{ $field_entry{keyword} });
342                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.batch.create', @{ $field_entry{series} } ) if (@{ $field_entry{series} });
343
344                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
345
346         } otherwise {
347                 $log->debug('Wormization failed : '.shift(), ERROR);
348                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
349                 $success = 0;
350         };
351
352         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
353         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
354         return $success;
355 }
356 __PACKAGE__->register_method(
357         api_name        => "open-ils.worm.wormize.biblio",
358         method          => "wormize_biblio_record",
359         api_level       => 1,
360         argc            => 1,
361 );
362 __PACKAGE__->register_method(
363         api_name        => "open-ils.worm.wormize.biblio.nomap",
364         method          => "wormize_biblio_record",
365         api_level       => 1,
366         argc            => 1,
367 );
368 __PACKAGE__->register_method(
369         api_name        => "open-ils.worm.wormize.biblio.noscrub",
370         method          => "wormize_biblio_record",
371         api_level       => 1,
372         argc            => 1,
373 );
374 __PACKAGE__->register_method(
375         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
376         method          => "wormize_biblio_record",
377         api_level       => 1,
378         argc            => 1,
379 );
380
381 sub wormize_authority_record {
382         my $self = shift;
383         my $client = shift;
384         my $rec = shift;
385
386         my $commit = 0;
387         if (!OpenILS::Application::WoRM->in_transaction) {
388                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
389                 $commit = 1;
390         }
391
392         my $success = 1;
393         try {
394                 # clean up the cruft
395                 unless ($self->api_name =~ /noscrub/o) {
396                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
397                 }
398
399                 # now redo 'em
400                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
401
402                 my @full_rec = ();
403                 my @rec_descriptor = ();
404                 for my $r (@$bibs) {
405                         my $xml = $parser->parse_string($r->marc);
406
407                         # the full_rec stuff
408                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
409                                 $fr->record( $r->id );
410                                 push @full_rec, $fr;
411                         }
412
413                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
414                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
415                         #$rd->record( $r->id );
416                         #push @rec_descriptor, $rd;
417                         
418                 }
419
420                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
421
422                 #OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
423                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
424
425                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
426
427         } otherwise {
428                 $log->debug('Wormization failed : '.shift(), ERROR);
429                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
430                 $success = 0;
431         };
432
433         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
434         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
435         return $success;
436 }
437 __PACKAGE__->register_method(
438         api_name        => "open-ils.worm.wormize.authority",
439         method          => "wormize_authority_record",
440         api_level       => 1,
441         argc            => 1,
442 );
443 __PACKAGE__->register_method(
444         api_name        => "open-ils.worm.wormize.authority.noscrub",
445         method          => "wormize_authority_record",
446         api_level       => 1,
447         argc            => 1,
448 );
449
450
451 # --------------------------------------------------------------------------------
452 # MARC index extraction
453
454 package OpenILS::Application::WoRM::XPATH;
455 use base qw/OpenILS::Application::WoRM/;
456 use Unicode::Normalize;
457
458 # give this a MODS documentElement and an XPATH expression
459 sub _xpath_to_string {
460         my $xml = shift;
461         my $xpath = shift;
462         my $ns_uri = shift;
463         my $ns_prefix = shift;
464         my $unique = shift;
465
466         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
467
468         my $string = "";
469
470         # grab the set of matching nodes
471         my @nodes = $xml->findnodes( $xpath );
472         for my $value (@nodes) {
473
474                 # grab all children of the node
475                 my @children = $value->childNodes();
476                 for my $child (@children) {
477
478                         # add the childs content to the growing buffer
479                         my $content = quotemeta($child->textContent);
480                         next if ($unique && $string =~ /$content/);  # uniquify the values
481                         $string .= $child->textContent . " ";
482                 }
483                 if( ! @children ) {
484                         $string .= $value->textContent . " ";
485                 }
486         }
487         NFD($string);
488         $string =~ s/(\pM)//gso;
489         return $string;
490 }
491
492 sub class_all_index_string_xml {
493         my $self = shift;
494         my $client = shift;
495         my $xml = shift;
496         my $class = shift;
497
498         OpenILS::Application::WoRM->post_init();
499         $xml = $parser->parse_string($xml) unless (ref $xml);
500         
501         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
502         for my $type ( keys %{ $xpathset->{$class} } ) {
503                 my $value =  _xpath_to_string(
504                                 $mods_sheet->transform($xml)->documentElement,
505                                 $xpathset->{$class}->{$type}->{xpath},
506                                 "http://www.loc.gov/mods/",
507                                 "mods",
508                                 1
509                 );
510
511                 next unless $value;
512
513                 my $fm = $class_constructor->new;
514                 $fm->value( $value );
515                 $fm->field( $xpathset->{$class}->{$type}->{id} );
516                 $client->respond($fm);
517         }
518         return undef;
519 }
520 __PACKAGE__->register_method(  
521         api_name        => "open-ils.worm.field_entry.class.xml",
522         method          => "class_all_index_string_xml",
523         api_level       => 1,
524         argc            => 1,
525         stream          => 1,
526 );                      
527
528 sub class_all_index_string_record {
529         my $self = shift;
530         my $client = shift;
531         my $rec = shift;
532         my $class = shift;
533
534         OpenILS::Application::WoRM->post_init();
535         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
536
537         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
538                 $fm->source($rec);
539                 $client->respond($fm);
540         }
541         return undef;
542 }
543 __PACKAGE__->register_method(  
544         api_name        => "open-ils.worm.field_entry.class.record",
545         method          => "class_all_index_string_record",
546         api_level       => 1,
547         argc            => 1,
548         stream          => 1,
549 );                      
550
551
552 sub class_index_string_xml {
553         my $self = shift;
554         my $client = shift;
555         my $xml = shift;
556         my $class = shift;
557         my $type = shift;
558
559         OpenILS::Application::WoRM->post_init();
560         $xml = $parser->parse_string($xml) unless (ref $xml);
561         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
562 }
563 __PACKAGE__->register_method(  
564         api_name        => "open-ils.worm.class.type.xml",
565         method          => "class_index_string_xml",
566         api_level       => 1,
567         argc            => 1,
568 );                      
569
570 sub class_index_string_record {
571         my $self = shift;
572         my $client = shift;
573         my $rec = shift;
574         my $class = shift;
575         my $type = shift;
576
577         OpenILS::Application::WoRM->post_init();
578         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
579
580         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
581         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
582         return $d;
583 }
584 __PACKAGE__->register_method(  
585         api_name        => "open-ils.worm.class.type.record",
586         method          => "class_index_string_record",
587         api_level       => 1,
588         argc            => 1,
589 );                      
590
591 sub xml_xpath {
592         my $self = shift;
593         my $client = shift;
594         my $xml = shift;
595         my $xpath = shift;
596         my $uri = shift;
597         my $prefix = shift;
598         my $unique = shift;
599
600         OpenILS::Application::WoRM->post_init();
601         $xml = $parser->parse_string($xml) unless (ref $xml);
602         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
603 }
604 __PACKAGE__->register_method(  
605         api_name        => "open-ils.worm.xpath.xml",
606         method          => "xml_xpath",
607         api_level       => 1,
608         argc            => 1,
609 );                      
610
611 sub record_xpath {
612         my $self = shift;
613         my $client = shift;
614         my $rec = shift;
615         my $xpath = shift;
616         my $uri = shift;
617         my $prefix = shift;
618         my $unique = shift;
619
620         OpenILS::Application::WoRM->post_init();
621         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
622
623         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
624         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
625         return $d;
626 }
627 __PACKAGE__->register_method(  
628         api_name        => "open-ils.worm.xpath.record",
629         method          => "record_xpath",
630         api_level       => 1,
631         argc            => 1,
632 );                      
633
634
635 # --------------------------------------------------------------------------------
636 # MARC Descriptor
637
638 package OpenILS::Application::WoRM::Biblio::Leader;
639 use base qw/OpenILS::Application::WoRM/;
640 use Unicode::Normalize;
641
642 our %biblio_descriptor_code = (
643         item_type => sub { substr($ldr,6,1); },
644         item_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/o) ? substr($oo8,29,1) : substr($oo8,23,1); },
645         bib_level => sub { substr($ldr,7,1); },
646         control_type => sub { substr($ldr,8,1); },
647         char_encoding => sub { substr($ldr,9,1); },
648         enc_level => sub { substr($ldr,17,1); },
649         cat_form => sub { substr($ldr,18,1); },
650         pub_status => sub { substr($ldr,5,1); },
651         item_lang => sub { substr($oo8,35,3); },
652         #lit_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
653         audience => sub { substr($oo8,22,1); },
654 );
655
656 sub _extract_biblio_descriptors {
657         my $xml = shift;
658
659         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
660         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
661
662         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
663         for my $rd_field ( keys %biblio_descriptor_code ) {
664                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
665         }
666
667         return $rd_obj;
668 }
669
670 sub extract_biblio_desc_xml {
671         my $self = shift;
672         my $client = shift;
673         my $xml = shift;
674
675         $xml = $parser->parse_string($xml) unless (ref $xml);
676
677         return _extract_biblio_descriptors( $xml );
678 }
679 __PACKAGE__->register_method(  
680         api_name        => "open-ils.worm.biblio_leader.xml",
681         method          => "extract_biblio_desc_xml",
682         api_level       => 1,
683         argc            => 1,
684 );                      
685
686 sub extract_biblio_desc_record {
687         my $self = shift;
688         my $client = shift;
689         my $rec = shift;
690
691         OpenILS::Application::WoRM->post_init();
692         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
693
694         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
695         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
696         return $d;
697 }
698 __PACKAGE__->register_method(  
699         api_name        => "open-ils.worm.biblio_leader.record",
700         method          => "extract_biblio_desc_record",
701         api_level       => 1,
702         argc            => 1,
703 );                      
704
705 # --------------------------------------------------------------------------------
706 # Flat MARC
707
708 package OpenILS::Application::WoRM::FlatMARC;
709 use base qw/OpenILS::Application::WoRM/;
710 use Unicode::Normalize;
711
712
713 sub _marcxml_to_full_rows {
714
715         my $marcxml = shift;
716         my $xmltype = shift || 'metabib';
717
718         my $type = "Fieldmapper::${xmltype}::full_rec";
719
720         my @ns_list;
721         
722         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
723
724         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
725                 next unless $tagline;
726
727                 my $ns = $type->new;
728
729                 $ns->tag( 'LDR' );
730                 my $val = $tagline->textContent;
731                 NFD($val);
732                 $val =~ s/(\pM+)//gso;
733                 $ns->value( $val );
734
735                 push @ns_list, $ns;
736         }
737
738         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
739                 next unless $tagline;
740
741                 my $ns = $type->new;
742
743                 $ns->tag( $tagline->getAttribute( "tag" ) );
744                 my $val = $tagline->textContent;
745                 NFD($val);
746                 $val =~ s/(\pM+)//gso;
747                 $ns->value( $val );
748
749                 push @ns_list, $ns;
750         }
751
752         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
753                 next unless $tagline;
754
755                 my $tag = $tagline->getAttribute( "tag" );
756                 my $ind1 = $tagline->getAttribute( "ind1" );
757                 my $ind2 = $tagline->getAttribute( "ind2" );
758
759                 for my $data ( $tagline->childNodes ) {
760                         next unless $data;
761
762                         my $ns = $type->new;
763
764                         $ns->tag( $tag );
765                         $ns->ind1( $ind1 );
766                         $ns->ind2( $ind2 );
767                         $ns->subfield( $data->getAttribute( "code" ) );
768                         my $val = $data->textContent;
769                         NFD($val);
770                         $val =~ s/(\pM+)//gso;
771                         $ns->value( lc($val) );
772
773                         push @ns_list, $ns;
774                 }
775         }
776
777         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
778         return @ns_list;
779 }
780
781 sub flat_marc_xml {
782         my $self = shift;
783         my $client = shift;
784         my $xml = shift;
785
786         $xml = $parser->parse_string($xml) unless (ref $xml);
787
788         my $type = 'metabib';
789         $type = 'authority' if ($self->api_name =~ /authority/o);
790
791         OpenILS::Application::WoRM->post_init();
792
793         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
794         return undef;
795 }
796 __PACKAGE__->register_method(  
797         api_name        => "open-ils.worm.flat_marc.authority.xml",
798         method          => "flat_marc_xml",
799         api_level       => 1,
800         argc            => 1,
801         stream          => 1,
802 );                      
803 __PACKAGE__->register_method(  
804         api_name        => "open-ils.worm.flat_marc.biblio.xml",
805         method          => "flat_marc_xml",
806         api_level       => 1,
807         argc            => 1,
808         stream          => 1,
809 );                      
810
811 sub flat_marc_record {
812         my $self = shift;
813         my $client = shift;
814         my $rec = shift;
815
816         my $type = 'biblio';
817         $type = 'authority' if ($self->api_name =~ /authority/o);
818
819         OpenILS::Application::WoRM->post_init();
820         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
821
822         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
823         return undef;
824 }
825 __PACKAGE__->register_method(  
826         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
827         method          => "flat_marc_record",
828         api_level       => 1,
829         argc            => 1,
830         stream          => 1,
831 );                      
832 __PACKAGE__->register_method(  
833         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
834         method          => "flat_marc_record",
835         api_level       => 1,
836         argc            => 1,
837         stream          => 1,
838 );                      
839
840
841 # --------------------------------------------------------------------------------
842 # Fingerprinting
843
844 package OpenILS::Application::WoRM::Biblio::Fingerprint;
845 use base qw/OpenILS::Application::WoRM/;
846 use Unicode::Normalize;
847 use OpenSRF::EX qw/:try/;
848
849 my @fp_mods_xpath = (
850         '//mods:mods/mods:typeOfResource[text()="text"]' => [
851                         title   => {
852                                         xpath   => [
853                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
854                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
855                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
856                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
857                                         ],
858                                         fixup   => sub {
859                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
860                                                         NFD($text);
861                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
862                                                         $text =~ s/\pM+//gso;
863                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
864                                                         $text = lc($text);
865                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
866                                                         $text =~ s/\s+/ /sgo;
867                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
868                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
869                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
870                                                         $text =~ s/\b(?:the|an?)\b//sgo;
871                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
872                                                         $text =~ s/\[.[^\]]+\]//sgo;
873                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
874                                                         $text =~ s/\s*[;\/\.]*$//sgo;
875                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
876                                                 },
877                         },
878                         author  => {
879                                         xpath   => [
880                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
881                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
882                                         ],
883                                         fixup   => sub {
884                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
885                                                         NFD($text);
886                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
887                                                         $text =~ s/\pM+//gso;
888                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
889                                                         $text = lc($text);
890                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
891                                                         $text =~ s/\s+/ /sgo;
892                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
893                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
894                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
895                                                         $text =~ s/,?\s+.*$//sgo;
896                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
897                                                 },
898                         },
899         ],
900
901         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
902                         title   => {
903                                         xpath   => [
904                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
905                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
906                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
907                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
908                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
909                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
910                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
911                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
912                                         ],
913                                         fixup   => sub {
914                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
915                                                         NFD($text);
916                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
917                                                         $text =~ s/\pM+//gso;
918                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
919                                                         $text = lc($text);
920                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
921                                                         $text =~ s/\s+/ /sgo;
922                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
923                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
924                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
925                                                         $text =~ s/\b(?:the|an?)\b//sgo;
926                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
927                                                         $text =~ s/\[.[^\]]+\]//sgo;
928                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
929                                                         $text =~ s/\s*[;\/\.]*$//sgo;
930                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
931                                                 },
932                         },
933                         author  => {
934                                         xpath   => [
935                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
936                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
937                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
938                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
939                                         ],
940                                         fixup   => sub {
941                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
942                                                         NFD($text);
943                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
944                                                         $text =~ s/\pM+//gso;
945                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
946                                                         $text = lc($text);
947                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
948                                                         $text =~ s/\s+/ /sgo;
949                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
950                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
951                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
952                                                         $text =~ s/,?\s+.*$//sgo;
953                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
954                                                 },
955                         },
956         ],
957
958 );
959
960 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
961
962 sub _fp_mods {
963         my $mods = shift;
964         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
965
966         my $fp_string = '';
967
968         my $match_index = 0;
969         my $block_index = 1;
970         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
971                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
972
973                         my $block_name_index = 0;
974                         my $block_value_index = 1;
975                         my $block = $fp_mods_xpath[$block_index];
976                         while ( my $part = $$block[$block_value_index] ) {
977                                 local $text;
978                                 for my $xpath ( @{ $part->{xpath} } ) {
979                                         $text = $mods->findvalue( $xpath );
980                                         last if ($text);
981                                 }
982
983                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
984
985                                 if ($text) {
986                                         $$part{fixup}->();
987                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
988                                         $fp_string .= $text;
989                                 }
990
991                                 $block_name_index += 2;
992                                 $block_value_index += 2;
993                         }
994                 }
995                 if ($fp_string) {
996                         $fp_string =~ s/\W+//gso;
997                         $log->debug("Fingerprint is [$fp_string]", INFO);;
998                         return $fp_string;
999                 }
1000
1001                 $match_index += 2;
1002                 $block_index += 2;
1003         }
1004         return undef;
1005 }
1006
1007 sub refingerprint_bibrec {
1008         my $self = shift;
1009         my $client = shift;
1010         my $rec = shift;
1011
1012         my $commit = 0;
1013         if (!OpenILS::Application::WoRM->in_transaction) {
1014                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1015                 $commit = 1;
1016         }
1017
1018         my $success = 1;
1019         try {
1020                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1021                 OpenILS::Application::WoRM->storage_req(
1022                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1023                         { id => $_->id },
1024                         { fingerprint => $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $_->marc ) }
1025                 ) for (@$bibs);
1026         } otherwise {
1027                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1028                 $success = 0;
1029         };
1030
1031         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
1032         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
1033         return $success;
1034 }
1035 __PACKAGE__->register_method(  
1036         api_name        => "open-ils.worm.fingerprint.record.update",
1037         method          => "refingerprint_bibrec",
1038         api_level       => 1,
1039         argc            => 1,
1040 );                      
1041
1042
1043 sub fingerprint_bibrec {
1044         my $self = shift;
1045         my $client = shift;
1046         my $rec = shift;
1047
1048         OpenILS::Application::WoRM->post_init();
1049         my $r = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1050
1051         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1052         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1053         return $fp;
1054
1055 }
1056 __PACKAGE__->register_method(  
1057         api_name        => "open-ils.worm.fingerprint.record",
1058         method          => "fingerprint_bibrec",
1059         api_level       => 1,
1060         argc            => 1,
1061 );                      
1062
1063 sub fingerprint_mods {
1064         my $self = shift;
1065         my $client = shift;
1066         my $xml = shift;
1067
1068         OpenILS::Application::WoRM->post_init();
1069         my $mods = $parser->parse_string($xml)->documentElement;
1070
1071         return _fp_mods( $mods );
1072 }
1073 __PACKAGE__->register_method(  
1074         api_name        => "open-ils.worm.fingerprint.mods",
1075         method          => "fingerprint_mods",
1076         api_level       => 1,
1077         argc            => 1,
1078 );                      
1079
1080 sub fingerprint_marc {
1081         my $self = shift;
1082         my $client = shift;
1083         my $xml = shift;
1084
1085         $xml = $parser->parse_string($xml) unless (ref $xml);
1086
1087         OpenILS::Application::WoRM->post_init();
1088         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1089         $log->debug("Returning [$fp] as fingerprint", INFO);
1090         return $fp;
1091 }
1092 __PACKAGE__->register_method(  
1093         api_name        => "open-ils.worm.fingerprint.marc",
1094         method          => "fingerprint_marc",
1095         api_level       => 1,
1096         argc            => 1,
1097 );                      
1098
1099
1100 # --------------------------------------------------------------------------------
1101
1102 1;
1103 __END__
1104 my $in_xact;
1105 my $begin;
1106 my $commit;
1107 my $rollback;
1108 my $lookup;
1109 my $update_entry;
1110 my $mr_lookup;
1111 my $mr_update;
1112 my $mr_create;
1113 my $create_source_map;
1114 my $sm_lookup;
1115 my $rm_old_rd;
1116 my $rm_old_sm;
1117 my $rm_old_fr;
1118 my $rm_old_tr;
1119 my $rm_old_ar;
1120 my $rm_old_sr;
1121 my $rm_old_kr;
1122 my $rm_old_ser;
1123
1124 my $fr_create;
1125 my $rd_create;
1126 my $create = {};
1127
1128 my %descriptor_code = (
1129         item_type => 'substr($ldr,6,1)',
1130         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1131         bib_level => 'substr($ldr,7,1)',
1132         control_type => 'substr($ldr,8,1)',
1133         char_encoding => 'substr($ldr,9,1)',
1134         enc_level => 'substr($ldr,17,1)',
1135         cat_form => 'substr($ldr,18,1)',
1136         pub_status => 'substr($ldr,5,1)',
1137         item_lang => 'substr($oo8,35,3)',
1138         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1139         audience => 'substr($oo8,22,1)',
1140 );
1141
1142 sub wormize {
1143
1144         my $self = shift;
1145         my $client = shift;
1146         my @docids = @_;
1147
1148         my $no_map = 0;
1149         if ($self->api_name =~ /no_map/o) {
1150                 $no_map = 1;
1151         }
1152
1153         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1154                 unless ($in_xact);
1155         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1156                 unless ($begin);
1157         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1158                 unless ($commit);
1159         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1160                 unless ($rollback);
1161         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1162                 unless ($sm_lookup);
1163         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1164                 unless ($mr_lookup);
1165         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1166                 unless ($mr_update);
1167         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1168                 unless ($lookup);
1169         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1170                 unless ($update_entry);
1171         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1172                 unless ($rm_old_sm);
1173         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1174                 unless ($rm_old_rd);
1175         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1176                 unless ($rm_old_fr);
1177         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1178                 unless ($rm_old_tr);
1179         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1180                 unless ($rm_old_ar);
1181         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1182                 unless ($rm_old_sr);
1183         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1184                 unless ($rm_old_kr);
1185         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1186                 unless ($rm_old_ser);
1187         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1188                 unless ($mr_create);
1189         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1190                 unless ($create_source_map);
1191         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1192                 unless ($rd_create);
1193         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1194                 unless ($fr_create);
1195         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1196                 unless ($$create{title});
1197         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1198                 unless ($$create{author});
1199         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1200                 unless ($$create{subject});
1201         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1202                 unless ($$create{keyword});
1203         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1204                 unless ($$create{series});
1205
1206
1207         my ($outer_xact) = $in_xact->run;
1208         try {
1209                 unless ($outer_xact) {
1210                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1211                         my ($r) = $begin->run($client);
1212                         unless (defined $r and $r) {
1213                                 $rollback->run;
1214                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1215                         }
1216                 }
1217         } catch Error with {
1218                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1219         };
1220
1221         my @source_maps;
1222         my @entry_list;
1223         my @mr_list;
1224         my @rd_list;
1225         my @ns_list;
1226         my @mods_data;
1227         my $ret = 0;
1228         for my $entry ( $lookup->run(@docids) ) {
1229                 # step -1: grab the doc from storage
1230                 next unless ($entry);
1231
1232                 if(!$mods_sheet) {
1233                         my $xslt_doc = $parser->parse_file(
1234                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1235                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
1236                 }
1237
1238                 my $xml = $entry->marc;
1239                 my $docid = $entry->id;
1240                 my $marcdoc = $parser->parse_string($xml);
1241                 my $modsdoc = $mods_sheet->transform($marcdoc);
1242
1243                 my $mods = $modsdoc->documentElement;
1244                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1245
1246                 $entry->fingerprint( fingerprint_mods( $mods ) );
1247                 push @entry_list, $entry;
1248
1249                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
1250
1251                 unless ($no_map) {
1252                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
1253                         if (!$mr || !@$mr) {
1254                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1255                                 $mr = new Fieldmapper::metabib::metarecord;
1256                                 $mr->fingerprint( $entry->fingerprint );
1257                                 $mr->master_record( $entry->id );
1258                                 my ($new_mr) = $mr_create->run($mr);
1259                                 $mr->id($new_mr);
1260                                 unless (defined $mr) {
1261                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1262                                 }
1263                         } else {
1264                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
1265                                 $mr->mods('');
1266                                 push @mr_list, $mr;
1267                         }
1268
1269                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
1270                         $sm->metarecord( $mr->id );
1271                         $sm->source( $entry->id );
1272                         push @source_maps, $sm;
1273                 }
1274
1275                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1276                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1277
1278                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1279                 for my $rd_field ( keys %descriptor_code ) {
1280                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1281                 }
1282                 $rd_obj->record( $docid );
1283                 push @rd_list, $rd_obj;
1284
1285                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
1286
1287                 # step 2: build the KOHA rows
1288                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1289                 $_->record( $docid ) for (@tmp_list);
1290                 push @ns_list, @tmp_list;
1291
1292                 $ret++;
1293
1294                 last unless ($self->api_name =~ /batch$/o);
1295         }
1296
1297         $rm_old_rd->run( { record => \@docids } );
1298         $rm_old_fr->run( { record => \@docids } );
1299         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1300         $rm_old_tr->run( { source => \@docids } );
1301         $rm_old_ar->run( { source => \@docids } );
1302         $rm_old_sr->run( { source => \@docids } );
1303         $rm_old_kr->run( { source => \@docids } );
1304         $rm_old_ser->run( { source => \@docids } );
1305
1306         unless ($no_map) {
1307                 my ($sm) = $create_source_map->run(@source_maps);
1308                 unless (defined $sm) {
1309                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1310                 }
1311                 my ($mr) = $mr_update->run(@mr_list);
1312                 unless (defined $mr) {
1313                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1314                 }
1315         }
1316
1317         my ($re) = $update_entry->run(@entry_list);
1318         unless (defined $re) {
1319                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1320         }
1321
1322         my ($rd) = $rd_create->run(@rd_list);
1323         unless (defined $rd) {
1324                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1325         }
1326
1327         my ($fr) = $fr_create->run(@ns_list);
1328         unless (defined $fr) {
1329                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1330         }
1331
1332         # step 5: insert the new metadata
1333         for my $class ( qw/title author subject keyword series/ ) {
1334                 my @md_list = ();
1335                 for my $doc ( @mods_data ) {
1336                         my ($did) = keys %$doc;
1337                         my ($data) = values %$doc;
1338
1339                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1340                         for my $row ( keys %{ $$data{$class} } ) {
1341                                 next unless (exists $$data{$class}{$row});
1342                                 next unless ($$data{$class}{$row}{value});
1343                                 my $fm_obj = $fm_constructor->new;
1344                                 $fm_obj->value( $$data{$class}{$row}{value} );
1345                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
1346                                 $fm_obj->source( $did );
1347                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1348
1349                                 push @md_list, $fm_obj;
1350                         }
1351                 }
1352                         
1353                 my ($cr) = $$create{$class}->run(@md_list);
1354                 unless (defined $cr) {
1355                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
1356                 }
1357         }
1358
1359         unless ($outer_xact) {
1360                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1361                 my ($c) = $commit->run;
1362                 unless (defined $c and $c) {
1363                         $rollback->run;
1364                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1365                 }
1366         }
1367
1368         return $ret;
1369 }
1370 __PACKAGE__->register_method( 
1371         api_name        => "open-ils.worm.wormize",
1372         method          => "wormize",
1373         api_level       => 1,
1374         argc            => 1,
1375 );
1376 __PACKAGE__->register_method( 
1377         api_name        => "open-ils.worm.wormize.no_map",
1378         method          => "wormize",
1379         api_level       => 1,
1380         argc            => 1,
1381 );
1382 __PACKAGE__->register_method( 
1383         api_name        => "open-ils.worm.wormize.batch",
1384         method          => "wormize",
1385         api_level       => 1,
1386         argc            => 1,
1387 );
1388 __PACKAGE__->register_method( 
1389         api_name        => "open-ils.worm.wormize.no_map.batch",
1390         method          => "wormize",
1391         api_level       => 1,
1392         argc            => 1,
1393 );
1394
1395
1396 my $ain_xact;
1397 my $abegin;
1398 my $acommit;
1399 my $arollback;
1400 my $alookup;
1401 my $aupdate_entry;
1402 my $amr_lookup;
1403 my $amr_update;
1404 my $amr_create;
1405 my $acreate_source_map;
1406 my $asm_lookup;
1407 my $arm_old_rd;
1408 my $arm_old_sm;
1409 my $arm_old_fr;
1410 my $arm_old_tr;
1411 my $arm_old_ar;
1412 my $arm_old_sr;
1413 my $arm_old_kr;
1414 my $arm_old_ser;
1415
1416 my $afr_create;
1417 my $ard_create;
1418 my $acreate = {};
1419
1420 sub authority_wormize {
1421
1422         my $self = shift;
1423         my $client = shift;
1424         my @docids = @_;
1425
1426         my $no_map = 0;
1427         if ($self->api_name =~ /no_map/o) {
1428                 $no_map = 1;
1429         }
1430
1431         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1432                 unless ($in_xact);
1433         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1434                 unless ($begin);
1435         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1436                 unless ($commit);
1437         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1438                 unless ($rollback);
1439         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1440                 unless ($alookup);
1441         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1442                 unless ($aupdate_entry);
1443         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1444                 unless ($arm_old_rd);
1445         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1446                 unless ($arm_old_fr);
1447         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1448                 unless ($ard_create);
1449         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1450                 unless ($afr_create);
1451
1452
1453         my ($outer_xact) = $in_xact->run;
1454         try {
1455                 unless ($outer_xact) {
1456                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1457                         my ($r) = $begin->run($client);
1458                         unless (defined $r and $r) {
1459                                 $rollback->run;
1460                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1461                         }
1462                 }
1463         } catch Error with {
1464                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1465         };
1466
1467         my @source_maps;
1468         my @entry_list;
1469         my @mr_list;
1470         my @rd_list;
1471         my @ns_list;
1472         my @mads_data;
1473         my $ret = 0;
1474         for my $entry ( $lookup->run(@docids) ) {
1475                 # step -1: grab the doc from storage
1476                 next unless ($entry);
1477
1478                 #if(!$mads_sheet) {
1479                 #       my $xslt_doc = $parser->parse_file(
1480                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1481                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1482                 #}
1483
1484                 my $xml = $entry->marc;
1485                 my $docid = $entry->id;
1486                 my $marcdoc = $parser->parse_string($xml);
1487                 #my $madsdoc = $mads_sheet->transform($marcdoc);
1488
1489                 #my $mads = $madsdoc->documentElement;
1490                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1491
1492                 push @entry_list, $entry;
1493
1494                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1495                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1496
1497                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1498                 for my $rd_field ( keys %descriptor_code ) {
1499                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1500                 }
1501                 $rd_obj->record( $docid );
1502                 push @rd_list, $rd_obj;
1503
1504                 # step 2: build the KOHA rows
1505                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1506                 $_->record( $docid ) for (@tmp_list);
1507                 push @ns_list, @tmp_list;
1508
1509                 $ret++;
1510
1511                 last unless ($self->api_name =~ /batch$/o);
1512         }
1513
1514         $arm_old_rd->run( { record => \@docids } );
1515         $arm_old_fr->run( { record => \@docids } );
1516
1517         my ($rd) = $ard_create->run(@rd_list);
1518         unless (defined $rd) {
1519                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
1520         }
1521
1522         my ($fr) = $fr_create->run(@ns_list);
1523         unless (defined $fr) {
1524                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
1525         }
1526
1527         unless ($outer_xact) {
1528                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1529                 my ($c) = $commit->run;
1530                 unless (defined $c and $c) {
1531                         $rollback->run;
1532                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1533                 }
1534         }
1535
1536         return $ret;
1537 }
1538 __PACKAGE__->register_method( 
1539         api_name        => "open-ils.worm.authortiy.wormize",
1540         method          => "wormize",
1541         api_level       => 1,
1542         argc            => 1,
1543 );
1544 __PACKAGE__->register_method( 
1545         api_name        => "open-ils.worm.authority.wormize.batch",
1546         method          => "wormize",
1547         api_level       => 1,
1548         argc            => 1,
1549 );
1550
1551
1552 # --------------------------------------------------------------------------------
1553
1554
1555 sub _marcxml_to_full_rows {
1556
1557         my $marcxml = shift;
1558         my $type = shift || 'Fieldmapper::metabib::full_rec';
1559
1560         my @ns_list;
1561         
1562         my $root = $marcxml->documentElement;
1563
1564         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1565                 next unless $tagline;
1566
1567                 my $ns = new Fieldmapper::metabib::full_rec;
1568
1569                 $ns->tag( 'LDR' );
1570                 my $val = NFD($tagline->textContent);
1571                 $val =~ s/(\pM+)//gso;
1572                 $ns->value( $val );
1573
1574                 push @ns_list, $ns;
1575         }
1576
1577         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1578                 next unless $tagline;
1579
1580                 my $ns = new Fieldmapper::metabib::full_rec;
1581
1582                 $ns->tag( $tagline->getAttribute( "tag" ) );
1583                 my $val = NFD($tagline->textContent);
1584                 $val =~ s/(\pM+)//gso;
1585                 $ns->value( $val );
1586
1587                 push @ns_list, $ns;
1588         }
1589
1590         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1591                 next unless $tagline;
1592
1593                 my $tag = $tagline->getAttribute( "tag" );
1594                 my $ind1 = $tagline->getAttribute( "ind1" );
1595                 my $ind2 = $tagline->getAttribute( "ind2" );
1596
1597                 for my $data ( $tagline->childNodes ) {
1598                         next unless $data;
1599
1600                         my $ns = $type->new;
1601
1602                         $ns->tag( $tag );
1603                         $ns->ind1( $ind1 );
1604                         $ns->ind2( $ind2 );
1605                         $ns->subfield( $data->getAttribute( "code" ) );
1606                         my $val = NFD($data->textContent);
1607                         $val =~ s/(\pM+)//gso;
1608                         $ns->value( lc($val) );
1609
1610                         push @ns_list, $ns;
1611                 }
1612         }
1613         return @ns_list;
1614 }
1615
1616 sub _get_field_value {
1617
1618         my( $root, $xpath ) = @_;
1619
1620         my $string = "";
1621
1622         # grab the set of matching nodes
1623         my @nodes = $root->findnodes( $xpath );
1624         for my $value (@nodes) {
1625
1626                 # grab all children of the node
1627                 my @children = $value->childNodes();
1628                 for my $child (@children) {
1629
1630                         # add the childs content to the growing buffer
1631                         my $content = quotemeta($child->textContent);
1632                         next if ($string =~ /$content/);  # uniquify the values
1633                         $string .= $child->textContent . " ";
1634                 }
1635                 if( ! @children ) {
1636                         $string .= $value->textContent . " ";
1637                 }
1638         }
1639         $string = NFD($string);
1640         $string =~ s/(\pM)//gso;
1641         return lc($string);
1642 }
1643
1644
1645 sub modsdoc_to_values {
1646         my( $self, $mods ) = @_;
1647         my $data = {};
1648         for my $class (keys %$xpathset) {
1649                 $data->{$class} = {};
1650                 for my $type (keys %{$xpathset->{$class}}) {
1651                         $data->{$class}->{$type} = {};
1652                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
1653                 }
1654         }
1655         return $data;
1656 }
1657
1658
1659 1;
1660
1661