strip bad chars from class-indexed data and remove mvr uppon metarecord remapping
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / WoRM.pm
1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
3 use open qw/:utf8/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21
22 our $log = 'OpenSRF::Utils::Logger';
23 our $xml_util = OpenILS::Utils::FlatXML->new();
24
25 our $parser = XML::LibXML->new();
26 our $xslt = XML::LibXSLT->new();
27 our $mods_sheet;
28 our $mads_sheet;
29
30 our $st_sess;
31 sub st_sess {
32         my $self = shift;
33         my $sess = shift;
34         $st_sess = $sess if ($sess);
35         return $st_sess;
36 }
37
38 our $xpathset = {};
39
40 sub initialize {}
41 sub child_init {}
42
43 sub post_init {
44         $log->debug("Running post_init", DEBUG);
45
46         unless ($mods_sheet) {
47                 $log->debug("Loading MODS XSLT", DEBUG);
48                 my $xslt_doc = $parser->parse_file(
49                         OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
50                 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
51         }
52
53         #if (!__PACKAGE__->st_sess()) {
54         #       $log->debug("Creating cached storage server session", DEBUG);
55         #       __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
56         #}
57
58         unless (keys %$xpathset) {
59                 my $req = __PACKAGE__->storage_req('open-ils.storage.direct.config.metabib_field.retrieve.all.atomic');
60                 for my $f (@$req) {
61                         $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
62                         $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
63                         $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
64                 }
65         }
66 }
67
68
69 sub in_transaction {
70         OpenILS::Application::WoRM->post_init();
71         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
72 }
73
74 sub begin_transaction {
75         my $self = shift;
76         my $client = shift;
77         
78         OpenILS::Application::WoRM->post_init();
79         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
80         
81         try {
82                 if (!$outer_xact) {
83                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
84                         #__PACKAGE__->st_sess->connect;
85                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
86                         unless (defined $r and $r) {
87                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
88                                 #__PACKAGE__->st_sess->disconnect;
89                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
90                         }
91                 }
92         } otherwise {
93                 $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
94         };
95
96         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
97 }
98
99 sub rollback_transaction {
100         my $self = shift;
101         my $client = shift;
102
103         OpenILS::Application::WoRM->post_init();
104         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
105
106         try {
107                 if ($outer_xact) {
108                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
109                 } else {
110                         $log->debug("WoRM isn't inside a transaction.", INFO);
111                 }
112         } catch Error with {
113                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
114         };
115
116         return 1;
117 }
118
119 sub commit_transaction {
120         my $self = shift;
121         my $client = shift;
122
123         OpenILS::Application::WoRM->post_init();
124         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
125
126         try {
127                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
128                 if ($outer_xact) {
129                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
130                         unless (defined $r and $r) {
131                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
132                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
133                         }
134                         #__PACKAGE__->st_sess->disconnect;
135                 } else {
136                         $log->debug("WoRM isn't inside a transaction.", INFO);
137                 }
138         } catch Error with {
139                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
140         };
141
142         return 1;
143 }
144
145 sub storage_req {
146         my $self = shift;
147         my $method = shift;
148         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
149         return shift( @res );
150 }
151
152 sub scrub_authority_record {
153         my $self = shift;
154         my $client = shift;
155         my $rec = shift;
156
157         my $commit = 0;
158         if (!OpenILS::Application::WoRM->in_transaction) {
159                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
160                 $commit = 1;
161         }
162
163         my $success = 1;
164         try {
165                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
166
167                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
168                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
169
170                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
171         } otherwise {
172                 $log->debug('Scrubbing failed : '.shift(), ERROR);
173                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
174                 $success = 0;
175         };
176
177         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
178         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
179         return $success;
180 }
181 __PACKAGE__->register_method(  
182         api_name        => "open-ils.worm.scrub.authority",
183         method          => "scrub_authority_record",
184         api_level       => 1,
185         argc            => 1,
186 );                      
187
188
189 sub scrub_metabib_record {
190         my $self = shift;
191         my $client = shift;
192         my $rec = shift;
193
194         my $commit = 0;
195         if (!OpenILS::Application::WoRM->in_transaction) {
196                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
197                 $commit = 1;
198         }
199
200         my $success = 1;
201         try {
202                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
203                 
204                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
205                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
206                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
207                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
208                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
209                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
210                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
211                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
212
213                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
214                 my $masters = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
215
216                 for my $mr (@$masters) {
217                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
218                         my $others = OpenILS::Application::WoRM->storage_req(
219                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
220
221                         if (@$others) {
222                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
223                                 $mr->master_record($others->[0]->source);
224                                 OpenILS::Application::WoRM->storage_req(
225                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
226                                         { id => $mr->id },
227                                         { master_record => $others->[0]->source, mods => undef }
228                                 );
229                         } else {
230                                 warn "Removing metarecord whose master is $rec";
231                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
232                                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
233                                 warn "Metarecord removed";
234                                 $log->debug( "Metarecord removed", DEBUG);
235                         }
236                 }
237
238                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
239
240         } otherwise {
241                 $log->debug('Scrubbing failed : '.shift(), ERROR);
242                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
243                 $success = 0;
244         };
245
246         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
247         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
248         return $success;
249 }
250 __PACKAGE__->register_method(  
251         api_name        => "open-ils.worm.scrub.biblio",
252         method          => "scrub_metabib_record",
253         api_level       => 1,
254         argc            => 1,
255 );                      
256
257 sub wormize_biblio_record {
258         my $self = shift;
259         my $client = shift;
260         my $rec = shift;
261
262         my $commit = 0;
263         if (!OpenILS::Application::WoRM->in_transaction) {
264                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
265                 $commit = 1;
266         }
267
268         my $success = 1;
269         try {
270                 # clean up the cruft
271                 unless ($self->api_name =~ /noscrub/o) {
272                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
273                 }
274
275                 # now redo 'em
276                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
277
278                 my @full_rec = ();
279                 my @rec_descriptor = ();
280                 my %field_entry = (
281                         title   => [],
282                         author  => [],
283                         subject => [],
284                         keyword => [],
285                         series  => [],
286                 );
287                 my @metarecord = ();
288                 my @source_map = ();
289                 for my $r (@$bibs) {
290                         my $xml = $parser->parse_string($r->marc);
291
292                         # the full_rec stuff
293                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
294                                 $fr->record( $r->id );
295                                 push @full_rec, $fr;
296                         }
297
298                         # the rec_descriptor stuff
299                         my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
300                         $rd->record( $r->id );
301                         push @rec_descriptor, $rd;
302                         
303                         # the indexing field entry stuff
304                         for my $class ( qw/title author subject keyword series/ ) {
305                                 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
306                                         $fe->source( $r->id );
307                                         push @{$field_entry{$class}}, $fe;
308                                 }
309                         }
310
311                         #update the fingerprint
312                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
313                         OpenILS::Application::WoRM->storage_req(
314                                 'open-ils.storage.direct.biblio.record_entry.remote_update',
315                                 { id => $r->id },
316                                 { fingerprint => $fp }
317                         ) if ($fp ne $r->fingerprint);
318
319                         unless ($self->api_name =~ /nomap/o) {
320                                 my $mr = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp  )->[0];
321                                 
322                                 unless ($mr) {
323                                         $mr = Fieldmapper::metabib::metarecord->new;
324                                         $mr->fingerprint( $fp );
325                                         $mr->master_record( $r->id );
326                                         $mr->id( OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
327                                 }
328
329                                 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
330                                 $mr_map->metarecord( $mr->id );
331                                 $mr_map->source( $r->id );
332                                 push @source_map, $mr_map;
333                         }
334
335                 }
336
337                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
338
339                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create', @source_map ) if (@source_map);
340                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
341                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.batch.create', @full_rec ) if (@full_rec);
342                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.batch.create', @{ $field_entry{title} } ) if (@{ $field_entry{title} });
343                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.batch.create', @{ $field_entry{author} } ) if (@{ $field_entry{author} });
344                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create', @{ $field_entry{subject} } ) if (@{ $field_entry{subject} });
345                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create', @{ $field_entry{keyword} } ) if (@{ $field_entry{keyword} });
346                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.batch.create', @{ $field_entry{series} } ) if (@{ $field_entry{series} });
347
348                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
349
350         } otherwise {
351                 $log->debug('Wormization failed : '.shift(), ERROR);
352                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
353                 $success = 0;
354         };
355
356         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
357         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
358         return $success;
359 }
360 __PACKAGE__->register_method(
361         api_name        => "open-ils.worm.wormize.biblio",
362         method          => "wormize_biblio_record",
363         api_level       => 1,
364         argc            => 1,
365 );
366 __PACKAGE__->register_method(
367         api_name        => "open-ils.worm.wormize.biblio.nomap",
368         method          => "wormize_biblio_record",
369         api_level       => 1,
370         argc            => 1,
371 );
372 __PACKAGE__->register_method(
373         api_name        => "open-ils.worm.wormize.biblio.noscrub",
374         method          => "wormize_biblio_record",
375         api_level       => 1,
376         argc            => 1,
377 );
378 __PACKAGE__->register_method(
379         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
380         method          => "wormize_biblio_record",
381         api_level       => 1,
382         argc            => 1,
383 );
384
385 sub wormize_authority_record {
386         my $self = shift;
387         my $client = shift;
388         my $rec = shift;
389
390         my $commit = 0;
391         if (!OpenILS::Application::WoRM->in_transaction) {
392                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
393                 $commit = 1;
394         }
395
396         my $success = 1;
397         try {
398                 # clean up the cruft
399                 unless ($self->api_name =~ /noscrub/o) {
400                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
401                 }
402
403                 # now redo 'em
404                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
405
406                 my @full_rec = ();
407                 my @rec_descriptor = ();
408                 for my $r (@$bibs) {
409                         my $xml = $parser->parse_string($r->marc);
410
411                         # the full_rec stuff
412                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
413                                 $fr->record( $r->id );
414                                 push @full_rec, $fr;
415                         }
416
417                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
418                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
419                         #$rd->record( $r->id );
420                         #push @rec_descriptor, $rd;
421                         
422                 }
423
424                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
425
426                 #OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
427                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
428
429                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
430
431         } otherwise {
432                 $log->debug('Wormization failed : '.shift(), ERROR);
433                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
434                 $success = 0;
435         };
436
437         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
438         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
439         return $success;
440 }
441 __PACKAGE__->register_method(
442         api_name        => "open-ils.worm.wormize.authority",
443         method          => "wormize_authority_record",
444         api_level       => 1,
445         argc            => 1,
446 );
447 __PACKAGE__->register_method(
448         api_name        => "open-ils.worm.wormize.authority.noscrub",
449         method          => "wormize_authority_record",
450         api_level       => 1,
451         argc            => 1,
452 );
453
454
455 # --------------------------------------------------------------------------------
456 # MARC index extraction
457
458 package OpenILS::Application::WoRM::XPATH;
459 use base qw/OpenILS::Application::WoRM/;
460 use Unicode::Normalize;
461
462 # give this a MODS documentElement and an XPATH expression
463 sub _xpath_to_string {
464         my $xml = shift;
465         my $xpath = shift;
466         my $ns_uri = shift;
467         my $ns_prefix = shift;
468         my $unique = shift;
469
470         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
471
472         my $string = "";
473
474         # grab the set of matching nodes
475         my @nodes = $xml->findnodes( $xpath );
476         for my $value (@nodes) {
477
478                 # grab all children of the node
479                 my @children = $value->childNodes();
480                 for my $child (@children) {
481
482                         # add the childs content to the growing buffer
483                         my $content = quotemeta($child->textContent);
484                         next if ($unique && $string =~ /$content/);  # uniquify the values
485                         $string .= $child->textContent . " ";
486                 }
487                 if( ! @children ) {
488                         $string .= $value->textContent . " ";
489                 }
490         }
491         NFD($string);
492         $string =~ s/(\pM)//gso;
493         return $string;
494 }
495
496 sub class_all_index_string_xml {
497         my $self = shift;
498         my $client = shift;
499         my $xml = shift;
500         my $class = shift;
501
502         OpenILS::Application::WoRM->post_init();
503         $xml = $parser->parse_string($xml) unless (ref $xml);
504         
505         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
506         for my $type ( keys %{ $xpathset->{$class} } ) {
507                 my $value =  _xpath_to_string(
508                                 $mods_sheet->transform($xml)->documentElement,
509                                 $xpathset->{$class}->{$type}->{xpath},
510                                 "http://www.loc.gov/mods/",
511                                 "mods",
512                                 1
513                 );
514
515                 next unless $value;
516
517                 $value =~ s/(\pM|\pC)//sgoe;
518                 $value =~ s/[\x80-\xff]//sgoe;
519                 $value = lc($value);
520
521                 my $fm = $class_constructor->new;
522                 $fm->value( $value );
523                 $fm->field( $xpathset->{$class}->{$type}->{id} );
524                 $client->respond($fm);
525         }
526         return undef;
527 }
528 __PACKAGE__->register_method(  
529         api_name        => "open-ils.worm.field_entry.class.xml",
530         method          => "class_all_index_string_xml",
531         api_level       => 1,
532         argc            => 1,
533         stream          => 1,
534 );                      
535
536 sub class_all_index_string_record {
537         my $self = shift;
538         my $client = shift;
539         my $rec = shift;
540         my $class = shift;
541
542         OpenILS::Application::WoRM->post_init();
543         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
544
545         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
546                 $fm->source($rec);
547                 $client->respond($fm);
548         }
549         return undef;
550 }
551 __PACKAGE__->register_method(  
552         api_name        => "open-ils.worm.field_entry.class.record",
553         method          => "class_all_index_string_record",
554         api_level       => 1,
555         argc            => 1,
556         stream          => 1,
557 );                      
558
559
560 sub class_index_string_xml {
561         my $self = shift;
562         my $client = shift;
563         my $xml = shift;
564         my $class = shift;
565         my $type = shift;
566
567         OpenILS::Application::WoRM->post_init();
568         $xml = $parser->parse_string($xml) unless (ref $xml);
569         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
570 }
571 __PACKAGE__->register_method(  
572         api_name        => "open-ils.worm.class.type.xml",
573         method          => "class_index_string_xml",
574         api_level       => 1,
575         argc            => 1,
576 );                      
577
578 sub class_index_string_record {
579         my $self = shift;
580         my $client = shift;
581         my $rec = shift;
582         my $class = shift;
583         my $type = shift;
584
585         OpenILS::Application::WoRM->post_init();
586         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
587
588         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
589         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
590         return $d;
591 }
592 __PACKAGE__->register_method(  
593         api_name        => "open-ils.worm.class.type.record",
594         method          => "class_index_string_record",
595         api_level       => 1,
596         argc            => 1,
597 );                      
598
599 sub xml_xpath {
600         my $self = shift;
601         my $client = shift;
602         my $xml = shift;
603         my $xpath = shift;
604         my $uri = shift;
605         my $prefix = shift;
606         my $unique = shift;
607
608         OpenILS::Application::WoRM->post_init();
609         $xml = $parser->parse_string($xml) unless (ref $xml);
610         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
611 }
612 __PACKAGE__->register_method(  
613         api_name        => "open-ils.worm.xpath.xml",
614         method          => "xml_xpath",
615         api_level       => 1,
616         argc            => 1,
617 );                      
618
619 sub record_xpath {
620         my $self = shift;
621         my $client = shift;
622         my $rec = shift;
623         my $xpath = shift;
624         my $uri = shift;
625         my $prefix = shift;
626         my $unique = shift;
627
628         OpenILS::Application::WoRM->post_init();
629         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
630
631         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
632         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
633         return $d;
634 }
635 __PACKAGE__->register_method(  
636         api_name        => "open-ils.worm.xpath.record",
637         method          => "record_xpath",
638         api_level       => 1,
639         argc            => 1,
640 );                      
641
642
643 # --------------------------------------------------------------------------------
644 # MARC Descriptor
645
646 package OpenILS::Application::WoRM::Biblio::Leader;
647 use base qw/OpenILS::Application::WoRM/;
648 use Unicode::Normalize;
649
650 our %biblio_descriptor_code = (
651         item_type => sub { substr($ldr,6,1); },
652         item_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/o) ? substr($oo8,29,1) : substr($oo8,23,1); },
653         bib_level => sub { substr($ldr,7,1); },
654         control_type => sub { substr($ldr,8,1); },
655         char_encoding => sub { substr($ldr,9,1); },
656         enc_level => sub { substr($ldr,17,1); },
657         cat_form => sub { substr($ldr,18,1); },
658         pub_status => sub { substr($ldr,5,1); },
659         item_lang => sub { substr($oo8,35,3); },
660         #lit_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
661         audience => sub { substr($oo8,22,1); },
662 );
663
664 sub _extract_biblio_descriptors {
665         my $xml = shift;
666
667         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
668         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
669
670         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
671         for my $rd_field ( keys %biblio_descriptor_code ) {
672                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
673         }
674
675         return $rd_obj;
676 }
677
678 sub extract_biblio_desc_xml {
679         my $self = shift;
680         my $client = shift;
681         my $xml = shift;
682
683         $xml = $parser->parse_string($xml) unless (ref $xml);
684
685         return _extract_biblio_descriptors( $xml );
686 }
687 __PACKAGE__->register_method(  
688         api_name        => "open-ils.worm.biblio_leader.xml",
689         method          => "extract_biblio_desc_xml",
690         api_level       => 1,
691         argc            => 1,
692 );                      
693
694 sub extract_biblio_desc_record {
695         my $self = shift;
696         my $client = shift;
697         my $rec = shift;
698
699         OpenILS::Application::WoRM->post_init();
700         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
701
702         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
703         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
704         return $d;
705 }
706 __PACKAGE__->register_method(  
707         api_name        => "open-ils.worm.biblio_leader.record",
708         method          => "extract_biblio_desc_record",
709         api_level       => 1,
710         argc            => 1,
711 );                      
712
713 # --------------------------------------------------------------------------------
714 # Flat MARC
715
716 package OpenILS::Application::WoRM::FlatMARC;
717 use base qw/OpenILS::Application::WoRM/;
718 use Unicode::Normalize;
719
720
721 sub _marcxml_to_full_rows {
722
723         my $marcxml = shift;
724         my $xmltype = shift || 'metabib';
725
726         my $type = "Fieldmapper::${xmltype}::full_rec";
727
728         my @ns_list;
729         
730         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
731
732         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
733                 next unless $tagline;
734
735                 my $ns = $type->new;
736
737                 $ns->tag( 'LDR' );
738                 my $val = $tagline->textContent;
739                 NFD($val);
740                 $val =~ s/(\pM+)//gso;
741                 $ns->value( $val );
742
743                 push @ns_list, $ns;
744         }
745
746         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
747                 next unless $tagline;
748
749                 my $ns = $type->new;
750
751                 $ns->tag( $tagline->getAttribute( "tag" ) );
752                 my $val = $tagline->textContent;
753                 NFD($val);
754                 $val =~ s/(\pM+)//gso;
755                 $ns->value( $val );
756
757                 push @ns_list, $ns;
758         }
759
760         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
761                 next unless $tagline;
762
763                 my $tag = $tagline->getAttribute( "tag" );
764                 my $ind1 = $tagline->getAttribute( "ind1" );
765                 my $ind2 = $tagline->getAttribute( "ind2" );
766
767                 for my $data ( $tagline->childNodes ) {
768                         next unless $data;
769
770                         my $ns = $type->new;
771
772                         $ns->tag( $tag );
773                         $ns->ind1( $ind1 );
774                         $ns->ind2( $ind2 );
775                         $ns->subfield( $data->getAttribute( "code" ) );
776                         my $val = $data->textContent;
777                         NFD($val);
778                         $val =~ s/(\pM+)//gso;
779                         $ns->value( lc($val) );
780
781                         push @ns_list, $ns;
782                 }
783         }
784
785         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
786         return @ns_list;
787 }
788
789 sub flat_marc_xml {
790         my $self = shift;
791         my $client = shift;
792         my $xml = shift;
793
794         $xml = $parser->parse_string($xml) unless (ref $xml);
795
796         my $type = 'metabib';
797         $type = 'authority' if ($self->api_name =~ /authority/o);
798
799         OpenILS::Application::WoRM->post_init();
800
801         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
802         return undef;
803 }
804 __PACKAGE__->register_method(  
805         api_name        => "open-ils.worm.flat_marc.authority.xml",
806         method          => "flat_marc_xml",
807         api_level       => 1,
808         argc            => 1,
809         stream          => 1,
810 );                      
811 __PACKAGE__->register_method(  
812         api_name        => "open-ils.worm.flat_marc.biblio.xml",
813         method          => "flat_marc_xml",
814         api_level       => 1,
815         argc            => 1,
816         stream          => 1,
817 );                      
818
819 sub flat_marc_record {
820         my $self = shift;
821         my $client = shift;
822         my $rec = shift;
823
824         my $type = 'biblio';
825         $type = 'authority' if ($self->api_name =~ /authority/o);
826
827         OpenILS::Application::WoRM->post_init();
828         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
829
830         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
831         return undef;
832 }
833 __PACKAGE__->register_method(  
834         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
835         method          => "flat_marc_record",
836         api_level       => 1,
837         argc            => 1,
838         stream          => 1,
839 );                      
840 __PACKAGE__->register_method(  
841         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
842         method          => "flat_marc_record",
843         api_level       => 1,
844         argc            => 1,
845         stream          => 1,
846 );                      
847
848
849 # --------------------------------------------------------------------------------
850 # Fingerprinting
851
852 package OpenILS::Application::WoRM::Biblio::Fingerprint;
853 use base qw/OpenILS::Application::WoRM/;
854 use Unicode::Normalize;
855 use OpenSRF::EX qw/:try/;
856
857 my @fp_mods_xpath = (
858         '//mods:mods/mods:typeOfResource[text()="text"]' => [
859                         title   => {
860                                         xpath   => [
861                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
862                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
863                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
864                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
865                                         ],
866                                         fixup   => sub {
867                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
868                                                         NFD($text);
869                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
870                                                         $text =~ s/\pM+//gso;
871                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
872                                                         $text = lc($text);
873                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
874                                                         $text =~ s/\s+/ /sgo;
875                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
876                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
877                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
878                                                         $text =~ s/\b(?:the|an?)\b//sgo;
879                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
880                                                         $text =~ s/\[.[^\]]+\]//sgo;
881                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
882                                                         $text =~ s/\s*[;\/\.]*$//sgo;
883                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
884                                                 },
885                         },
886                         author  => {
887                                         xpath   => [
888                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
889                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
890                                         ],
891                                         fixup   => sub {
892                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
893                                                         NFD($text);
894                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
895                                                         $text =~ s/\pM+//gso;
896                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
897                                                         $text = lc($text);
898                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
899                                                         $text =~ s/\s+/ /sgo;
900                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
901                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
902                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
903                                                         $text =~ s/,?\s+.*$//sgo;
904                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
905                                                 },
906                         },
907         ],
908
909         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
910                         title   => {
911                                         xpath   => [
912                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
913                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
914                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
915                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
916                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
917                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
918                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
919                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
920                                         ],
921                                         fixup   => sub {
922                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
923                                                         NFD($text);
924                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
925                                                         $text =~ s/\pM+//gso;
926                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
927                                                         $text = lc($text);
928                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
929                                                         $text =~ s/\s+/ /sgo;
930                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
931                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
932                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
933                                                         $text =~ s/\b(?:the|an?)\b//sgo;
934                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
935                                                         $text =~ s/\[.[^\]]+\]//sgo;
936                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
937                                                         $text =~ s/\s*[;\/\.]*$//sgo;
938                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
939                                                 },
940                         },
941                         author  => {
942                                         xpath   => [
943                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
944                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
945                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
946                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
947                                         ],
948                                         fixup   => sub {
949                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
950                                                         NFD($text);
951                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
952                                                         $text =~ s/\pM+//gso;
953                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
954                                                         $text = lc($text);
955                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
956                                                         $text =~ s/\s+/ /sgo;
957                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
958                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
959                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
960                                                         $text =~ s/,?\s+.*$//sgo;
961                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
962                                                 },
963                         },
964         ],
965
966 );
967
968 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
969
970 sub _fp_mods {
971         my $mods = shift;
972         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
973
974         my $fp_string = '';
975
976         my $match_index = 0;
977         my $block_index = 1;
978         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
979                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
980
981                         my $block_name_index = 0;
982                         my $block_value_index = 1;
983                         my $block = $fp_mods_xpath[$block_index];
984                         while ( my $part = $$block[$block_value_index] ) {
985                                 local $text;
986                                 for my $xpath ( @{ $part->{xpath} } ) {
987                                         $text = $mods->findvalue( $xpath );
988                                         last if ($text);
989                                 }
990
991                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
992
993                                 if ($text) {
994                                         $$part{fixup}->();
995                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
996                                         $fp_string .= $text;
997                                 }
998
999                                 $block_name_index += 2;
1000                                 $block_value_index += 2;
1001                         }
1002                 }
1003                 if ($fp_string) {
1004                         $fp_string =~ s/\W+//gso;
1005                         $log->debug("Fingerprint is [$fp_string]", INFO);;
1006                         return $fp_string;
1007                 }
1008
1009                 $match_index += 2;
1010                 $block_index += 2;
1011         }
1012         return undef;
1013 }
1014
1015 sub refingerprint_bibrec {
1016         my $self = shift;
1017         my $client = shift;
1018         my $rec = shift;
1019
1020         my $commit = 0;
1021         if (!OpenILS::Application::WoRM->in_transaction) {
1022                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1023                 $commit = 1;
1024         }
1025
1026         my $success = 1;
1027         try {
1028                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1029                 OpenILS::Application::WoRM->storage_req(
1030                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1031                         { id => $_->id },
1032                         { fingerprint => $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $_->marc ) }
1033                 ) for (@$bibs);
1034         } otherwise {
1035                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1036                 $success = 0;
1037         };
1038
1039         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
1040         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
1041         return $success;
1042 }
1043 __PACKAGE__->register_method(  
1044         api_name        => "open-ils.worm.fingerprint.record.update",
1045         method          => "refingerprint_bibrec",
1046         api_level       => 1,
1047         argc            => 1,
1048 );                      
1049
1050
1051 sub fingerprint_bibrec {
1052         my $self = shift;
1053         my $client = shift;
1054         my $rec = shift;
1055
1056         OpenILS::Application::WoRM->post_init();
1057         my $r = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1058
1059         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1060         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1061         return $fp;
1062
1063 }
1064 __PACKAGE__->register_method(  
1065         api_name        => "open-ils.worm.fingerprint.record",
1066         method          => "fingerprint_bibrec",
1067         api_level       => 1,
1068         argc            => 1,
1069 );                      
1070
1071 sub fingerprint_mods {
1072         my $self = shift;
1073         my $client = shift;
1074         my $xml = shift;
1075
1076         OpenILS::Application::WoRM->post_init();
1077         my $mods = $parser->parse_string($xml)->documentElement;
1078
1079         return _fp_mods( $mods );
1080 }
1081 __PACKAGE__->register_method(  
1082         api_name        => "open-ils.worm.fingerprint.mods",
1083         method          => "fingerprint_mods",
1084         api_level       => 1,
1085         argc            => 1,
1086 );                      
1087
1088 sub fingerprint_marc {
1089         my $self = shift;
1090         my $client = shift;
1091         my $xml = shift;
1092
1093         $xml = $parser->parse_string($xml) unless (ref $xml);
1094
1095         OpenILS::Application::WoRM->post_init();
1096         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1097         $log->debug("Returning [$fp] as fingerprint", INFO);
1098         return $fp;
1099 }
1100 __PACKAGE__->register_method(  
1101         api_name        => "open-ils.worm.fingerprint.marc",
1102         method          => "fingerprint_marc",
1103         api_level       => 1,
1104         argc            => 1,
1105 );                      
1106
1107
1108 # --------------------------------------------------------------------------------
1109
1110 1;
1111 __END__
1112 my $in_xact;
1113 my $begin;
1114 my $commit;
1115 my $rollback;
1116 my $lookup;
1117 my $update_entry;
1118 my $mr_lookup;
1119 my $mr_update;
1120 my $mr_create;
1121 my $create_source_map;
1122 my $sm_lookup;
1123 my $rm_old_rd;
1124 my $rm_old_sm;
1125 my $rm_old_fr;
1126 my $rm_old_tr;
1127 my $rm_old_ar;
1128 my $rm_old_sr;
1129 my $rm_old_kr;
1130 my $rm_old_ser;
1131
1132 my $fr_create;
1133 my $rd_create;
1134 my $create = {};
1135
1136 my %descriptor_code = (
1137         item_type => 'substr($ldr,6,1)',
1138         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1139         bib_level => 'substr($ldr,7,1)',
1140         control_type => 'substr($ldr,8,1)',
1141         char_encoding => 'substr($ldr,9,1)',
1142         enc_level => 'substr($ldr,17,1)',
1143         cat_form => 'substr($ldr,18,1)',
1144         pub_status => 'substr($ldr,5,1)',
1145         item_lang => 'substr($oo8,35,3)',
1146         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1147         audience => 'substr($oo8,22,1)',
1148 );
1149
1150 sub wormize {
1151
1152         my $self = shift;
1153         my $client = shift;
1154         my @docids = @_;
1155
1156         my $no_map = 0;
1157         if ($self->api_name =~ /no_map/o) {
1158                 $no_map = 1;
1159         }
1160
1161         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1162                 unless ($in_xact);
1163         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1164                 unless ($begin);
1165         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1166                 unless ($commit);
1167         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1168                 unless ($rollback);
1169         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1170                 unless ($sm_lookup);
1171         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1172                 unless ($mr_lookup);
1173         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1174                 unless ($mr_update);
1175         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1176                 unless ($lookup);
1177         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1178                 unless ($update_entry);
1179         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1180                 unless ($rm_old_sm);
1181         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1182                 unless ($rm_old_rd);
1183         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1184                 unless ($rm_old_fr);
1185         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1186                 unless ($rm_old_tr);
1187         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1188                 unless ($rm_old_ar);
1189         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1190                 unless ($rm_old_sr);
1191         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1192                 unless ($rm_old_kr);
1193         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1194                 unless ($rm_old_ser);
1195         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1196                 unless ($mr_create);
1197         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1198                 unless ($create_source_map);
1199         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1200                 unless ($rd_create);
1201         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1202                 unless ($fr_create);
1203         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1204                 unless ($$create{title});
1205         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1206                 unless ($$create{author});
1207         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1208                 unless ($$create{subject});
1209         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1210                 unless ($$create{keyword});
1211         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1212                 unless ($$create{series});
1213
1214
1215         my ($outer_xact) = $in_xact->run;
1216         try {
1217                 unless ($outer_xact) {
1218                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1219                         my ($r) = $begin->run($client);
1220                         unless (defined $r and $r) {
1221                                 $rollback->run;
1222                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1223                         }
1224                 }
1225         } catch Error with {
1226                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1227         };
1228
1229         my @source_maps;
1230         my @entry_list;
1231         my @mr_list;
1232         my @rd_list;
1233         my @ns_list;
1234         my @mods_data;
1235         my $ret = 0;
1236         for my $entry ( $lookup->run(@docids) ) {
1237                 # step -1: grab the doc from storage
1238                 next unless ($entry);
1239
1240                 if(!$mods_sheet) {
1241                         my $xslt_doc = $parser->parse_file(
1242                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1243                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
1244                 }
1245
1246                 my $xml = $entry->marc;
1247                 my $docid = $entry->id;
1248                 my $marcdoc = $parser->parse_string($xml);
1249                 my $modsdoc = $mods_sheet->transform($marcdoc);
1250
1251                 my $mods = $modsdoc->documentElement;
1252                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1253
1254                 $entry->fingerprint( fingerprint_mods( $mods ) );
1255                 push @entry_list, $entry;
1256
1257                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
1258
1259                 unless ($no_map) {
1260                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
1261                         if (!$mr || !@$mr) {
1262                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1263                                 $mr = new Fieldmapper::metabib::metarecord;
1264                                 $mr->fingerprint( $entry->fingerprint );
1265                                 $mr->master_record( $entry->id );
1266                                 my ($new_mr) = $mr_create->run($mr);
1267                                 $mr->id($new_mr);
1268                                 unless (defined $mr) {
1269                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1270                                 }
1271                         } else {
1272                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
1273                                 $mr->mods('');
1274                                 push @mr_list, $mr;
1275                         }
1276
1277                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
1278                         $sm->metarecord( $mr->id );
1279                         $sm->source( $entry->id );
1280                         push @source_maps, $sm;
1281                 }
1282
1283                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1284                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1285
1286                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1287                 for my $rd_field ( keys %descriptor_code ) {
1288                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1289                 }
1290                 $rd_obj->record( $docid );
1291                 push @rd_list, $rd_obj;
1292
1293                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
1294
1295                 # step 2: build the KOHA rows
1296                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1297                 $_->record( $docid ) for (@tmp_list);
1298                 push @ns_list, @tmp_list;
1299
1300                 $ret++;
1301
1302                 last unless ($self->api_name =~ /batch$/o);
1303         }
1304
1305         $rm_old_rd->run( { record => \@docids } );
1306         $rm_old_fr->run( { record => \@docids } );
1307         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1308         $rm_old_tr->run( { source => \@docids } );
1309         $rm_old_ar->run( { source => \@docids } );
1310         $rm_old_sr->run( { source => \@docids } );
1311         $rm_old_kr->run( { source => \@docids } );
1312         $rm_old_ser->run( { source => \@docids } );
1313
1314         unless ($no_map) {
1315                 my ($sm) = $create_source_map->run(@source_maps);
1316                 unless (defined $sm) {
1317                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1318                 }
1319                 my ($mr) = $mr_update->run(@mr_list);
1320                 unless (defined $mr) {
1321                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1322                 }
1323         }
1324
1325         my ($re) = $update_entry->run(@entry_list);
1326         unless (defined $re) {
1327                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1328         }
1329
1330         my ($rd) = $rd_create->run(@rd_list);
1331         unless (defined $rd) {
1332                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1333         }
1334
1335         my ($fr) = $fr_create->run(@ns_list);
1336         unless (defined $fr) {
1337                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1338         }
1339
1340         # step 5: insert the new metadata
1341         for my $class ( qw/title author subject keyword series/ ) {
1342                 my @md_list = ();
1343                 for my $doc ( @mods_data ) {
1344                         my ($did) = keys %$doc;
1345                         my ($data) = values %$doc;
1346
1347                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1348                         for my $row ( keys %{ $$data{$class} } ) {
1349                                 next unless (exists $$data{$class}{$row});
1350                                 next unless ($$data{$class}{$row}{value});
1351                                 my $fm_obj = $fm_constructor->new;
1352                                 $fm_obj->value( $$data{$class}{$row}{value} );
1353                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
1354                                 $fm_obj->source( $did );
1355                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1356
1357                                 push @md_list, $fm_obj;
1358                         }
1359                 }
1360                         
1361                 my ($cr) = $$create{$class}->run(@md_list);
1362                 unless (defined $cr) {
1363                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
1364                 }
1365         }
1366
1367         unless ($outer_xact) {
1368                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1369                 my ($c) = $commit->run;
1370                 unless (defined $c and $c) {
1371                         $rollback->run;
1372                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1373                 }
1374         }
1375
1376         return $ret;
1377 }
1378 __PACKAGE__->register_method( 
1379         api_name        => "open-ils.worm.wormize",
1380         method          => "wormize",
1381         api_level       => 1,
1382         argc            => 1,
1383 );
1384 __PACKAGE__->register_method( 
1385         api_name        => "open-ils.worm.wormize.no_map",
1386         method          => "wormize",
1387         api_level       => 1,
1388         argc            => 1,
1389 );
1390 __PACKAGE__->register_method( 
1391         api_name        => "open-ils.worm.wormize.batch",
1392         method          => "wormize",
1393         api_level       => 1,
1394         argc            => 1,
1395 );
1396 __PACKAGE__->register_method( 
1397         api_name        => "open-ils.worm.wormize.no_map.batch",
1398         method          => "wormize",
1399         api_level       => 1,
1400         argc            => 1,
1401 );
1402
1403
1404 my $ain_xact;
1405 my $abegin;
1406 my $acommit;
1407 my $arollback;
1408 my $alookup;
1409 my $aupdate_entry;
1410 my $amr_lookup;
1411 my $amr_update;
1412 my $amr_create;
1413 my $acreate_source_map;
1414 my $asm_lookup;
1415 my $arm_old_rd;
1416 my $arm_old_sm;
1417 my $arm_old_fr;
1418 my $arm_old_tr;
1419 my $arm_old_ar;
1420 my $arm_old_sr;
1421 my $arm_old_kr;
1422 my $arm_old_ser;
1423
1424 my $afr_create;
1425 my $ard_create;
1426 my $acreate = {};
1427
1428 sub authority_wormize {
1429
1430         my $self = shift;
1431         my $client = shift;
1432         my @docids = @_;
1433
1434         my $no_map = 0;
1435         if ($self->api_name =~ /no_map/o) {
1436                 $no_map = 1;
1437         }
1438
1439         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1440                 unless ($in_xact);
1441         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1442                 unless ($begin);
1443         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1444                 unless ($commit);
1445         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1446                 unless ($rollback);
1447         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1448                 unless ($alookup);
1449         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1450                 unless ($aupdate_entry);
1451         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1452                 unless ($arm_old_rd);
1453         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1454                 unless ($arm_old_fr);
1455         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1456                 unless ($ard_create);
1457         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1458                 unless ($afr_create);
1459
1460
1461         my ($outer_xact) = $in_xact->run;
1462         try {
1463                 unless ($outer_xact) {
1464                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1465                         my ($r) = $begin->run($client);
1466                         unless (defined $r and $r) {
1467                                 $rollback->run;
1468                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1469                         }
1470                 }
1471         } catch Error with {
1472                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1473         };
1474
1475         my @source_maps;
1476         my @entry_list;
1477         my @mr_list;
1478         my @rd_list;
1479         my @ns_list;
1480         my @mads_data;
1481         my $ret = 0;
1482         for my $entry ( $lookup->run(@docids) ) {
1483                 # step -1: grab the doc from storage
1484                 next unless ($entry);
1485
1486                 #if(!$mads_sheet) {
1487                 #       my $xslt_doc = $parser->parse_file(
1488                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1489                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1490                 #}
1491
1492                 my $xml = $entry->marc;
1493                 my $docid = $entry->id;
1494                 my $marcdoc = $parser->parse_string($xml);
1495                 #my $madsdoc = $mads_sheet->transform($marcdoc);
1496
1497                 #my $mads = $madsdoc->documentElement;
1498                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1499
1500                 push @entry_list, $entry;
1501
1502                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1503                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1504
1505                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1506                 for my $rd_field ( keys %descriptor_code ) {
1507                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1508                 }
1509                 $rd_obj->record( $docid );
1510                 push @rd_list, $rd_obj;
1511
1512                 # step 2: build the KOHA rows
1513                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1514                 $_->record( $docid ) for (@tmp_list);
1515                 push @ns_list, @tmp_list;
1516
1517                 $ret++;
1518
1519                 last unless ($self->api_name =~ /batch$/o);
1520         }
1521
1522         $arm_old_rd->run( { record => \@docids } );
1523         $arm_old_fr->run( { record => \@docids } );
1524
1525         my ($rd) = $ard_create->run(@rd_list);
1526         unless (defined $rd) {
1527                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
1528         }
1529
1530         my ($fr) = $fr_create->run(@ns_list);
1531         unless (defined $fr) {
1532                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
1533         }
1534
1535         unless ($outer_xact) {
1536                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1537                 my ($c) = $commit->run;
1538                 unless (defined $c and $c) {
1539                         $rollback->run;
1540                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1541                 }
1542         }
1543
1544         return $ret;
1545 }
1546 __PACKAGE__->register_method( 
1547         api_name        => "open-ils.worm.authortiy.wormize",
1548         method          => "wormize",
1549         api_level       => 1,
1550         argc            => 1,
1551 );
1552 __PACKAGE__->register_method( 
1553         api_name        => "open-ils.worm.authority.wormize.batch",
1554         method          => "wormize",
1555         api_level       => 1,
1556         argc            => 1,
1557 );
1558
1559
1560 # --------------------------------------------------------------------------------
1561
1562
1563 sub _marcxml_to_full_rows {
1564
1565         my $marcxml = shift;
1566         my $type = shift || 'Fieldmapper::metabib::full_rec';
1567
1568         my @ns_list;
1569         
1570         my $root = $marcxml->documentElement;
1571
1572         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1573                 next unless $tagline;
1574
1575                 my $ns = new Fieldmapper::metabib::full_rec;
1576
1577                 $ns->tag( 'LDR' );
1578                 my $val = NFD($tagline->textContent);
1579                 $val =~ s/(\pM+)//gso;
1580                 $ns->value( $val );
1581
1582                 push @ns_list, $ns;
1583         }
1584
1585         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1586                 next unless $tagline;
1587
1588                 my $ns = new Fieldmapper::metabib::full_rec;
1589
1590                 $ns->tag( $tagline->getAttribute( "tag" ) );
1591                 my $val = NFD($tagline->textContent);
1592                 $val =~ s/(\pM+)//gso;
1593                 $ns->value( $val );
1594
1595                 push @ns_list, $ns;
1596         }
1597
1598         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1599                 next unless $tagline;
1600
1601                 my $tag = $tagline->getAttribute( "tag" );
1602                 my $ind1 = $tagline->getAttribute( "ind1" );
1603                 my $ind2 = $tagline->getAttribute( "ind2" );
1604
1605                 for my $data ( $tagline->childNodes ) {
1606                         next unless $data;
1607
1608                         my $ns = $type->new;
1609
1610                         $ns->tag( $tag );
1611                         $ns->ind1( $ind1 );
1612                         $ns->ind2( $ind2 );
1613                         $ns->subfield( $data->getAttribute( "code" ) );
1614                         my $val = NFD($data->textContent);
1615                         $val =~ s/(\pM+)//gso;
1616                         $ns->value( lc($val) );
1617
1618                         push @ns_list, $ns;
1619                 }
1620         }
1621         return @ns_list;
1622 }
1623
1624 sub _get_field_value {
1625
1626         my( $root, $xpath ) = @_;
1627
1628         my $string = "";
1629
1630         # grab the set of matching nodes
1631         my @nodes = $root->findnodes( $xpath );
1632         for my $value (@nodes) {
1633
1634                 # grab all children of the node
1635                 my @children = $value->childNodes();
1636                 for my $child (@children) {
1637
1638                         # add the childs content to the growing buffer
1639                         my $content = quotemeta($child->textContent);
1640                         next if ($string =~ /$content/);  # uniquify the values
1641                         $string .= $child->textContent . " ";
1642                 }
1643                 if( ! @children ) {
1644                         $string .= $value->textContent . " ";
1645                 }
1646         }
1647         $string = NFD($string);
1648         $string =~ s/(\pM)//gso;
1649         return lc($string);
1650 }
1651
1652
1653 sub modsdoc_to_values {
1654         my( $self, $mods ) = @_;
1655         my $data = {};
1656         for my $class (keys %$xpathset) {
1657                 $data->{$class} = {};
1658                 for my $type (keys %{$xpathset->{$class}}) {
1659                         $data->{$class}->{$type} = {};
1660                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
1661                 }
1662         }
1663         return $data;
1664 }
1665
1666
1667 1;
1668
1669