165b180c66d254cc814f65f3d5bb85826e8a798b
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / WoRM.pm
1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
3 use open qw/:utf8/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21
22 our $log = 'OpenSRF::Utils::Logger';
23 our $xml_util = OpenILS::Utils::FlatXML->new();
24
25 our $parser = XML::LibXML->new();
26 our $xslt = XML::LibXSLT->new();
27 our $mods_sheet;
28 our $mads_sheet;
29
30 our $st_sess;
31 sub st_sess {
32         my $self = shift;
33         my $sess = shift;
34         $st_sess = $sess if ($sess);
35         return $st_sess;
36 }
37
38 our $xpathset = {};
39
40 sub initialize {}
41 sub child_init {}
42
43 sub post_init {
44         $log->debug("Running post_init", DEBUG);
45
46         unless ($mods_sheet) {
47                 $log->debug("Loading MODS XSLT", DEBUG);
48                 my $xslt_doc = $parser->parse_file(
49                         OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
50                 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
51         }
52
53         #if (!__PACKAGE__->st_sess()) {
54         #       $log->debug("Creating cached storage server session", DEBUG);
55         #       __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
56         #}
57
58         unless (keys %$xpathset) {
59                 my $req = __PACKAGE__->storage_req('open-ils.storage.direct.config.metabib_field.retrieve.all.atomic');
60                 for my $f (@$req) {
61                         $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
62                         $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
63                         $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
64                 }
65         }
66 }
67
68
69 sub in_transaction {
70         OpenILS::Application::WoRM->post_init();
71         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
72 }
73
74 sub begin_transaction {
75         my $self = shift;
76         my $client = shift;
77         
78         OpenILS::Application::WoRM->post_init();
79         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
80         
81         try {
82                 if (!$outer_xact) {
83                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
84                         #__PACKAGE__->st_sess->connect;
85                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
86                         unless (defined $r and $r) {
87                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
88                                 #__PACKAGE__->st_sess->disconnect;
89                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
90                         }
91                 }
92         } otherwise {
93                 $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
94         };
95
96         return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
97 }
98
99 sub rollback_transaction {
100         my $self = shift;
101         my $client = shift;
102
103         OpenILS::Application::WoRM->post_init();
104         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
105
106         try {
107                 if ($outer_xact) {
108                         __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
109                 } else {
110                         $log->debug("WoRM isn't inside a transaction.", INFO);
111                 }
112         } catch Error with {
113                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
114         };
115
116         return 1;
117 }
118
119 sub commit_transaction {
120         my $self = shift;
121         my $client = shift;
122
123         OpenILS::Application::WoRM->post_init();
124         my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
125
126         try {
127                 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
128                 if ($outer_xact) {
129                         my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
130                         unless (defined $r and $r) {
131                                 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
132                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
133                         }
134                         #__PACKAGE__->st_sess->disconnect;
135                 } else {
136                         $log->debug("WoRM isn't inside a transaction.", INFO);
137                 }
138         } catch Error with {
139                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
140         };
141
142         return 1;
143 }
144
145 sub storage_req {
146         my $self = shift;
147         my $method = shift;
148         my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
149         return shift( @res );
150 }
151
152 sub scrub_authority_record {
153         my $self = shift;
154         my $client = shift;
155         my $rec = shift;
156
157         my $commit = 0;
158         if (!OpenILS::Application::WoRM->in_transaction) {
159                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
160                 $commit = 1;
161         }
162
163         my $success = 1;
164         try {
165                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
166
167                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
168                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
169
170                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
171         } otherwise {
172                 $log->debug('Scrubbing failed : '.shift(), ERROR);
173                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
174                 $success = 0;
175         };
176
177         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
178         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
179         return $success;
180 }
181 __PACKAGE__->register_method(  
182         api_name        => "open-ils.worm.scrub.authority",
183         method          => "scrub_authority_record",
184         api_level       => 1,
185         argc            => 1,
186 );                      
187
188
189 sub scrub_metabib_record {
190         my $self = shift;
191         my $client = shift;
192         my $rec = shift;
193
194         my $commit = 0;
195         if (!OpenILS::Application::WoRM->in_transaction) {
196                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
197                 $commit = 1;
198         }
199
200         my $success = 1;
201         try {
202                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
203                 
204                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
205                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
206                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
207                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
208                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
209                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
210                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
211                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
212
213                 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
214                 my $masters = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
215
216                 for my $mr (@$masters) {
217                         $log->debug( "Found metarecord whose master is $rec", DEBUG);
218                         my $others = OpenILS::Application::WoRM->storage_req(
219                                         'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
220
221                         if (@$others) {
222                                 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
223                                 $mr->master_record($others->[0]->source);
224                                 OpenILS::Application::WoRM->storage_req(
225                                         'open-ils.storage.direct.metabib.metarecord.remote_update',
226                                         { id => $mr->id },
227                                         { master_record => $others->[0]->source }
228                                 );
229                         } else {
230                                 warn "Removing metarecord whose master is $rec";
231                                 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
232                                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
233                                 warn "Metarecord removed";
234                                 $log->debug( "Metarecord removed", DEBUG);
235                         }
236                 }
237
238                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
239
240         } otherwise {
241                 $log->debug('Scrubbing failed : '.shift(), ERROR);
242                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
243                 $success = 0;
244         };
245
246         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
247         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
248         return $success;
249 }
250 __PACKAGE__->register_method(  
251         api_name        => "open-ils.worm.scrub.biblio",
252         method          => "scrub_metabib_record",
253         api_level       => 1,
254         argc            => 1,
255 );                      
256
257 sub wormize_biblio_record {
258         my $self = shift;
259         my $client = shift;
260         my $rec = shift;
261
262         my $commit = 0;
263         if (!OpenILS::Application::WoRM->in_transaction) {
264                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
265                 $commit = 1;
266         }
267
268         my $success = 1;
269         try {
270                 # clean up the cruft
271                 unless ($self->api_name =~ /noscrub/o) {
272                         $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
273                 }
274
275                 # now redo 'em
276                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
277
278                 my @full_rec = ();
279                 my @rec_descriptor = ();
280                 my %field_entry = (
281                         title   => [],
282                         author  => [],
283                         subject => [],
284                         keyword => [],
285                         series  => [],
286                 );
287                 my @metarecord = ();
288                 my @source_map = ();
289                 for my $r (@$bibs) {
290                         my $xml = $parser->parse_string($r->marc);
291
292                         # the full_rec stuff
293                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
294                                 $fr->record( $r->id );
295                                 push @full_rec, $fr;
296                         }
297
298                         # the rec_descriptor stuff
299                         my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
300                         $rd->record( $r->id );
301                         push @rec_descriptor, $rd;
302                         
303                         # the indexing field entry stuff
304                         for my $class ( qw/title author subject keyword series/ ) {
305                                 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
306                                         $fe->source( $r->id );
307                                         push @{$field_entry{$class}}, $fe;
308                                 }
309                         }
310
311                         #update the fingerprint
312                         my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
313                         OpenILS::Application::WoRM->storage_req(
314                                 'open-ils.storage.direct.biblio.record_entry.remote_update',
315                                 { id => $r->id },
316                                 { fingerprint => $fp }
317                         ) if ($fp ne $r->fingerprint);
318
319                         unless ($self->api_name =~ /nomap/o) {
320                                 my $mr = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp  )->[0];
321                                 
322                                 unless ($mr) {
323                                         $mr = Fieldmapper::metabib::metarecord->new;
324                                         $mr->fingerprint( $fp );
325                                         $mr->master_record( $r->id );
326                                         $mr->id( OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
327                                 }
328
329                                 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
330                                 $mr_map->metarecord( $mr->id );
331                                 $mr_map->source( $r->id );
332                                 push @source_map, $mr_map;
333                         }
334
335                 }
336
337                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
338
339                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create', @source_map ) if (@source_map);
340                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
341                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.batch.create', @full_rec ) if (@full_rec);
342                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.batch.create', @{ $field_entry{title} } ) if (@{ $field_entry{title} });
343                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.batch.create', @{ $field_entry{author} } ) if (@{ $field_entry{author} });
344                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create', @{ $field_entry{subject} } ) if (@{ $field_entry{subject} });
345                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create', @{ $field_entry{keyword} } ) if (@{ $field_entry{keyword} });
346                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.batch.create', @{ $field_entry{series} } ) if (@{ $field_entry{series} });
347
348                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
349
350         } otherwise {
351                 $log->debug('Wormization failed : '.shift(), ERROR);
352                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
353                 $success = 0;
354         };
355
356         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
357         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
358         return $success;
359 }
360 __PACKAGE__->register_method(
361         api_name        => "open-ils.worm.wormize.biblio",
362         method          => "wormize_biblio_record",
363         api_level       => 1,
364         argc            => 1,
365 );
366 __PACKAGE__->register_method(
367         api_name        => "open-ils.worm.wormize.biblio.nomap",
368         method          => "wormize_biblio_record",
369         api_level       => 1,
370         argc            => 1,
371 );
372 __PACKAGE__->register_method(
373         api_name        => "open-ils.worm.wormize.biblio.noscrub",
374         method          => "wormize_biblio_record",
375         api_level       => 1,
376         argc            => 1,
377 );
378 __PACKAGE__->register_method(
379         api_name        => "open-ils.worm.wormize.biblio.nomap.noscrub",
380         method          => "wormize_biblio_record",
381         api_level       => 1,
382         argc            => 1,
383 );
384
385 sub wormize_authority_record {
386         my $self = shift;
387         my $client = shift;
388         my $rec = shift;
389
390         my $commit = 0;
391         if (!OpenILS::Application::WoRM->in_transaction) {
392                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
393                 $commit = 1;
394         }
395
396         my $success = 1;
397         try {
398                 # clean up the cruft
399                 unless ($self->api_name =~ /noscrub/o) {
400                         $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
401                 }
402
403                 # now redo 'em
404                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
405
406                 my @full_rec = ();
407                 my @rec_descriptor = ();
408                 for my $r (@$bibs) {
409                         my $xml = $parser->parse_string($r->marc);
410
411                         # the full_rec stuff
412                         for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
413                                 $fr->record( $r->id );
414                                 push @full_rec, $fr;
415                         }
416
417                         # the rec_descriptor stuff -- XXX What does this mean for authority records?
418                         #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
419                         #$rd->record( $r->id );
420                         #push @rec_descriptor, $rd;
421                         
422                 }
423
424                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
425
426                 #OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
427                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
428
429                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
430
431         } otherwise {
432                 $log->debug('Wormization failed : '.shift(), ERROR);
433                 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
434                 $success = 0;
435         };
436
437         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
438         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
439         return $success;
440 }
441 __PACKAGE__->register_method(
442         api_name        => "open-ils.worm.wormize.authority",
443         method          => "wormize_authority_record",
444         api_level       => 1,
445         argc            => 1,
446 );
447 __PACKAGE__->register_method(
448         api_name        => "open-ils.worm.wormize.authority.noscrub",
449         method          => "wormize_authority_record",
450         api_level       => 1,
451         argc            => 1,
452 );
453
454
455 # --------------------------------------------------------------------------------
456 # MARC index extraction
457
458 package OpenILS::Application::WoRM::XPATH;
459 use base qw/OpenILS::Application::WoRM/;
460 use Unicode::Normalize;
461
462 # give this a MODS documentElement and an XPATH expression
463 sub _xpath_to_string {
464         my $xml = shift;
465         my $xpath = shift;
466         my $ns_uri = shift;
467         my $ns_prefix = shift;
468         my $unique = shift;
469
470         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
471
472         my $string = "";
473
474         # grab the set of matching nodes
475         my @nodes = $xml->findnodes( $xpath );
476         for my $value (@nodes) {
477
478                 # grab all children of the node
479                 my @children = $value->childNodes();
480                 for my $child (@children) {
481
482                         # add the childs content to the growing buffer
483                         my $content = quotemeta($child->textContent);
484                         next if ($unique && $string =~ /$content/);  # uniquify the values
485                         $string .= $child->textContent . " ";
486                 }
487                 if( ! @children ) {
488                         $string .= $value->textContent . " ";
489                 }
490         }
491         NFD($string);
492         $string =~ s/(\pM)//gso;
493         return $string;
494 }
495
496 sub class_all_index_string_xml {
497         my $self = shift;
498         my $client = shift;
499         my $xml = shift;
500         my $class = shift;
501
502         OpenILS::Application::WoRM->post_init();
503         $xml = $parser->parse_string($xml) unless (ref $xml);
504         
505         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
506         for my $type ( keys %{ $xpathset->{$class} } ) {
507                 my $value =  _xpath_to_string(
508                                 $mods_sheet->transform($xml)->documentElement,
509                                 $xpathset->{$class}->{$type}->{xpath},
510                                 "http://www.loc.gov/mods/",
511                                 "mods",
512                                 1
513                 );
514
515                 next unless $value;
516
517                 my $fm = $class_constructor->new;
518                 $fm->value( $value );
519                 $fm->field( $xpathset->{$class}->{$type}->{id} );
520                 $client->respond($fm);
521         }
522         return undef;
523 }
524 __PACKAGE__->register_method(  
525         api_name        => "open-ils.worm.field_entry.class.xml",
526         method          => "class_all_index_string_xml",
527         api_level       => 1,
528         argc            => 1,
529         stream          => 1,
530 );                      
531
532 sub class_all_index_string_record {
533         my $self = shift;
534         my $client = shift;
535         my $rec = shift;
536         my $class = shift;
537
538         OpenILS::Application::WoRM->post_init();
539         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
540
541         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
542                 $fm->source($rec);
543                 $client->respond($fm);
544         }
545         return undef;
546 }
547 __PACKAGE__->register_method(  
548         api_name        => "open-ils.worm.field_entry.class.record",
549         method          => "class_all_index_string_record",
550         api_level       => 1,
551         argc            => 1,
552         stream          => 1,
553 );                      
554
555
556 sub class_index_string_xml {
557         my $self = shift;
558         my $client = shift;
559         my $xml = shift;
560         my $class = shift;
561         my $type = shift;
562
563         OpenILS::Application::WoRM->post_init();
564         $xml = $parser->parse_string($xml) unless (ref $xml);
565         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
566 }
567 __PACKAGE__->register_method(  
568         api_name        => "open-ils.worm.class.type.xml",
569         method          => "class_index_string_xml",
570         api_level       => 1,
571         argc            => 1,
572 );                      
573
574 sub class_index_string_record {
575         my $self = shift;
576         my $client = shift;
577         my $rec = shift;
578         my $class = shift;
579         my $type = shift;
580
581         OpenILS::Application::WoRM->post_init();
582         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
583
584         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
585         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
586         return $d;
587 }
588 __PACKAGE__->register_method(  
589         api_name        => "open-ils.worm.class.type.record",
590         method          => "class_index_string_record",
591         api_level       => 1,
592         argc            => 1,
593 );                      
594
595 sub xml_xpath {
596         my $self = shift;
597         my $client = shift;
598         my $xml = shift;
599         my $xpath = shift;
600         my $uri = shift;
601         my $prefix = shift;
602         my $unique = shift;
603
604         OpenILS::Application::WoRM->post_init();
605         $xml = $parser->parse_string($xml) unless (ref $xml);
606         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
607 }
608 __PACKAGE__->register_method(  
609         api_name        => "open-ils.worm.xpath.xml",
610         method          => "xml_xpath",
611         api_level       => 1,
612         argc            => 1,
613 );                      
614
615 sub record_xpath {
616         my $self = shift;
617         my $client = shift;
618         my $rec = shift;
619         my $xpath = shift;
620         my $uri = shift;
621         my $prefix = shift;
622         my $unique = shift;
623
624         OpenILS::Application::WoRM->post_init();
625         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
626
627         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
628         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
629         return $d;
630 }
631 __PACKAGE__->register_method(  
632         api_name        => "open-ils.worm.xpath.record",
633         method          => "record_xpath",
634         api_level       => 1,
635         argc            => 1,
636 );                      
637
638
639 # --------------------------------------------------------------------------------
640 # MARC Descriptor
641
642 package OpenILS::Application::WoRM::Biblio::Leader;
643 use base qw/OpenILS::Application::WoRM/;
644 use Unicode::Normalize;
645
646 our %biblio_descriptor_code = (
647         item_type => sub { substr($ldr,6,1); },
648         item_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/o) ? substr($oo8,29,1) : substr($oo8,23,1); },
649         bib_level => sub { substr($ldr,7,1); },
650         control_type => sub { substr($ldr,8,1); },
651         char_encoding => sub { substr($ldr,9,1); },
652         enc_level => sub { substr($ldr,17,1); },
653         cat_form => sub { substr($ldr,18,1); },
654         pub_status => sub { substr($ldr,5,1); },
655         item_lang => sub { substr($oo8,35,3); },
656         #lit_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
657         audience => sub { substr($oo8,22,1); },
658 );
659
660 sub _extract_biblio_descriptors {
661         my $xml = shift;
662
663         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
664         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
665
666         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
667         for my $rd_field ( keys %biblio_descriptor_code ) {
668                 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
669         }
670
671         return $rd_obj;
672 }
673
674 sub extract_biblio_desc_xml {
675         my $self = shift;
676         my $client = shift;
677         my $xml = shift;
678
679         $xml = $parser->parse_string($xml) unless (ref $xml);
680
681         return _extract_biblio_descriptors( $xml );
682 }
683 __PACKAGE__->register_method(  
684         api_name        => "open-ils.worm.biblio_leader.xml",
685         method          => "extract_biblio_desc_xml",
686         api_level       => 1,
687         argc            => 1,
688 );                      
689
690 sub extract_biblio_desc_record {
691         my $self = shift;
692         my $client = shift;
693         my $rec = shift;
694
695         OpenILS::Application::WoRM->post_init();
696         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
697
698         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
699         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
700         return $d;
701 }
702 __PACKAGE__->register_method(  
703         api_name        => "open-ils.worm.biblio_leader.record",
704         method          => "extract_biblio_desc_record",
705         api_level       => 1,
706         argc            => 1,
707 );                      
708
709 # --------------------------------------------------------------------------------
710 # Flat MARC
711
712 package OpenILS::Application::WoRM::FlatMARC;
713 use base qw/OpenILS::Application::WoRM/;
714 use Unicode::Normalize;
715
716
717 sub _marcxml_to_full_rows {
718
719         my $marcxml = shift;
720         my $xmltype = shift || 'metabib';
721
722         my $type = "Fieldmapper::${xmltype}::full_rec";
723
724         my @ns_list;
725         
726         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
727
728         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
729                 next unless $tagline;
730
731                 my $ns = $type->new;
732
733                 $ns->tag( 'LDR' );
734                 my $val = $tagline->textContent;
735                 NFD($val);
736                 $val =~ s/(\pM+)//gso;
737                 $ns->value( $val );
738
739                 push @ns_list, $ns;
740         }
741
742         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
743                 next unless $tagline;
744
745                 my $ns = $type->new;
746
747                 $ns->tag( $tagline->getAttribute( "tag" ) );
748                 my $val = $tagline->textContent;
749                 NFD($val);
750                 $val =~ s/(\pM+)//gso;
751                 $ns->value( $val );
752
753                 push @ns_list, $ns;
754         }
755
756         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
757                 next unless $tagline;
758
759                 my $tag = $tagline->getAttribute( "tag" );
760                 my $ind1 = $tagline->getAttribute( "ind1" );
761                 my $ind2 = $tagline->getAttribute( "ind2" );
762
763                 for my $data ( $tagline->childNodes ) {
764                         next unless $data;
765
766                         my $ns = $type->new;
767
768                         $ns->tag( $tag );
769                         $ns->ind1( $ind1 );
770                         $ns->ind2( $ind2 );
771                         $ns->subfield( $data->getAttribute( "code" ) );
772                         my $val = $data->textContent;
773                         NFD($val);
774                         $val =~ s/(\pM+)//gso;
775                         $ns->value( lc($val) );
776
777                         push @ns_list, $ns;
778                 }
779         }
780
781         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
782         return @ns_list;
783 }
784
785 sub flat_marc_xml {
786         my $self = shift;
787         my $client = shift;
788         my $xml = shift;
789
790         $xml = $parser->parse_string($xml) unless (ref $xml);
791
792         my $type = 'metabib';
793         $type = 'authority' if ($self->api_name =~ /authority/o);
794
795         OpenILS::Application::WoRM->post_init();
796
797         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
798         return undef;
799 }
800 __PACKAGE__->register_method(  
801         api_name        => "open-ils.worm.flat_marc.authority.xml",
802         method          => "flat_marc_xml",
803         api_level       => 1,
804         argc            => 1,
805         stream          => 1,
806 );                      
807 __PACKAGE__->register_method(  
808         api_name        => "open-ils.worm.flat_marc.biblio.xml",
809         method          => "flat_marc_xml",
810         api_level       => 1,
811         argc            => 1,
812         stream          => 1,
813 );                      
814
815 sub flat_marc_record {
816         my $self = shift;
817         my $client = shift;
818         my $rec = shift;
819
820         my $type = 'biblio';
821         $type = 'authority' if ($self->api_name =~ /authority/o);
822
823         OpenILS::Application::WoRM->post_init();
824         my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
825
826         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
827         return undef;
828 }
829 __PACKAGE__->register_method(  
830         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
831         method          => "flat_marc_record",
832         api_level       => 1,
833         argc            => 1,
834         stream          => 1,
835 );                      
836 __PACKAGE__->register_method(  
837         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
838         method          => "flat_marc_record",
839         api_level       => 1,
840         argc            => 1,
841         stream          => 1,
842 );                      
843
844
845 # --------------------------------------------------------------------------------
846 # Fingerprinting
847
848 package OpenILS::Application::WoRM::Biblio::Fingerprint;
849 use base qw/OpenILS::Application::WoRM/;
850 use Unicode::Normalize;
851 use OpenSRF::EX qw/:try/;
852
853 my @fp_mods_xpath = (
854         '//mods:mods/mods:typeOfResource[text()="text"]' => [
855                         title   => {
856                                         xpath   => [
857                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
858                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
859                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
860                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
861                                         ],
862                                         fixup   => sub {
863                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
864                                                         NFD($text);
865                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
866                                                         $text =~ s/\pM+//gso;
867                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
868                                                         $text = lc($text);
869                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
870                                                         $text =~ s/\s+/ /sgo;
871                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
872                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
873                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
874                                                         $text =~ s/\b(?:the|an?)\b//sgo;
875                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
876                                                         $text =~ s/\[.[^\]]+\]//sgo;
877                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
878                                                         $text =~ s/\s*[;\/\.]*$//sgo;
879                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
880                                                 },
881                         },
882                         author  => {
883                                         xpath   => [
884                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
885                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
886                                         ],
887                                         fixup   => sub {
888                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
889                                                         NFD($text);
890                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
891                                                         $text =~ s/\pM+//gso;
892                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
893                                                         $text = lc($text);
894                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
895                                                         $text =~ s/\s+/ /sgo;
896                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
897                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
898                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
899                                                         $text =~ s/,?\s+.*$//sgo;
900                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
901                                                 },
902                         },
903         ],
904
905         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
906                         title   => {
907                                         xpath   => [
908                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
909                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
910                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
911                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
912                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
913                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
914                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
915                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
916                                         ],
917                                         fixup   => sub {
918                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
919                                                         NFD($text);
920                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
921                                                         $text =~ s/\pM+//gso;
922                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
923                                                         $text = lc($text);
924                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
925                                                         $text =~ s/\s+/ /sgo;
926                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
927                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
928                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
929                                                         $text =~ s/\b(?:the|an?)\b//sgo;
930                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
931                                                         $text =~ s/\[.[^\]]+\]//sgo;
932                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
933                                                         $text =~ s/\s*[;\/\.]*$//sgo;
934                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
935                                                 },
936                         },
937                         author  => {
938                                         xpath   => [
939                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
940                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
941                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
942                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
943                                         ],
944                                         fixup   => sub {
945                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
946                                                         NFD($text);
947                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
948                                                         $text =~ s/\pM+//gso;
949                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
950                                                         $text = lc($text);
951                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
952                                                         $text =~ s/\s+/ /sgo;
953                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
954                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
955                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
956                                                         $text =~ s/,?\s+.*$//sgo;
957                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
958                                                 },
959                         },
960         ],
961
962 );
963
964 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
965
966 sub _fp_mods {
967         my $mods = shift;
968         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
969
970         my $fp_string = '';
971
972         my $match_index = 0;
973         my $block_index = 1;
974         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
975                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
976
977                         my $block_name_index = 0;
978                         my $block_value_index = 1;
979                         my $block = $fp_mods_xpath[$block_index];
980                         while ( my $part = $$block[$block_value_index] ) {
981                                 local $text;
982                                 for my $xpath ( @{ $part->{xpath} } ) {
983                                         $text = $mods->findvalue( $xpath );
984                                         last if ($text);
985                                 }
986
987                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
988
989                                 if ($text) {
990                                         $$part{fixup}->();
991                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
992                                         $fp_string .= $text;
993                                 }
994
995                                 $block_name_index += 2;
996                                 $block_value_index += 2;
997                         }
998                 }
999                 if ($fp_string) {
1000                         $fp_string =~ s/\W+//gso;
1001                         $log->debug("Fingerprint is [$fp_string]", INFO);;
1002                         return $fp_string;
1003                 }
1004
1005                 $match_index += 2;
1006                 $block_index += 2;
1007         }
1008         return undef;
1009 }
1010
1011 sub refingerprint_bibrec {
1012         my $self = shift;
1013         my $client = shift;
1014         my $rec = shift;
1015
1016         my $commit = 0;
1017         if (!OpenILS::Application::WoRM->in_transaction) {
1018                 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1019                 $commit = 1;
1020         }
1021
1022         my $success = 1;
1023         try {
1024                 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1025                 OpenILS::Application::WoRM->storage_req(
1026                         'open-ils.storage.direct.biblio.record_entry.remote_update',
1027                         { id => $_->id },
1028                         { fingerprint => $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $_->marc ) }
1029                 ) for (@$bibs);
1030         } otherwise {
1031                 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1032                 $success = 0;
1033         };
1034
1035         OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
1036         OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
1037         return $success;
1038 }
1039 __PACKAGE__->register_method(  
1040         api_name        => "open-ils.worm.fingerprint.record.update",
1041         method          => "refingerprint_bibrec",
1042         api_level       => 1,
1043         argc            => 1,
1044 );                      
1045
1046
1047 sub fingerprint_bibrec {
1048         my $self = shift;
1049         my $client = shift;
1050         my $rec = shift;
1051
1052         OpenILS::Application::WoRM->post_init();
1053         my $r = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1054
1055         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1056         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1057         return $fp;
1058
1059 }
1060 __PACKAGE__->register_method(  
1061         api_name        => "open-ils.worm.fingerprint.record",
1062         method          => "fingerprint_bibrec",
1063         api_level       => 1,
1064         argc            => 1,
1065 );                      
1066
1067 sub fingerprint_mods {
1068         my $self = shift;
1069         my $client = shift;
1070         my $xml = shift;
1071
1072         OpenILS::Application::WoRM->post_init();
1073         my $mods = $parser->parse_string($xml)->documentElement;
1074
1075         return _fp_mods( $mods );
1076 }
1077 __PACKAGE__->register_method(  
1078         api_name        => "open-ils.worm.fingerprint.mods",
1079         method          => "fingerprint_mods",
1080         api_level       => 1,
1081         argc            => 1,
1082 );                      
1083
1084 sub fingerprint_marc {
1085         my $self = shift;
1086         my $client = shift;
1087         my $xml = shift;
1088
1089         $xml = $parser->parse_string($xml) unless (ref $xml);
1090
1091         OpenILS::Application::WoRM->post_init();
1092         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1093         $log->debug("Returning [$fp] as fingerprint", INFO);
1094         return $fp;
1095 }
1096 __PACKAGE__->register_method(  
1097         api_name        => "open-ils.worm.fingerprint.marc",
1098         method          => "fingerprint_marc",
1099         api_level       => 1,
1100         argc            => 1,
1101 );                      
1102
1103
1104 # --------------------------------------------------------------------------------
1105
1106 1;
1107 __END__
1108 my $in_xact;
1109 my $begin;
1110 my $commit;
1111 my $rollback;
1112 my $lookup;
1113 my $update_entry;
1114 my $mr_lookup;
1115 my $mr_update;
1116 my $mr_create;
1117 my $create_source_map;
1118 my $sm_lookup;
1119 my $rm_old_rd;
1120 my $rm_old_sm;
1121 my $rm_old_fr;
1122 my $rm_old_tr;
1123 my $rm_old_ar;
1124 my $rm_old_sr;
1125 my $rm_old_kr;
1126 my $rm_old_ser;
1127
1128 my $fr_create;
1129 my $rd_create;
1130 my $create = {};
1131
1132 my %descriptor_code = (
1133         item_type => 'substr($ldr,6,1)',
1134         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1135         bib_level => 'substr($ldr,7,1)',
1136         control_type => 'substr($ldr,8,1)',
1137         char_encoding => 'substr($ldr,9,1)',
1138         enc_level => 'substr($ldr,17,1)',
1139         cat_form => 'substr($ldr,18,1)',
1140         pub_status => 'substr($ldr,5,1)',
1141         item_lang => 'substr($oo8,35,3)',
1142         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1143         audience => 'substr($oo8,22,1)',
1144 );
1145
1146 sub wormize {
1147
1148         my $self = shift;
1149         my $client = shift;
1150         my @docids = @_;
1151
1152         my $no_map = 0;
1153         if ($self->api_name =~ /no_map/o) {
1154                 $no_map = 1;
1155         }
1156
1157         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1158                 unless ($in_xact);
1159         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1160                 unless ($begin);
1161         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1162                 unless ($commit);
1163         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1164                 unless ($rollback);
1165         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1166                 unless ($sm_lookup);
1167         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1168                 unless ($mr_lookup);
1169         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1170                 unless ($mr_update);
1171         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1172                 unless ($lookup);
1173         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1174                 unless ($update_entry);
1175         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1176                 unless ($rm_old_sm);
1177         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1178                 unless ($rm_old_rd);
1179         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1180                 unless ($rm_old_fr);
1181         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1182                 unless ($rm_old_tr);
1183         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1184                 unless ($rm_old_ar);
1185         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1186                 unless ($rm_old_sr);
1187         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1188                 unless ($rm_old_kr);
1189         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1190                 unless ($rm_old_ser);
1191         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1192                 unless ($mr_create);
1193         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1194                 unless ($create_source_map);
1195         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1196                 unless ($rd_create);
1197         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1198                 unless ($fr_create);
1199         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1200                 unless ($$create{title});
1201         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1202                 unless ($$create{author});
1203         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1204                 unless ($$create{subject});
1205         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1206                 unless ($$create{keyword});
1207         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1208                 unless ($$create{series});
1209
1210
1211         my ($outer_xact) = $in_xact->run;
1212         try {
1213                 unless ($outer_xact) {
1214                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1215                         my ($r) = $begin->run($client);
1216                         unless (defined $r and $r) {
1217                                 $rollback->run;
1218                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1219                         }
1220                 }
1221         } catch Error with {
1222                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1223         };
1224
1225         my @source_maps;
1226         my @entry_list;
1227         my @mr_list;
1228         my @rd_list;
1229         my @ns_list;
1230         my @mods_data;
1231         my $ret = 0;
1232         for my $entry ( $lookup->run(@docids) ) {
1233                 # step -1: grab the doc from storage
1234                 next unless ($entry);
1235
1236                 if(!$mods_sheet) {
1237                         my $xslt_doc = $parser->parse_file(
1238                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1239                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
1240                 }
1241
1242                 my $xml = $entry->marc;
1243                 my $docid = $entry->id;
1244                 my $marcdoc = $parser->parse_string($xml);
1245                 my $modsdoc = $mods_sheet->transform($marcdoc);
1246
1247                 my $mods = $modsdoc->documentElement;
1248                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1249
1250                 $entry->fingerprint( fingerprint_mods( $mods ) );
1251                 push @entry_list, $entry;
1252
1253                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
1254
1255                 unless ($no_map) {
1256                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
1257                         if (!$mr || !@$mr) {
1258                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1259                                 $mr = new Fieldmapper::metabib::metarecord;
1260                                 $mr->fingerprint( $entry->fingerprint );
1261                                 $mr->master_record( $entry->id );
1262                                 my ($new_mr) = $mr_create->run($mr);
1263                                 $mr->id($new_mr);
1264                                 unless (defined $mr) {
1265                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1266                                 }
1267                         } else {
1268                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
1269                                 $mr->mods('');
1270                                 push @mr_list, $mr;
1271                         }
1272
1273                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
1274                         $sm->metarecord( $mr->id );
1275                         $sm->source( $entry->id );
1276                         push @source_maps, $sm;
1277                 }
1278
1279                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1280                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1281
1282                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1283                 for my $rd_field ( keys %descriptor_code ) {
1284                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1285                 }
1286                 $rd_obj->record( $docid );
1287                 push @rd_list, $rd_obj;
1288
1289                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
1290
1291                 # step 2: build the KOHA rows
1292                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1293                 $_->record( $docid ) for (@tmp_list);
1294                 push @ns_list, @tmp_list;
1295
1296                 $ret++;
1297
1298                 last unless ($self->api_name =~ /batch$/o);
1299         }
1300
1301         $rm_old_rd->run( { record => \@docids } );
1302         $rm_old_fr->run( { record => \@docids } );
1303         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1304         $rm_old_tr->run( { source => \@docids } );
1305         $rm_old_ar->run( { source => \@docids } );
1306         $rm_old_sr->run( { source => \@docids } );
1307         $rm_old_kr->run( { source => \@docids } );
1308         $rm_old_ser->run( { source => \@docids } );
1309
1310         unless ($no_map) {
1311                 my ($sm) = $create_source_map->run(@source_maps);
1312                 unless (defined $sm) {
1313                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1314                 }
1315                 my ($mr) = $mr_update->run(@mr_list);
1316                 unless (defined $mr) {
1317                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1318                 }
1319         }
1320
1321         my ($re) = $update_entry->run(@entry_list);
1322         unless (defined $re) {
1323                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1324         }
1325
1326         my ($rd) = $rd_create->run(@rd_list);
1327         unless (defined $rd) {
1328                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1329         }
1330
1331         my ($fr) = $fr_create->run(@ns_list);
1332         unless (defined $fr) {
1333                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1334         }
1335
1336         # step 5: insert the new metadata
1337         for my $class ( qw/title author subject keyword series/ ) {
1338                 my @md_list = ();
1339                 for my $doc ( @mods_data ) {
1340                         my ($did) = keys %$doc;
1341                         my ($data) = values %$doc;
1342
1343                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1344                         for my $row ( keys %{ $$data{$class} } ) {
1345                                 next unless (exists $$data{$class}{$row});
1346                                 next unless ($$data{$class}{$row}{value});
1347                                 my $fm_obj = $fm_constructor->new;
1348                                 $fm_obj->value( $$data{$class}{$row}{value} );
1349                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
1350                                 $fm_obj->source( $did );
1351                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1352
1353                                 push @md_list, $fm_obj;
1354                         }
1355                 }
1356                         
1357                 my ($cr) = $$create{$class}->run(@md_list);
1358                 unless (defined $cr) {
1359                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
1360                 }
1361         }
1362
1363         unless ($outer_xact) {
1364                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1365                 my ($c) = $commit->run;
1366                 unless (defined $c and $c) {
1367                         $rollback->run;
1368                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1369                 }
1370         }
1371
1372         return $ret;
1373 }
1374 __PACKAGE__->register_method( 
1375         api_name        => "open-ils.worm.wormize",
1376         method          => "wormize",
1377         api_level       => 1,
1378         argc            => 1,
1379 );
1380 __PACKAGE__->register_method( 
1381         api_name        => "open-ils.worm.wormize.no_map",
1382         method          => "wormize",
1383         api_level       => 1,
1384         argc            => 1,
1385 );
1386 __PACKAGE__->register_method( 
1387         api_name        => "open-ils.worm.wormize.batch",
1388         method          => "wormize",
1389         api_level       => 1,
1390         argc            => 1,
1391 );
1392 __PACKAGE__->register_method( 
1393         api_name        => "open-ils.worm.wormize.no_map.batch",
1394         method          => "wormize",
1395         api_level       => 1,
1396         argc            => 1,
1397 );
1398
1399
1400 my $ain_xact;
1401 my $abegin;
1402 my $acommit;
1403 my $arollback;
1404 my $alookup;
1405 my $aupdate_entry;
1406 my $amr_lookup;
1407 my $amr_update;
1408 my $amr_create;
1409 my $acreate_source_map;
1410 my $asm_lookup;
1411 my $arm_old_rd;
1412 my $arm_old_sm;
1413 my $arm_old_fr;
1414 my $arm_old_tr;
1415 my $arm_old_ar;
1416 my $arm_old_sr;
1417 my $arm_old_kr;
1418 my $arm_old_ser;
1419
1420 my $afr_create;
1421 my $ard_create;
1422 my $acreate = {};
1423
1424 sub authority_wormize {
1425
1426         my $self = shift;
1427         my $client = shift;
1428         my @docids = @_;
1429
1430         my $no_map = 0;
1431         if ($self->api_name =~ /no_map/o) {
1432                 $no_map = 1;
1433         }
1434
1435         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1436                 unless ($in_xact);
1437         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1438                 unless ($begin);
1439         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1440                 unless ($commit);
1441         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1442                 unless ($rollback);
1443         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1444                 unless ($alookup);
1445         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1446                 unless ($aupdate_entry);
1447         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1448                 unless ($arm_old_rd);
1449         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1450                 unless ($arm_old_fr);
1451         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1452                 unless ($ard_create);
1453         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1454                 unless ($afr_create);
1455
1456
1457         my ($outer_xact) = $in_xact->run;
1458         try {
1459                 unless ($outer_xact) {
1460                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1461                         my ($r) = $begin->run($client);
1462                         unless (defined $r and $r) {
1463                                 $rollback->run;
1464                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1465                         }
1466                 }
1467         } catch Error with {
1468                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1469         };
1470
1471         my @source_maps;
1472         my @entry_list;
1473         my @mr_list;
1474         my @rd_list;
1475         my @ns_list;
1476         my @mads_data;
1477         my $ret = 0;
1478         for my $entry ( $lookup->run(@docids) ) {
1479                 # step -1: grab the doc from storage
1480                 next unless ($entry);
1481
1482                 #if(!$mads_sheet) {
1483                 #       my $xslt_doc = $parser->parse_file(
1484                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1485                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1486                 #}
1487
1488                 my $xml = $entry->marc;
1489                 my $docid = $entry->id;
1490                 my $marcdoc = $parser->parse_string($xml);
1491                 #my $madsdoc = $mads_sheet->transform($marcdoc);
1492
1493                 #my $mads = $madsdoc->documentElement;
1494                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1495
1496                 push @entry_list, $entry;
1497
1498                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1499                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1500
1501                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1502                 for my $rd_field ( keys %descriptor_code ) {
1503                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1504                 }
1505                 $rd_obj->record( $docid );
1506                 push @rd_list, $rd_obj;
1507
1508                 # step 2: build the KOHA rows
1509                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1510                 $_->record( $docid ) for (@tmp_list);
1511                 push @ns_list, @tmp_list;
1512
1513                 $ret++;
1514
1515                 last unless ($self->api_name =~ /batch$/o);
1516         }
1517
1518         $arm_old_rd->run( { record => \@docids } );
1519         $arm_old_fr->run( { record => \@docids } );
1520
1521         my ($rd) = $ard_create->run(@rd_list);
1522         unless (defined $rd) {
1523                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
1524         }
1525
1526         my ($fr) = $fr_create->run(@ns_list);
1527         unless (defined $fr) {
1528                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
1529         }
1530
1531         unless ($outer_xact) {
1532                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1533                 my ($c) = $commit->run;
1534                 unless (defined $c and $c) {
1535                         $rollback->run;
1536                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1537                 }
1538         }
1539
1540         return $ret;
1541 }
1542 __PACKAGE__->register_method( 
1543         api_name        => "open-ils.worm.authortiy.wormize",
1544         method          => "wormize",
1545         api_level       => 1,
1546         argc            => 1,
1547 );
1548 __PACKAGE__->register_method( 
1549         api_name        => "open-ils.worm.authority.wormize.batch",
1550         method          => "wormize",
1551         api_level       => 1,
1552         argc            => 1,
1553 );
1554
1555
1556 # --------------------------------------------------------------------------------
1557
1558
1559 sub _marcxml_to_full_rows {
1560
1561         my $marcxml = shift;
1562         my $type = shift || 'Fieldmapper::metabib::full_rec';
1563
1564         my @ns_list;
1565         
1566         my $root = $marcxml->documentElement;
1567
1568         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1569                 next unless $tagline;
1570
1571                 my $ns = new Fieldmapper::metabib::full_rec;
1572
1573                 $ns->tag( 'LDR' );
1574                 my $val = NFD($tagline->textContent);
1575                 $val =~ s/(\pM+)//gso;
1576                 $ns->value( $val );
1577
1578                 push @ns_list, $ns;
1579         }
1580
1581         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1582                 next unless $tagline;
1583
1584                 my $ns = new Fieldmapper::metabib::full_rec;
1585
1586                 $ns->tag( $tagline->getAttribute( "tag" ) );
1587                 my $val = NFD($tagline->textContent);
1588                 $val =~ s/(\pM+)//gso;
1589                 $ns->value( $val );
1590
1591                 push @ns_list, $ns;
1592         }
1593
1594         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1595                 next unless $tagline;
1596
1597                 my $tag = $tagline->getAttribute( "tag" );
1598                 my $ind1 = $tagline->getAttribute( "ind1" );
1599                 my $ind2 = $tagline->getAttribute( "ind2" );
1600
1601                 for my $data ( $tagline->childNodes ) {
1602                         next unless $data;
1603
1604                         my $ns = $type->new;
1605
1606                         $ns->tag( $tag );
1607                         $ns->ind1( $ind1 );
1608                         $ns->ind2( $ind2 );
1609                         $ns->subfield( $data->getAttribute( "code" ) );
1610                         my $val = NFD($data->textContent);
1611                         $val =~ s/(\pM+)//gso;
1612                         $ns->value( lc($val) );
1613
1614                         push @ns_list, $ns;
1615                 }
1616         }
1617         return @ns_list;
1618 }
1619
1620 sub _get_field_value {
1621
1622         my( $root, $xpath ) = @_;
1623
1624         my $string = "";
1625
1626         # grab the set of matching nodes
1627         my @nodes = $root->findnodes( $xpath );
1628         for my $value (@nodes) {
1629
1630                 # grab all children of the node
1631                 my @children = $value->childNodes();
1632                 for my $child (@children) {
1633
1634                         # add the childs content to the growing buffer
1635                         my $content = quotemeta($child->textContent);
1636                         next if ($string =~ /$content/);  # uniquify the values
1637                         $string .= $child->textContent . " ";
1638                 }
1639                 if( ! @children ) {
1640                         $string .= $value->textContent . " ";
1641                 }
1642         }
1643         $string = NFD($string);
1644         $string =~ s/(\pM)//gso;
1645         return lc($string);
1646 }
1647
1648
1649 sub modsdoc_to_values {
1650         my( $self, $mods ) = @_;
1651         my $data = {};
1652         for my $class (keys %$xpathset) {
1653                 $data->{$class} = {};
1654                 for my $type (keys %{$xpathset->{$class}}) {
1655                         $data->{$class}->{$type} = {};
1656                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
1657                 }
1658         }
1659         return $data;
1660 }
1661
1662
1663 1;
1664
1665