]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm
fleshed out "actor.usr" interface, and stream behavior adjustment
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Storage / WORM.pm
1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
4
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::Logger qw/:level/;
8 my $log = 'OpenSRF::Utils::Logger';
9
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use XML::LibXML;
15 use XML::LibXSLT;
16 use Time::HiRes qw(time);
17
18 my $xml_util    = OpenILS::Utils::FlatXML->new();
19
20 my $parser              = XML::LibXML->new();
21 my $xslt                        = XML::LibXSLT->new();
22 my $xslt_doc    =       $parser->parse_file( "/home/miker/cvs/OpenILS/app_server/stylesheets/MARC21slim2MODS.xsl" );
23 #my $xslt_doc   = $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
24 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
25
26 use open qw/:utf8/;
27
28 my $xpathset = {};
29
30 sub child_init {
31         my $meth = __PACKAGE__->method_lookup('open-ils.storage.direct.config.metabib_field.all');
32         for my $f ($meth->run) {
33                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
34                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
35                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
36         }
37 }
38
39 # --------------------------------------------------------------------------------
40 # Fingerprinting
41
42 my @fp_mods_xpath = (
43         '//mods:mods/mods:typeOfResource[text()="text"]' => [
44                         title   => {
45                                         xpath   => [
46                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
47                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
48                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
49                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
50                                         ],
51                                         fixup   => '
52                                                         do {
53                                                                 $text = lc($text);
54                                                                 $text =~ s/\s+/ /sgo;
55                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
56                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
57                                                                 $text =~ s/\[.[^\]]+\]//sgo;
58                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
59                                                                 $text =~ s/\pM+//gso;
60                                                         };
61                                         ',
62                         },
63                         author  => {
64                                         xpath   => [
65                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
66                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
67                                         ],
68                                         fixup   => '
69                                                         do {
70                                                                 $text = lc($text);
71                                                                 $text =~ s/\s+/ /sgo;
72                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
73                                                                 $text =~ s/,?\s+.*$//sgo;
74                                                                 $text =~ s/\pM+//gso;
75                                                         };
76                                         ',
77                         },
78         ],
79
80         '//mods:mods/mods:relatedItem[@type!="host"]' => [
81                         title   => {
82                                         xpath   => [
83                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
84                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
85                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
86                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
87                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
88                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
89                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
90                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
91                                         ],
92                                         fixup   => '
93                                                         do {
94                                                                 $text = lc($text);
95                                                                 $text =~ s/\s+/ /sgo;
96                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
97                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
98                                                                 $text =~ s/\[.[^\]]+\]//sgo;
99                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
100                                                                 $text =~ s/\pM+//gso;
101                                                         };
102                                         ',
103                         },
104                         author  => {
105                                         xpath   => [
106                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
107                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
108                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
109                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
110                                         ],
111                                         fixup   => '
112                                                         do {
113                                                                 $text = lc($text);
114                                                                 $text =~ s/\s+/ /sgo;
115                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
116                                                                 $text =~ s/,?\s+.*$//sgo;
117                                                                 $text =~ s/\pM+//gso;
118                                                         };
119                                         ',
120                         },
121         ],
122
123 );
124
125 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
126
127 sub fingerprint_mods {
128         my $mods = shift;
129
130         my $fp_string = '';
131
132         my $match_index = 0;
133         my $block_index = 1;
134         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
135                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
136
137                         my $block_name_index = 0;
138                         my $block_value_index = 1;
139                         my $block = $fp_mods_xpath[$block_index];
140                         while ( my $part = $$block[$block_value_index] ) {
141                                 my $text;
142                                 for my $xpath ( @{ $part->{xpath} } ) {
143                                         $text = $mods->findvalue( $xpath );
144                                         last if ($text);
145                                 }
146
147                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
148
149                                 if ($text) {
150                                         eval $$part{fixup};
151                                         $fp_string .= $text;
152                                 }
153
154                                 $block_name_index += 2;
155                                 $block_value_index += 2;
156                         }
157                 }
158                 if ($fp_string) {
159                         $fp_string =~ s/\W+//gso;
160                         $log->debug("Fingerprint is [$fp_string]", INFO);;
161                         return $fp_string;
162                 }
163
164                 $match_index += 2;
165                 $block_index += 2;
166         }
167         return undef;
168 }
169
170
171
172 # --------------------------------------------------------------------------------
173
174 my $begin;
175 my $commit;
176 my $rollback;
177 my $lookup;
178 my $update_entry;
179 my $mr_lookup;
180 my $mr_update;
181 my $mr_create;
182 my $create_source_map;
183 my $sm_lookup;
184 my $rm_old_rd;
185 my $rm_old_sm;
186 my $rm_old_fr;
187 my $rm_old_tr;
188 my $rm_old_ar;
189 my $rm_old_sr;
190 my $rm_old_kr;
191
192 my $fr_create;
193 my $rd_create;
194 my $create = {};
195
196 my %descriptor_code = (
197         item_type => 'substr($ldr,6,1)',
198         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
199         bib_level => 'substr($ldr,7,1)',
200         control_type => 'substr($ldr,8,1)',
201         char_encoding => 'substr($ldr,9,1)',
202         enc_level => 'substr($ldr,17,1)',
203         cat_form => 'substr($ldr,18,1)',
204         pub_status => 'substr($ldr,5,1)',
205         item_lang => 'substr($oo8,35,3)',
206         audience => 'substr($oo8,22,1)',
207 );
208
209 sub wormize {
210
211         my $self = shift;
212         my $client = shift;
213         my @docids = @_;
214
215         my $no_map = 0;
216         if ($self->api_name =~ /no_map/o) {
217                 $no_map = 1;
218         }
219
220         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
221                 unless ($begin);
222         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
223                 unless ($commit);
224         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
225                 unless ($rollback);
226         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
227                 unless ($sm_lookup);
228         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
229                 unless ($mr_lookup);
230         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
231                 unless ($mr_update);
232         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
233                 unless ($mr_create);
234         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
235                 unless ($create_source_map);
236         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
237                 unless ($lookup);
238         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
239                 unless ($update_entry);
240         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
241                 unless ($rm_old_sm);
242         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
243                 unless ($rm_old_rd);
244         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
245                 unless ($rm_old_fr);
246         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
247                 unless ($rm_old_tr);
248         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
249                 unless ($rm_old_ar);
250         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
251                 unless ($rm_old_sr);
252         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
253                 unless ($rm_old_kr);
254         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
255                 unless ($rd_create);
256         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
257                 unless ($fr_create);
258         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
259                 unless ($$create{title});
260         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
261                 unless ($$create{author});
262         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
263                 unless ($$create{subject});
264         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
265                 unless ($$create{keyword});
266
267
268         try {
269                 my ($r) = $begin->run($client);
270                 unless (defined $r and $r) {
271                         $rollback->run;
272                         throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
273                 }
274         } catch Error with {
275                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
276         };
277
278         my @source_maps;
279         my @entry_list;
280         my @mr_list;
281         my @rd_list;
282         my @ns_list;
283         my @mods_data;
284         my $ret = 0;
285         for my $entry ( $lookup->run(@docids) ) {
286                 # step -1: grab the doc from storage
287                 next unless ($entry);
288
289                 my $xml = $entry->marc;
290                 my $docid = $entry->id;
291                 my $marcdoc = $parser->parse_string($xml);
292                 my $modsdoc = $mods_sheet->transform($marcdoc);
293
294                 my $mods = $modsdoc->documentElement;
295                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
296
297                 $entry->fingerprint( fingerprint_mods( $mods ) );
298                 push @entry_list, $entry;
299
300                 unless ($no_map) {
301                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
302                         if (!@$mr) {
303                                 $mr = new Fieldmapper::metabib::metarecord;
304                                 $mr->fingerprint( $entry->fingerprint );
305                                 $mr->master_record( $entry->id );
306                                 my ($new_mr) = $mr_create->run($mr);
307                                 $mr->id($new_mr);
308                                 unless (defined $mr) {
309                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
310                                 }
311                         } else {
312                                 $mr = $$mr[0];
313                                 $mr->mods('');
314                                 push @mr_list, $mr;
315                         }
316
317                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
318                         $sm->metarecord( $mr->id );
319                         $sm->source( $entry->id );
320                         push @source_maps, $sm;
321                 }
322
323                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
324                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
325
326                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
327                 for my $rd_field ( keys %descriptor_code ) {
328                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
329                 }
330                 $rd_obj->record( $docid );
331                 push @rd_list, $rd_obj;
332
333                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
334
335                 # step 2: build the KOHA rows
336                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
337                 $_->record( $docid ) for (@tmp_list);
338                 push @ns_list, @tmp_list;
339
340                 $ret++;
341
342                 last unless ($self->api_name =~ /batch$/o);
343         }
344
345         $rm_old_rd->run( { record => \@docids } );
346         $rm_old_fr->run( { record => \@docids } );
347         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
348         $rm_old_tr->run( { source => \@docids } );
349         $rm_old_ar->run( { source => \@docids } );
350         $rm_old_sr->run( { source => \@docids } );
351         $rm_old_kr->run( { source => \@docids } );
352
353         unless ($no_map) {
354                 my ($sm) = $create_source_map->run(@source_maps);
355                 unless (defined $sm) {
356                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
357                 }
358                 my ($mr) = $mr_update->run(@mr_list);
359                 unless (defined $mr) {
360                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
361                 }
362         }
363
364         my ($re) = $update_entry->run(@entry_list);
365         unless (defined $re) {
366                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
367         }
368
369         my ($rd) = $rd_create->run(@rd_list);
370         unless (defined $rd) {
371                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
372         }
373
374         my ($fr) = $fr_create->run(@ns_list);
375         unless (defined $fr) {
376                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
377         }
378
379         # step 5: insert the new metadata
380         for my $class ( qw/title author subject keyword/ ) {
381                 my @md_list = ();
382                 for my $doc ( @mods_data ) {
383                         my ($did) = keys %$doc;
384                         my ($data) = values %$doc;
385
386                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
387                         for my $row ( keys %{ $$data{$class} } ) {
388                                 next unless (exists $$data{$class}{$row});
389                                 next unless ($$data{$class}{$row}{value});
390                                 my $fm_obj = $fm_constructor->new;
391                                 $fm_obj->value( $$data{$class}{$row}{value} );
392                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
393                                 $fm_obj->source( $did );
394                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
395
396                                 push @md_list, $fm_obj;
397                         }
398                 }
399                         
400                 my ($cr) = $$create{$class}->run(@md_list);
401                 unless (defined $cr) {
402                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
403                 }
404         }
405
406         my ($c) = $commit->run;
407         unless (defined $c and $c) {
408                 $rollback->run;
409                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
410         }
411
412         return $ret;
413 }
414 __PACKAGE__->register_method( 
415         api_name        => "open-ils.worm.wormize",
416         method          => "wormize",
417         api_level       => 1,
418         argc            => 1,
419 );
420 __PACKAGE__->register_method( 
421         api_name        => "open-ils.worm.wormize.no_map",
422         method          => "wormize",
423         api_level       => 1,
424         argc            => 1,
425 );
426 __PACKAGE__->register_method( 
427         api_name        => "open-ils.worm.wormize.batch",
428         method          => "wormize",
429         api_level       => 1,
430         argc            => 1,
431 );
432 __PACKAGE__->register_method( 
433         api_name        => "open-ils.worm.wormize.no_map.batch",
434         method          => "wormize",
435         api_level       => 1,
436         argc            => 1,
437 );
438
439
440 # --------------------------------------------------------------------------------
441
442
443 sub _marcxml_to_full_rows {
444
445         my $marcxml = shift;
446
447         my @ns_list;
448         
449         my $root = $marcxml->documentElement;
450
451         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
452                 next unless $tagline;
453
454                 my $ns = new Fieldmapper::metabib::full_rec;
455
456                 $ns->tag( 'LDR' );
457                 my $val = $tagline->textContent;
458                 $val =~ s/(\pM)//gso;
459                 $ns->value( $val );
460
461                 push @ns_list, $ns;
462         }
463
464         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
465                 next unless $tagline;
466
467                 my $ns = new Fieldmapper::metabib::full_rec;
468
469                 $ns->tag( $tagline->getAttribute( "tag" ) );
470                 my $val = $tagline->textContent;
471                 $val =~ s/(\pM)//gso;
472                 $ns->value( $val );
473
474                 push @ns_list, $ns;
475         }
476
477         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
478                 next unless $tagline;
479
480                 my $tag = $tagline->getAttribute( "tag" );
481                 my $ind1 = $tagline->getAttribute( "ind1" );
482                 my $ind2 = $tagline->getAttribute( "ind2" );
483
484                 for my $data ( $tagline->childNodes ) {
485                         next unless $data;
486
487                         my $ns = new Fieldmapper::metabib::full_rec;
488
489                         $ns->tag( $tag );
490                         $ns->ind1( $ind1 );
491                         $ns->ind2( $ind2 );
492                         $ns->subfield( $data->getAttribute( "code" ) );
493                         my $val = $data->textContent;
494                         $val =~ s/(\pM)//gso;
495                         $ns->value( lc($val) );
496
497                         push @ns_list, $ns;
498                 }
499         }
500         return @ns_list;
501 }
502
503 sub _get_field_value {
504
505         my( $root, $xpath ) = @_;
506
507         my $string = "";
508
509         # grab the set of matching nodes
510         my @nodes = $root->findnodes( $xpath );
511         for my $value (@nodes) {
512
513                 # grab all children of the node
514                 my @children = $value->childNodes();
515                 for my $child (@children) {
516
517                         # add the childs content to the growing buffer
518                         my $content = quotemeta($child->textContent);
519                         next if ($string =~ /$content/);  # uniquify the values
520                         $string .= $child->textContent . " ";
521                 }
522                 if( ! @children ) {
523                         $string .= $value->textContent . " ";
524                 }
525         }
526         $string =~ s/(\pM)//gso;
527         return lc($string);
528 }
529
530
531 sub modsdoc_to_values {
532         my( $self, $mods ) = @_;
533         my $data = {};
534         for my $class (keys %$xpathset) {
535                 $data->{$class} = {};
536                 for my $type (keys %{$xpathset->{$class}}) {
537                         $data->{$class}->{$type} = {};
538                         $data->{$class}->{$type}->{value} = _get_field_value( $mods, $xpathset->{$class}->{$type}->{xpath} );
539                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
540                 }
541         }
542         return $data;
543 }
544
545
546 1;
547
548