]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm
record_marc is now merged into record_entry
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Storage / WORM.pm
1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
4
5 use OpenSRF::EX qw/:try/;
6
7 use OpenSRF::Utils::Logger qw/:level/;
8 my $log = 'OpenSRF::Utils::Logger';
9
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
12 use JSON;
13
14 use XML::LibXML;
15 use XML::LibXSLT;
16 use Time::HiRes qw(time);
17
18 my $xml_util    = OpenILS::Utils::FlatXML->new();
19
20 my $parser              = XML::LibXML->new();
21 my $xslt                        = XML::LibXSLT->new();
22 my $xslt_doc    =       $parser->parse_file( "/home/miker/cvs/OpenILS/app_server/stylesheets/MARC21slim2MODS.xsl" );
23 #my $xslt_doc   = $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
24 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
25
26 use open qw/:utf8/;
27
28 my $xpathset = {};
29
30 sub child_init {
31         my $meth = __PACKAGE__->method_lookup('open-ils.storage.direct.config.metabib_field.all');
32         for my $f ($meth->run) {
33                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
34                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
35                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
36         }
37 }
38
39 # --------------------------------------------------------------------------------
40 # Fingerprinting
41
42 my @fp_mods_xpath = (
43         '//mods:mods/mods:typeOfResource[text()="text"]' => [
44                         title   => {
45                                         xpath   => [
46                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
47                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
48                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
49                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
50                                         ],
51                                         fixup   => '
52                                                         do {
53                                                                 $text = lc($text);
54                                                                 $text =~ s/\s+/ /sgo;
55                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
56                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
57                                                                 $text =~ s/\[.[^\]]+\]//sgo;
58                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
59                                                         };
60                                         ',
61                         },
62                         author  => {
63                                         xpath   => [
64                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
65                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
66                                         ],
67                                         fixup   => '
68                                                         do {
69                                                                 $text = lc($text);
70                                                                 $text =~ s/\s+/ /sgo;
71                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
72                                                                 ($text) = split ",", $text;
73                                                         };
74                                         ',
75                         },
76         ],
77
78         '//mods:mods/mods:relatedItem[@type!="host"]' => [
79                         title   => {
80                                         xpath   => [
81                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
82                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
83                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
84                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
85                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
86                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
87                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
88                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
89                                         ],
90                                         fixup   => '
91                                                         do {
92                                                                 $text = lc($text);
93                                                                 $text =~ s/\s+/ /sgo;
94                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
95                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
96                                                                 $text =~ s/\[.[^\]]+\]//sgo;
97                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
98                                                         };
99                                         ',
100                         },
101                         author  => {
102                                         xpath   => [
103                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
104                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
105                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
106                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
107                                         ],
108                                         fixup   => '
109                                                         do {
110                                                                 $text = lc($text);
111                                                                 $text =~ s/\s+/ /sgo;
112                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
113                                                                 ($text) = split ",", $text;
114                                                         };
115                                         ',
116                         },
117         ],
118
119 );
120
121 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
122
123 sub fingerprint_mods {
124         my $mods = shift;
125
126         my $fp_string = '';
127
128         my $match_index = 0;
129         my $block_index = 1;
130         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
131                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
132
133                         my $block_name_index = 0;
134                         my $block_value_index = 1;
135                         my $block = $fp_mods_xpath[$block_index];
136                         while ( my $part = $$block[$block_value_index] ) {
137                                 my $text;
138                                 for my $xpath ( @{ $part->{xpath} } ) {
139                                         $text = $mods->findvalue( $xpath );
140                                         last if ($text);
141                                 }
142
143                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
144
145                                 if ($text) {
146                                         eval $$part{fixup};
147                                         $fp_string .= $text;
148                                 }
149
150                                 $block_name_index += 2;
151                                 $block_value_index += 2;
152                         }
153                 }
154                 if ($fp_string) {
155                         $fp_string =~ s/\W+//gso;
156                         $log->debug("Fingerprint is [$fp_string]", INFO);;
157                         return $fp_string;
158                 }
159
160                 $match_index += 2;
161                 $block_index += 2;
162         }
163         return undef;
164 }
165
166
167
168 # --------------------------------------------------------------------------------
169
170 my $begin;
171 my $commit;
172 my $rollback;
173 my $lookup;
174 my $fetch_entry;
175 my $update_entry;
176 my $rm_old_rd;
177 my $rm_old_fr;
178 my $rm_old_tr;
179 my $rm_old_ar;
180 my $rm_old_sr;
181 my $rm_old_kr;
182
183 my $fr_create;
184 my $rd_create;
185 my $create = {};
186
187 my %descriptor_code = (
188         item_type => 'substr($ldr,6,1)',
189         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
190         bib_level => 'substr($ldr,7,1)',
191         control_type => 'substr($ldr,8,1)',
192         char_encoding => 'substr($ldr,9,1)',
193         enc_level => 'substr($ldr,17,1)',
194         cat_form => 'substr($ldr,18,1)',
195         pub_status => 'substr($ldr,5,1)',
196         item_lang => 'substr($oo8,35,3)',
197         audience => 'substr($oo8,22,1)',
198 );
199
200 sub wormize {
201
202         my $self = shift;
203         my $client = shift;
204         my @docids = @_;
205
206         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
207                 unless ($begin);
208         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
209                 unless ($commit);
210         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
211                 unless ($rollback);
212         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
213                 unless ($lookup);
214         $fetch_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.retrieve')
215                 unless ($update_entry);
216         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
217                 unless ($update_entry);
218         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
219                 unless ($rm_old_rd);
220         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
221                 unless ($rm_old_fr);
222         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
223                 unless ($rm_old_tr);
224         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
225                 unless ($rm_old_ar);
226         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
227                 unless ($rm_old_sr);
228         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
229                 unless ($rm_old_kr);
230         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
231                 unless ($rd_create);
232         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
233                 unless ($fr_create);
234         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
235                 unless ($$create{title});
236         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
237                 unless ($$create{author});
238         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
239                 unless ($$create{subject});
240         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
241                 unless ($$create{keyword});
242
243
244         try {
245                 my ($r) = $begin->run($client);
246                 unless (defined $r and $r) {
247                         $rollback->run;
248                         throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
249                 }
250         } catch Error with {
251                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
252         };
253
254         my @entry_list;
255         my @rd_list;
256         my @ns_list;
257         my @mods_data;
258         my $ret = 0;
259         for my $marc ( $lookup->run(@docids) ) {
260                 # step -1: grab the doc from storage
261                 next unless ($marc);
262
263                 my $xml = $marc->marc;
264                 my $docid = $marc->id;
265                 my $marcdoc = $parser->parse_string($xml);
266                 my $modsdoc = $mods_sheet->transform($marcdoc);
267
268                 my $mods = $modsdoc->documentElement;
269                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
270
271                 my ($entry) = $fetch_entry->run($docid);
272                 $entry->fingerprint( fingerprint_mods( $mods ) );
273                 push @entry_list, $entry;
274
275                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
276                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
277
278                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
279                 for my $rd_field ( keys %descriptor_code ) {
280                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
281                 }
282                 $rd_obj->record( $docid );
283                 push @rd_list, $rd_obj;
284
285                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
286
287                 # step 2: build the KOHA rows
288                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
289                 $_->record( $docid ) for (@tmp_list);
290                 push @ns_list, @tmp_list;
291
292                 $ret++;
293
294                 last unless ($self->api_name =~ /batch$/o);
295         }
296
297         $rm_old_rd->run( { record => \@docids } );
298         $rm_old_fr->run( { record => \@docids } );
299         $rm_old_tr->run( { source => \@docids } );
300         $rm_old_ar->run( { source => \@docids } );
301         $rm_old_sr->run( { source => \@docids } );
302         $rm_old_kr->run( { source => \@docids } );
303
304         my ($re) = $update_entry->run(@entry_list);
305         unless (defined $re) {
306                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
307         }
308
309         my ($rd) = $rd_create->run(@rd_list);
310         unless (defined $rd) {
311                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
312         }
313
314         my ($fr) = $fr_create->run(@ns_list);
315         unless (defined $fr) {
316                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
317         }
318
319         # step 5: insert the new metadata
320         for my $class ( qw/title author subject keyword/ ) {
321                 my @md_list = ();
322                 for my $doc ( @mods_data ) {
323                         my ($did) = keys %$doc;
324                         my ($data) = values %$doc;
325
326                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
327                         for my $row ( keys %{ $$data{$class} } ) {
328                                 next unless (exists $$data{$class}{$row});
329                                 next unless ($$data{$class}{$row}{value});
330                                 my $fm_obj = $fm_constructor->new;
331                                 $fm_obj->value( $$data{$class}{$row}{value} );
332                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
333                                 $fm_obj->source( $did );
334                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
335
336                                 push @md_list, $fm_obj;
337                         }
338                 }
339                         
340                 my ($cr) = $$create{$class}->run(@md_list);
341                 unless (defined $cr) {
342                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
343                 }
344         }
345
346         my ($c) = $commit->run;
347         unless (defined $c and $c) {
348                 $rollback->run;
349                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
350         }
351
352         return $ret;
353 }
354 __PACKAGE__->register_method( 
355         api_name        => "open-ils.worm.wormize",
356         method          => "wormize",
357         api_level       => 1,
358         argc            => 1,
359 );
360 __PACKAGE__->register_method( 
361         api_name        => "open-ils.worm.wormize.batch",
362         method          => "wormize",
363         api_level       => 1,
364         argc            => 1,
365 );
366
367
368 # --------------------------------------------------------------------------------
369
370
371 sub _marcxml_to_full_rows {
372
373         my $marcxml = shift;
374
375         my @ns_list;
376         
377         my $root = $marcxml->documentElement;
378
379         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
380                 next unless $tagline;
381
382                 my $ns = new Fieldmapper::metabib::full_rec;
383
384                 $ns->tag( 'LDR' );
385                 my $val = $tagline->textContent;
386                 $val =~ s/(\pM)//gso;
387                 $ns->value( $val );
388
389                 push @ns_list, $ns;
390         }
391
392         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
393                 next unless $tagline;
394
395                 my $ns = new Fieldmapper::metabib::full_rec;
396
397                 $ns->tag( $tagline->getAttribute( "tag" ) );
398                 my $val = $tagline->textContent;
399                 $val =~ s/(\pM)//gso;
400                 $ns->value( $val );
401
402                 push @ns_list, $ns;
403         }
404
405         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
406                 next unless $tagline;
407
408                 my $tag = $tagline->getAttribute( "tag" );
409                 my $ind1 = $tagline->getAttribute( "ind1" );
410                 my $ind2 = $tagline->getAttribute( "ind2" );
411
412                 for my $data ( $tagline->childNodes ) {
413                         next unless $data;
414
415                         my $ns = new Fieldmapper::metabib::full_rec;
416
417                         $ns->tag( $tag );
418                         $ns->ind1( $ind1 );
419                         $ns->ind2( $ind2 );
420                         $ns->subfield( $data->getAttribute( "code" ) );
421                         my $val = $data->textContent;
422                         $val =~ s/(\pM)//gso;
423                         $ns->value( lc($val) );
424
425                         push @ns_list, $ns;
426                 }
427         }
428         return @ns_list;
429 }
430
431 sub _get_field_value {
432
433         my( $root, $xpath ) = @_;
434
435         my $string = "";
436
437         # grab the set of matching nodes
438         my @nodes = $root->findnodes( $xpath );
439         for my $value (@nodes) {
440
441                 # grab all children of the node
442                 my @children = $value->childNodes();
443                 for my $child (@children) {
444
445                         # add the childs content to the growing buffer
446                         my $content = quotemeta($child->textContent);
447                         next if ($string =~ /$content/);  # uniquify the values
448                         $string .= $child->textContent . " ";
449                 }
450                 if( ! @children ) {
451                         $string .= $value->textContent . " ";
452                 }
453         }
454         $string =~ s/(\pM)//gso;
455         return lc($string);
456 }
457
458
459 sub modsdoc_to_values {
460         my( $self, $mods ) = @_;
461         my $data = {};
462         for my $class (keys %$xpathset) {
463                 $data->{$class} = {};
464                 for my $type (keys %{$xpathset->{$class}}) {
465                         $data->{$class}->{$type} = {};
466                         $data->{$class}->{$type}->{value} = _get_field_value( $mods, $xpathset->{$class}->{$type}->{xpath} );
467                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
468                 }
469         }
470         return $data;
471 }
472
473
474 1;
475
476