1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10 my $log = 'OpenSRF::Utils::Logger';
12 use OpenILS::Utils::FlatXML;
13 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
20 my $xml_util = OpenILS::Utils::FlatXML->new();
22 my $parser = XML::LibXML->new();
23 my $xslt = XML::LibXSLT->new();
32 my $meth = __PACKAGE__->method_lookup('open-ils.storage.direct.config.metabib_field.retrieve.all');
33 for my $f ($meth->run) {
34 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
35 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
36 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
40 # --------------------------------------------------------------------------------
44 '//mods:mods/mods:typeOfResource[text()="text"]' => [
47 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
48 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
49 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
50 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
54 $text = lc(NFD($text));
57 $text =~ s/^\s*(.+)\s*$/$1/sgo;
58 $text =~ s/\b(?:the|an?)\b//sgo;
59 $text =~ s/\[.[^\]]+\]//sgo;
60 $text =~ s/\s*[;\/\.]*$//sgo;
66 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
67 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
71 $text = lc(NFD($text));
74 $text =~ s/^\s*(.+)\s*$/$1/sgo;
75 $text =~ s/,?\s+.*$//sgo;
82 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
85 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
86 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
87 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
88 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
89 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
90 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
91 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
92 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
96 $text = lc(NFD($text));
99 $text =~ s/^\s*(.+)\s*$/$1/sgo;
100 $text =~ s/\b(?:the|an?)\b//sgo;
101 $text =~ s/\[.[^\]]+\]//sgo;
102 $text =~ s/\s*[;\/\.]*$//sgo;
103 $text =~ s/\pM+//gso;
109 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
110 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
111 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
112 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
116 $text = lc(NFD($text));
117 $text =~ s/\pM+//gso;
118 $text =~ s/\s+/ /sgo;
119 $text =~ s/^\s*(.+)\s*$/$1/sgo;
120 $text =~ s/,?\s+.*$//sgo;
121 $text =~ s/\pM+//gso;
129 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
131 sub fingerprint_mods {
138 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
139 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
141 my $block_name_index = 0;
142 my $block_value_index = 1;
143 my $block = $fp_mods_xpath[$block_index];
144 while ( my $part = $$block[$block_value_index] ) {
146 for my $xpath ( @{ $part->{xpath} } ) {
147 $text = $mods->findvalue( $xpath );
151 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
158 $block_name_index += 2;
159 $block_value_index += 2;
163 $fp_string =~ s/\W+//gso;
164 $log->debug("Fingerprint is [$fp_string]", INFO);;
176 # --------------------------------------------------------------------------------
187 my $create_source_map;
202 my %descriptor_code = (
203 item_type => 'substr($ldr,6,1)',
204 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
205 bib_level => 'substr($ldr,7,1)',
206 control_type => 'substr($ldr,8,1)',
207 char_encoding => 'substr($ldr,9,1)',
208 enc_level => 'substr($ldr,17,1)',
209 cat_form => 'substr($ldr,18,1)',
210 pub_status => 'substr($ldr,5,1)',
211 item_lang => 'substr($oo8,35,3)',
212 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
213 audience => 'substr($oo8,22,1)',
223 if ($self->api_name =~ /no_map/o) {
227 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
229 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
231 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
233 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
235 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
237 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
239 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
241 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
243 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
244 unless ($create_source_map);
245 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
247 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
248 unless ($update_entry);
249 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
251 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
253 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
255 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
257 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
259 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
261 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
263 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
264 unless ($rm_old_ser);
265 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
267 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
269 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
270 unless ($$create{title});
271 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
272 unless ($$create{author});
273 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
274 unless ($$create{subject});
275 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
276 unless ($$create{keyword});
277 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
278 unless ($$create{series});
281 my ($outer_xact) = $in_xact->run;
283 unless ($outer_xact) {
284 $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
285 my ($r) = $begin->run($client);
286 unless (defined $r and $r) {
288 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
292 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
302 for my $entry ( $lookup->run(@docids) ) {
303 # step -1: grab the doc from storage
304 next unless ($entry);
307 my $xslt_doc = $parser->parse_file(
308 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
309 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
312 my $xml = $entry->marc;
313 my $docid = $entry->id;
314 my $marcdoc = $parser->parse_string($xml);
315 my $modsdoc = $mods_sheet->transform($marcdoc);
317 my $mods = $modsdoc->documentElement;
318 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
320 $entry->fingerprint( fingerprint_mods( $mods ) );
321 push @entry_list, $entry;
323 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
326 my ($mr) = $mr_lookup->run( $entry->fingerprint );
328 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
329 $mr = new Fieldmapper::metabib::metarecord;
330 $mr->fingerprint( $entry->fingerprint );
331 $mr->master_record( $entry->id );
332 my ($new_mr) = $mr_create->run($mr);
334 unless (defined $mr) {
335 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
338 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
343 my $sm = new Fieldmapper::metabib::metarecord_source_map;
344 $sm->metarecord( $mr->id );
345 $sm->source( $entry->id );
346 push @source_maps, $sm;
349 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
350 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
352 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
353 for my $rd_field ( keys %descriptor_code ) {
354 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
356 $rd_obj->record( $docid );
357 push @rd_list, $rd_obj;
359 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
361 # step 2: build the KOHA rows
362 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
363 $_->record( $docid ) for (@tmp_list);
364 push @ns_list, @tmp_list;
368 last unless ($self->api_name =~ /batch$/o);
371 $rm_old_rd->run( { record => \@docids } );
372 $rm_old_fr->run( { record => \@docids } );
373 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
374 $rm_old_tr->run( { source => \@docids } );
375 $rm_old_ar->run( { source => \@docids } );
376 $rm_old_sr->run( { source => \@docids } );
377 $rm_old_kr->run( { source => \@docids } );
378 $rm_old_ser->run( { source => \@docids } );
381 my ($sm) = $create_source_map->run(@source_maps);
382 unless (defined $sm) {
383 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
385 my ($mr) = $mr_update->run(@mr_list);
386 unless (defined $mr) {
387 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
391 my ($re) = $update_entry->run(@entry_list);
392 unless (defined $re) {
393 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
396 my ($rd) = $rd_create->run(@rd_list);
397 unless (defined $rd) {
398 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
401 my ($fr) = $fr_create->run(@ns_list);
402 unless (defined $fr) {
403 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
406 # step 5: insert the new metadata
407 for my $class ( qw/title author subject keyword series/ ) {
409 for my $doc ( @mods_data ) {
410 my ($did) = keys %$doc;
411 my ($data) = values %$doc;
413 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
414 for my $row ( keys %{ $$data{$class} } ) {
415 next unless (exists $$data{$class}{$row});
416 next unless ($$data{$class}{$row}{value});
417 my $fm_obj = $fm_constructor->new;
418 $fm_obj->value( $$data{$class}{$row}{value} );
419 $fm_obj->field( $$data{$class}{$row}{field_id} );
420 $fm_obj->source( $did );
421 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
423 push @md_list, $fm_obj;
427 my ($cr) = $$create{$class}->run(@md_list);
428 unless (defined $cr) {
429 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
433 unless ($outer_xact) {
434 $log->debug("Commiting transaction started by the WoRM.", INFO);
435 my ($c) = $commit->run;
436 unless (defined $c and $c) {
438 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
444 __PACKAGE__->register_method(
445 api_name => "open-ils.worm.wormize",
450 __PACKAGE__->register_method(
451 api_name => "open-ils.worm.wormize.no_map",
456 __PACKAGE__->register_method(
457 api_name => "open-ils.worm.wormize.batch",
462 __PACKAGE__->register_method(
463 api_name => "open-ils.worm.wormize.no_map.batch",
479 my $acreate_source_map;
494 sub authority_wormize {
501 if ($self->api_name =~ /no_map/o) {
505 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
507 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
509 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
511 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
513 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
515 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
516 unless ($aupdate_entry);
517 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
518 unless ($arm_old_rd);
519 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
520 unless ($arm_old_fr);
521 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
522 unless ($ard_create);
523 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
524 unless ($afr_create);
527 my ($outer_xact) = $in_xact->run;
529 unless ($outer_xact) {
530 $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
531 my ($r) = $begin->run($client);
532 unless (defined $r and $r) {
534 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
538 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
548 for my $entry ( $lookup->run(@docids) ) {
549 # step -1: grab the doc from storage
550 next unless ($entry);
553 # my $xslt_doc = $parser->parse_file(
554 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
555 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
558 my $xml = $entry->marc;
559 my $docid = $entry->id;
560 my $marcdoc = $parser->parse_string($xml);
561 #my $madsdoc = $mads_sheet->transform($marcdoc);
563 #my $mads = $madsdoc->documentElement;
564 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
566 push @entry_list, $entry;
568 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
569 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
571 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
572 for my $rd_field ( keys %descriptor_code ) {
573 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
575 $rd_obj->record( $docid );
576 push @rd_list, $rd_obj;
578 # step 2: build the KOHA rows
579 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
580 $_->record( $docid ) for (@tmp_list);
581 push @ns_list, @tmp_list;
585 last unless ($self->api_name =~ /batch$/o);
588 $arm_old_rd->run( { record => \@docids } );
589 $arm_old_fr->run( { record => \@docids } );
591 my ($rd) = $ard_create->run(@rd_list);
592 unless (defined $rd) {
593 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
596 my ($fr) = $fr_create->run(@ns_list);
597 unless (defined $fr) {
598 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
601 unless ($outer_xact) {
602 $log->debug("Commiting transaction started by the WoRM.", INFO);
603 my ($c) = $commit->run;
604 unless (defined $c and $c) {
606 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
612 __PACKAGE__->register_method(
613 api_name => "open-ils.worm.authortiy.wormize",
618 __PACKAGE__->register_method(
619 api_name => "open-ils.worm.authority.wormize.batch",
626 # --------------------------------------------------------------------------------
629 sub _marcxml_to_full_rows {
632 my $type = shift || 'Fieldmapper::metabib::full_rec';
636 my $root = $marcxml->documentElement;
638 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
639 next unless $tagline;
641 my $ns = new Fieldmapper::metabib::full_rec;
644 my $val = NFD($tagline->textContent);
645 $val =~ s/(\pM+)//gso;
651 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
652 next unless $tagline;
654 my $ns = new Fieldmapper::metabib::full_rec;
656 $ns->tag( $tagline->getAttribute( "tag" ) );
657 my $val = NFD($tagline->textContent);
658 $val =~ s/(\pM+)//gso;
664 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
665 next unless $tagline;
667 my $tag = $tagline->getAttribute( "tag" );
668 my $ind1 = $tagline->getAttribute( "ind1" );
669 my $ind2 = $tagline->getAttribute( "ind2" );
671 for my $data ( $tagline->childNodes ) {
679 $ns->subfield( $data->getAttribute( "code" ) );
680 my $val = NFD($data->textContent);
681 $val =~ s/(\pM+)//gso;
682 $ns->value( lc($val) );
690 sub _get_field_value {
692 my( $root, $xpath ) = @_;
696 # grab the set of matching nodes
697 my @nodes = $root->findnodes( $xpath );
698 for my $value (@nodes) {
700 # grab all children of the node
701 my @children = $value->childNodes();
702 for my $child (@children) {
704 # add the childs content to the growing buffer
705 my $content = quotemeta($child->textContent);
706 next if ($string =~ /$content/); # uniquify the values
707 $string .= $child->textContent . " ";
710 $string .= $value->textContent . " ";
713 $string = NFD($string);
714 $string =~ s/(\pM)//gso;
719 sub modsdoc_to_values {
720 my( $self, $mods ) = @_;
722 for my $class (keys %$xpathset) {
723 $data->{$class} = {};
724 for my $type (keys %{$xpathset->{$class}}) {
725 $data->{$class}->{$type} = {};
726 $data->{$class}->{$type}->{value} = _get_field_value( $mods, $xpathset->{$class}->{$type}->{xpath} );
727 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};