1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use OpenSRF::Utils::JSON;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
22 our $log = 'OpenSRF::Utils::Logger';
23 our $xml_util = OpenILS::Utils::FlatXML->new();
25 our $parser = XML::LibXML->new();
26 our $xslt = XML::LibXSLT->new();
34 $st_sess = $sess if ($sess);
44 $log->debug("Running post_init", DEBUG);
46 unless ($mods_sheet) {
47 $log->debug("Loading MODS XSLT", DEBUG);
48 my $xslt_doc = $parser->parse_file(
49 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
50 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
53 #if (!__PACKAGE__->st_sess()) {
54 # $log->debug("Creating cached storage server session", DEBUG);
55 # __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
58 unless (keys %$xpathset) {
59 my $req = __PACKAGE__->storage_req('open-ils.storage.direct.config.metabib_field.retrieve.all.atomic');
61 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
62 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
63 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
78 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
84 OpenILS::Application::WoRM->post_init();
85 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# begin_transaction
#
# Makes sure the open-ils.storage service has a transaction open for WoRM.
# Runs post_init, asks storage for the current transaction, and on the
# "not inside a transaction" path sends 'open-ils.storage.transaction.begin'
# (passing $client).  If that request returns an undefined or false value a
# rollback is requested and OpenSRF::EX::PANIC is thrown.
# The last statement returns the result of a fresh
# 'open-ils.storage.transaction.current' request.
# NOTE(review): presumably the begin is only attempted when $outer_xact is
# false, and $client comes from the sub's arguments -- confirm.
88 sub begin_transaction {
92 OpenILS::Application::WoRM->post_init();
# Ask storage whether a transaction is already in progress.
93 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
97 $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
98 #__PACKAGE__->st_sess->connect;
99 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A missing or false reply means the BEGIN did not succeed: undo and panic.
100 unless (defined $r and $r) {
101 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
102 #__PACKAGE__->st_sess->disconnect;
103 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
# NOTE(review): this ERROR-level log looks like the failure handler for the
# begin attempt above -- confirm.
107 $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
# Hand back whatever storage now reports as the current transaction.
110 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# rollback_transaction
#
# Rolls back the storage transaction WoRM is working in.  Runs post_init,
# queries 'open-ils.storage.transaction.current', and sends
# 'open-ils.storage.transaction.rollback'.  A separate path only logs that
# WoRM is not inside a transaction, and a failure path throws
# OpenSRF::EX::PANIC.
# NOTE(review): presumably the rollback request is sent only when
# $outer_xact is true and the INFO log is the alternative branch -- confirm.
113 sub rollback_transaction {
117 OpenILS::Application::WoRM->post_init();
118 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
122 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
124 $log->debug("WoRM isn't inside a transaction.", INFO);
127 throw OpenSRF::EX::PANIC ("WoRM Couldn't ROLLBACK transaction!")
# commit_transaction
#
# Commits the storage transaction WoRM is working in.  Runs post_init,
# queries 'open-ils.storage.transaction.current', and sends
# 'open-ils.storage.transaction.commit'.  If the commit reply is undefined
# or false, a rollback is requested and OpenSRF::EX::PANIC is thrown.
# NOTE(review): presumably the commit is attempted only when $outer_xact is
# true, with the "isn't inside a transaction" log as the other branch --
# confirm.
133 sub commit_transaction {
137 OpenILS::Application::WoRM->post_init();
138 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
141 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
143 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A missing or false reply means the COMMIT failed: roll back and panic.
144 unless (defined $r and $r) {
145 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
146 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
148 #__PACKAGE__->st_sess->disconnect;
150 $log->debug("WoRM isn't inside a transaction.", INFO);
153 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
162 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
163 return shift( @res );
# scrub_authority_record
#
# Removes the derived rows for an authority record.  Starts a transaction
# if WoRM is not already in one, then, under the savepoint
# 'scrub_authority_record', mass-deletes authority.full_rec and
# authority.record_descriptor rows whose record matches $rec and releases
# the savepoint.  The failure path logs the error and rolls back to the
# savepoint.  Finally the transaction is committed or rolled back depending
# on $success, but only when $commit is set.
# NOTE(review): presumably $commit is set only when this sub opened the
# transaction itself -- confirm.
166 sub scrub_authority_record {
172 if (!OpenILS::Application::WoRM->in_transaction) {
173 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
179 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
# Delete every derived row that points at this authority record.
181 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
182 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
184 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and undo back to the savepoint.
186 $log->debug('Scrubbing failed : '.shift(), ERROR);
187 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# Only finish the transaction when $commit is set.
191 OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
192 OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
195 __PACKAGE__->register_method(
196 api_name => "open-ils.worm.scrub.authority",
197 method => "scrub_authority_record",
# scrub_metabib_record
#
# Removes the derived metabib rows for one or more bibliographic records.
# $rec may be a hash reference, in which case it is replaced by the result
# of an id_list search_where on biblio.record_entry.  Under the savepoint
# 'scrub_metabib_record' it mass-deletes full_rec, metarecord_source_map,
# record_descriptor and the five *_field_entry tables for $rec, then looks
# at every metarecord whose master_record is $rec: the metarecord is either
# re-pointed at the source of its first remaining source map (with mods
# reset to undef) or deleted.  The failure path logs and rolls back to the
# savepoint; the transaction is committed/rolled back only when $commit is
# set.
# NOTE(review): presumably the re-point happens when other source maps
# remain and the delete happens otherwise -- confirm.
203 sub scrub_metabib_record {
# A hash ref is treated as a search: turn it into record id(s).
208 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
209 $rec = OpenILS::Application::WoRM->storage_req(
210 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
215 if (!OpenILS::Application::WoRM->in_transaction) {
216 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
222 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Note the key differs per table: 'record' for full_rec/record_descriptor,
# 'source' for the source map and the field-entry tables.
224 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
225 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
226 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
227 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
228 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
229 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
230 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
231 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
233 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
234 my $masters = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
236 for my $mr (@$masters) {
237 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Source maps still attached to this metarecord (the maps for $rec itself
# were deleted above).
238 my $others = OpenILS::Application::WoRM->storage_req(
239 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Re-point the metarecord at the first remaining source and clear its mods.
242 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
243 $mr->master_record($others->[0]->source);
244 OpenILS::Application::WoRM->storage_req(
245 'open-ils.storage.direct.metabib.metarecord.remote_update',
247 { master_record => $others->[0]->source, mods => undef }
# NOTE(review): the warn calls below duplicate the debug log lines on
# STDERR; they look like leftover debugging output -- confirm before removal.
250 warn "Removing metarecord whose master is $rec";
251 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
252 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
253 warn "Metarecord removed";
254 $log->debug( "Metarecord removed", DEBUG);
258 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo back to the savepoint.
261 $log->debug('Scrubbing failed : '.shift(), ERROR);
262 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
266 OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
267 OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
270 __PACKAGE__->register_method(
271 api_name => "open-ils.worm.scrub.biblio",
272 method => "scrub_metabib_record",
# wormize_biblio_metarecord
#
# Re-ingests every bibliographic record attached to a metarecord.  Fetches
# the metarecord_source_map rows for $mrec and calls wormize_biblio_record
# (as a plain function, passing $self and $client through) for each row's
# source.  Two result hashes carrying 'record' and 'metarecord' keys are
# built per row.
# NOTE(review): the two hashes look like per-record success and failure
# reports -- confirm.
# NOTE(review): the hashes read $rec->metarecord, but the row variable used
# everywhere else here is $r; $rec may be a typo for $r -- confirm.
277 sub wormize_biblio_metarecord {
282 my $recs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
288 $success = wormize_biblio_record($self => $client => $r->source);
290 { record => $r->source,
291 metarecord => $rec->metarecord,
298 { record => $r->source,
299 metarecord => $rec->metarecord,
308 __PACKAGE__->register_method(
309 api_name => "open-ils.worm.wormize.metarecord",
310 method => "wormize_biblio_metarecord",
315 __PACKAGE__->register_method(
316 api_name => "open-ils.worm.wormize.metarecord.nomap",
317 method => "wormize_biblio_metarecord",
322 __PACKAGE__->register_method(
323 api_name => "open-ils.worm.wormize.metarecord.noscrub",
324 method => "wormize_biblio_metarecord",
329 __PACKAGE__->register_method(
330 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
331 method => "wormize_biblio_metarecord",
# wormize_biblio_record
#
# Ingests ("wormizes") one or more bibliographic records.  Outline:
#   1. $rec may be a hash ref, which is resolved to id(s) via search_where.
#   2. A transaction is started if WoRM is not already inside one.
#   3. Unless the api_name contains "noscrub", existing derived data is
#      removed through 'open-ils.worm.scrub.biblio'.
#   4. For each bib record, under a per-record savepoint
#      ('extract_data' . id): parse the MARC, refresh fingerprint/quality,
#      collect flat-MARC rows, the record descriptor and the field entries
#      for the title/author/subject/keyword/series classes, and -- unless
#      the api_name contains "nomap" -- find or create the metarecord for
#      the fingerprint and queue a source map.
#   5. Under the 'wormize_record' savepoint: batch-create the collected
#      rows and pick each touched metarecord's master as the attached bib
#      with the highest quality.
#   6. Commit or roll back when $commit is set.
338 sub wormize_biblio_record {
# A hash ref is treated as a search: turn it into record id(s).
343 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
344 $rec = OpenILS::Application::WoRM->storage_req(
345 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
351 if (!OpenILS::Application::WoRM->in_transaction) {
352 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# The registered *.noscrub API names skip the cleanup of existing data.
359 unless ($self->api_name =~ /noscrub/o) {
360 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
364 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
367 my @rec_descriptor = ();
# One savepoint per record, so a bad record can be undone on its own.
379 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
381 my $xml = $parser->parse_string($r->marc);
383 #update the fingerprint
384 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# Only write back when fingerprint or quality actually changed.
# NOTE(review): quality is compared with the string operator 'ne' here,
# while refingerprint_bibrec compares it numerically with '!=' -- confirm
# which is intended.
385 OpenILS::Application::WoRM->storage_req(
386 'open-ils.storage.direct.biblio.record_entry.remote_update',
388 { fingerprint => $fp->{fingerprint},
389 quality => int($fp->{quality}) }
390 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
# Flat MARC rows: stamp each with the owning record id.
393 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
394 $fr->record( $r->id );
398 # the rec_descriptor stuff
399 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
400 $rd->record( $r->id );
401 push @rec_descriptor, $rd;
403 # the indexing field entry stuff
404 for my $class ( qw/title author subject keyword series/ ) {
405 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
406 $fe->source( $r->id );
407 push @{$field_entry{$class}}, $fe;
# The registered *.nomap API names skip metarecord mapping.
411 unless ($self->api_name =~ /nomap/o) {
# Take the first metarecord that already has this fingerprint, if any.
412 my $mr = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# NOTE(review): presumably a new metarecord is created only when the
# search above found none -- confirm.
415 $mr = Fieldmapper::metabib::metarecord->new;
416 $mr->fingerprint( $fp->{fingerprint} );
417 $mr->master_record( $r->id );
418 $mr->id( OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Queue the record -> metarecord link for the batch create below.
421 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
422 $mr_map->metarecord( $mr->id );
423 $mr_map->source( $r->id );
424 push @source_map, $mr_map;
# Remember every metarecord touched so its master can be re-evaluated.
426 $metarecord{$mr->id} = $mr;
428 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record: log and undo that record's savepoint.
430 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
431 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Nothing is written unless at least one record produced a descriptor.
436 if (@rec_descriptor) {
437 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
439 OpenILS::Application::WoRM->storage_req(
440 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
444 for my $mr ( values %metarecord ) {
445 my $sources = OpenILS::Application::WoRM->storage_req(
446 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
450 my $bibs = OpenILS::Application::WoRM->storage_req(
451 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
452 [ map { $_->source } @$sources ]
# Descending numeric sort on quality: the best record becomes the master.
455 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
457 OpenILS::Application::WoRM->storage_req(
458 'open-ils.storage.direct.metabib.metarecord.remote_update',
460 { master_record => $master->id, mods => undef }
464 OpenILS::Application::WoRM->storage_req(
465 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
467 ) if (@rec_descriptor);
469 OpenILS::Application::WoRM->storage_req(
470 'open-ils.storage.direct.metabib.full_rec.batch.create',
# Each field-entry class is only sent when it has rows to create.
474 OpenILS::Application::WoRM->storage_req(
475 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
476 @{ $field_entry{title} }
477 ) if (@{ $field_entry{title} });
479 OpenILS::Application::WoRM->storage_req(
480 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
481 @{ $field_entry{author} }
482 ) if (@{ $field_entry{author} });
484 OpenILS::Application::WoRM->storage_req(
485 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
486 @{ $field_entry{subject} }
487 ) if (@{ $field_entry{subject} });
489 OpenILS::Application::WoRM->storage_req(
490 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
491 @{ $field_entry{keyword} }
492 ) if (@{ $field_entry{keyword} });
494 OpenILS::Application::WoRM->storage_req(
495 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
496 @{ $field_entry{series} }
497 ) if (@{ $field_entry{series} });
499 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the write phase: log and undo back to the savepoint.
505 $log->debug('Wormization failed : '.shift(), ERROR);
506 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
510 OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
511 OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
514 __PACKAGE__->register_method(
515 api_name => "open-ils.worm.wormize.biblio",
516 method => "wormize_biblio_record",
520 __PACKAGE__->register_method(
521 api_name => "open-ils.worm.wormize.biblio.nomap",
522 method => "wormize_biblio_record",
526 __PACKAGE__->register_method(
527 api_name => "open-ils.worm.wormize.biblio.noscrub",
528 method => "wormize_biblio_record",
532 __PACKAGE__->register_method(
533 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
534 method => "wormize_biblio_record",
# wormize_authority_record
#
# Ingests authority records.  Starts a transaction if needed, scrubs
# existing derived data through 'open-ils.worm.scrub.authority' unless the
# api_name contains "noscrub", fetches the authority record entries for
# $rec, parses each record's MARC and collects its flat-MARC rows, then
# batch-creates the authority.full_rec rows under the
# 'wormize_authority_record' savepoint.  Record-descriptor handling is
# commented out.  The failure path logs and rolls back to the savepoint;
# the transaction is committed or rolled back only when $commit is set.
539 sub wormize_authority_record {
545 if (!OpenILS::Application::WoRM->in_transaction) {
546 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
553 unless ($self->api_name =~ /noscrub/o) {
554 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the name, $bibs holds authority record entries here.
558 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# NOTE(review): @rec_descriptor is only pushed to in commented-out code
# below, so it appears to stay empty -- confirm.
561 my @rec_descriptor = ();
563 my $xml = $parser->parse_string($r->marc);
# Flat MARC rows: stamp each with the owning authority record id.
566 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
567 $fr->record( $r->id );
571 # the rec_descriptor stuff -- XXX What does this mean for authority records?
572 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
573 #$rd->record( $r->id );
574 #push @rec_descriptor, $rd;
578 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
580 #OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
581 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
583 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error and undo back to the savepoint.
586 $log->debug('Wormization failed : '.shift(), ERROR);
587 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
591 OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
592 OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
595 __PACKAGE__->register_method(
596 api_name => "open-ils.worm.wormize.authority",
597 method => "wormize_authority_record",
601 __PACKAGE__->register_method(
602 api_name => "open-ils.worm.wormize.authority.noscrub",
603 method => "wormize_authority_record",
609 # --------------------------------------------------------------------------------
610 # MARC index extraction
612 package OpenILS::Application::WoRM::XPATH;
613 use base qw/OpenILS::Application::WoRM/;
614 use Unicode::Normalize;
616 # give this a MODS documentElement and an XPATH expression
#
# _xpath_to_string
#
# Builds one space-separated string from the text of all nodes matching
# $xpath under $xml.  When both a namespace URI and prefix are supplied the
# namespace is first registered on $xml.  For each matching node the text
# of its child nodes is appended; with $unique set, a child whose text
# already occurs in the accumulated string is skipped.
# NOTE(review): the uniqueness test is a regex substring match, so a value
# that is merely a substring of an earlier value is also dropped -- confirm
# that this is acceptable.
617 sub _xpath_to_string {
621 my $ns_prefix = shift;
624 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
628 # grab the set of matching nodes
629 my @nodes = $xml->findnodes( $xpath );
630 for my $value (@nodes) {
632 # grab all children of the node
633 my @children = $value->childNodes();
634 for my $child (@children) {
636 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern below.
637 my $content = quotemeta($child->textContent);
638 next if ($unique && $string =~ /$content/); # uniquify the values
639 $string .= $child->textContent . " ";
# NOTE(review): presumably this appends the node's own text when it has no
# children -- confirm.
642 $string .= $value->textContent . " ";
# class_all_index_string_xml
#
# Streams one "Fieldmapper::metabib::<class>_field_entry" object per
# configured field type of $class.  $xml may be a parsed document or a
# string (strings are parsed first).  For every type in $xpathset->{$class}
# the MARC is transformed with the MODS stylesheet, the type's XPath is
# evaluated to a string, the string is normalized, and an object carrying
# the value and the field id is sent with $client->respond.
# NOTE(review): $mods_sheet->transform($xml) appears to run once per field
# type although its input does not change inside the loop; hoisting it may
# be possible -- confirm.
648 sub class_all_index_string_xml {
654 OpenILS::Application::WoRM->post_init();
655 $xml = $parser->parse_string($xml) unless (ref $xml);
657 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
658 for my $type ( keys %{ $xpathset->{$class} } ) {
659 my $value = _xpath_to_string(
660 $mods_sheet->transform($xml)->documentElement,
661 $xpathset->{$class}->{$type}->{xpath},
662 "http://www.loc.gov/mods/",
# Strip Unicode mark (\pM) and "other"/control (\pC) characters.
669 $value =~ s/\pM+//sgo;
670 $value =~ s/\pC+//sgo;
671 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character.
673 $value =~ s/(\w)\./$1/sgo;
676 my $fm = $class_constructor->new;
677 $fm->value( $value );
678 $fm->field( $xpathset->{$class}->{$type}->{id} );
679 $client->respond($fm);
683 __PACKAGE__->register_method(
684 api_name => "open-ils.worm.field_entry.class.xml",
685 method => "class_all_index_string_xml",
# class_all_index_string_record
#
# Record-id front end for 'open-ils.worm.field_entry.class.xml': retrieves
# the biblio.record_entry identified by $rec, runs the XML method on its
# MARC for $class, and forwards each resulting object to the caller with
# $client->respond.
691 sub class_all_index_string_record {
697 OpenILS::Application::WoRM->post_init();
698 my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
700 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
702 $client->respond($fm);
706 __PACKAGE__->register_method(
707 api_name => "open-ils.worm.field_entry.class.record",
708 method => "class_all_index_string_record",
# class_index_string_xml
#
# Returns the index string for a single $class/$type pair.  $xml is parsed
# if it is not already a reference, transformed with the MODS stylesheet,
# and the XPath configured for that class and type is evaluated through
# _xpath_to_string using the "mods" namespace prefix and the unique flag
# set to 1.
715 sub class_index_string_xml {
722 OpenILS::Application::WoRM->post_init();
723 $xml = $parser->parse_string($xml) unless (ref $xml);
724 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
726 __PACKAGE__->register_method(
727 api_name => "open-ils.worm.class.type.xml",
728 method => "class_index_string_xml",
# class_index_string_record
#
# Record-id front end for 'open-ils.worm.class.type.xml': retrieves the
# biblio.record_entry identified by $rec, runs the XML method on its MARC
# for the given $class and $type, and logs the first result at DEBUG level.
733 sub class_index_string_record {
740 OpenILS::Application::WoRM->post_init();
741 my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# List assignment keeps only the first value returned by run().
743 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
744 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
747 __PACKAGE__->register_method(
748 api_name => "open-ils.worm.class.type.record",
749 method => "class_index_string_record",
763 OpenILS::Application::WoRM->post_init();
764 $xml = $parser->parse_string($xml) unless (ref $xml);
765 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
767 __PACKAGE__->register_method(
768 api_name => "open-ils.worm.xpath.xml",
769 method => "xml_xpath",
783 OpenILS::Application::WoRM->post_init();
784 my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
786 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
787 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
790 __PACKAGE__->register_method(
791 api_name => "open-ils.worm.xpath.record",
792 method => "record_xpath",
798 # --------------------------------------------------------------------------------
801 package OpenILS::Application::WoRM::Biblio::Leader;
802 use base qw/OpenILS::Application::WoRM/;
803 use Unicode::Normalize;
# Lookup tables for deriving record-descriptor values from a MARC record.
#
# %marc_type_groups is keyed by the group names passed to _type_re below
# (BKS, SER, MIX, SCO, REC, MAP, VIS are the names used in this file).
# %biblio_descriptor_code maps a descriptor field name to a closure that
# reads the package variables $ldr (leader) and $oo8 (008 control field)
# and returns the relevant substring.
805 our %marc_type_groups = (
# NOTE(review): `$marc_type_groups{@_}` is a single-element lookup (the
# array is used as one key), not a hash slice; `@marc_type_groups{@_}` was
# probably intended.  Also '^' . 'a|b' . '$' anchors only the first and last
# alternative unless the alternation is grouped -- confirm both.
817 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
821 our %biblio_descriptor_code = (
# Leader position 6 is used throughout as the record type code.
822 item_type => sub { substr($ldr,6,1); },
# The 008 offset consulted depends on the record's type group.
825 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
826 return substr($oo8,29,1);
827 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
828 return substr($oo8,23,1);
832 bib_level => sub { substr($ldr,7,1); },
833 control_type => sub { substr($ldr,8,1); },
834 char_encoding => sub { substr($ldr,9,1); },
835 enc_level => sub { substr($ldr,17,1); },
836 cat_form => sub { substr($ldr,18,1); },
837 pub_status => sub { substr($ldr,5,1); },
838 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat read the same 008 position (33) but only apply to
# the BKS and VIS type groups respectively; otherwise they yield undef.
839 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
840 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
841 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors
#
# Builds a Fieldmapper::metabib::record_descriptor from a MARC XML
# document.  The leader and the 008 and 007 control fields are read with
# namespace-agnostic XPath (local-name()) into $ldr, $oo8 and $oo7, then
# each closure in %biblio_descriptor_code is called and its result stored
# through the descriptor accessor of the same name.
844 sub _extract_biblio_descriptors {
# `local` gives these package variables a value for the duration of this
# call, which is how the closures in %biblio_descriptor_code see them.
847 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
848 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
849 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
851 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
852 for my $rd_field ( keys %biblio_descriptor_code ) {
# $rd_field doubles as the accessor method name on the descriptor object.
853 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml
#
# API wrapper around _extract_biblio_descriptors: accepts MARC XML either
# as a parsed document or as a string (parsed here) and returns the
# descriptor built from it.
859 sub extract_biblio_desc_xml {
864 $xml = $parser->parse_string($xml) unless (ref $xml);
866 return _extract_biblio_descriptors( $xml );
868 __PACKAGE__->register_method(
869 api_name => "open-ils.worm.biblio_leader.xml",
870 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record
#
# Record-id front end for 'open-ils.worm.biblio_leader.xml': retrieves the
# biblio.record_entry identified by $rec, runs the XML method on its MARC,
# and logs the first result as JSON at DEBUG level.
875 sub extract_biblio_desc_record {
880 OpenILS::Application::WoRM->post_init();
881 my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# List assignment keeps only the first value returned by run().
883 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
884 $log->debug("Record descriptor for bib rec $rec is ".OpenSRF::Utils::JSON->perl2JSON($d), DEBUG);
887 __PACKAGE__->register_method(
888 api_name => "open-ils.worm.biblio_leader.record",
889 method => "extract_biblio_desc_record",
894 # --------------------------------------------------------------------------------
897 package OpenILS::Application::WoRM::FlatMARC;
898 use base qw/OpenILS::Application::WoRM/;
899 use Unicode::Normalize;
# _marcxml_to_full_rows
#
# Flattens a MARC XML document into "Fieldmapper::<xmltype>::full_rec"
# rows; $xmltype defaults to 'metabib'.  The first element whose local name
# is "record" is taken as the root, and its leader, controlfield and
# datafield children are walked in that order.  Unicode mark characters
# (\pM) are stripped from every value; subfield values are additionally
# lower-cased.  The number of rows produced is logged at DEBUG level.
902 sub _marcxml_to_full_rows {
905 my $xmltype = shift || 'metabib';
907 my $type = "Fieldmapper::${xmltype}::full_rec";
911 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader
913 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
914 next unless $tagline;
919 my $val = $tagline->textContent;
921 $val =~ s/(\pM+)//gso;
# Control fields: the tag comes from the element's "tag" attribute.
927 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
928 next unless $tagline;
932 $ns->tag( $tagline->getAttribute( "tag" ) );
933 my $val = $tagline->textContent;
935 $val =~ s/(\pM+)//gso;
# Data fields: tag and indicators are read once per field and each
# subfield is then visited individually.
941 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
942 next unless $tagline;
944 my $tag = $tagline->getAttribute( "tag" );
945 my $ind1 = $tagline->getAttribute( "ind1" );
946 my $ind2 = $tagline->getAttribute( "ind2" );
948 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
956 $ns->subfield( $data->getAttribute( "code" ) );
957 my $val = $data->textContent;
959 $val =~ s/(\pM+)//gso;
960 $ns->value( lc($val) );
966 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
975 $xml = $parser->parse_string($xml) unless (ref $xml);
977 my $type = 'metabib';
978 $type = 'authority' if ($self->api_name =~ /authority/o);
980 OpenILS::Application::WoRM->post_init();
982 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
985 __PACKAGE__->register_method(
986 api_name => "open-ils.worm.flat_marc.authority.xml",
987 method => "flat_marc_xml",
992 __PACKAGE__->register_method(
993 api_name => "open-ils.worm.flat_marc.biblio.xml",
994 method => "flat_marc_xml",
# flat_marc_record
#
# Record-id front end for the flat-MARC XML methods.  The record type is
# 'biblio' unless the api_name contains "authority".  The matching
# <type>.record_entry is retrieved from storage and its MARC is run through
# "open-ils.worm.flat_marc.<type>.xml"; every resulting row is forwarded to
# the caller with $client->respond.
1000 sub flat_marc_record {
1005 my $type = 'biblio';
1006 $type = 'authority' if ($self->api_name =~ /authority/o);
1008 OpenILS::Application::WoRM->post_init();
# $type selects both the storage class and the worm method name below.
1009 my $r = OpenILS::Application::WoRM->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1011 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1014 __PACKAGE__->register_method(
1015 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1016 method => "flat_marc_record",
1021 __PACKAGE__->register_method(
1022 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1023 method => "flat_marc_record",
1030 # --------------------------------------------------------------------------------
1033 package OpenILS::Application::WoRM::Biblio::Fingerprint;
1034 use base qw/OpenILS::Application::WoRM/;
1035 use Unicode::Normalize;
1036 use OpenSRF::EX qw/:try/;
1038 my @fp_mods_xpath = (
1039 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1042 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1043 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1044 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1045 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1048 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1050 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1051 $text =~ s/\pM+//gso;
1052 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1054 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1055 $text =~ s/\s+/ /sgo;
1056 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1057 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1058 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1059 $text =~ s/\b(?:the|an?)\b//sgo;
1060 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1061 $text =~ s/\[.[^\]]+\]//sgo;
1062 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1063 $text =~ s/\s*[;\/\.]*$//sgo;
1064 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1069 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1070 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1073 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1075 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1076 $text =~ s/\pM+//gso;
1077 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1079 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1080 $text =~ s/\s+/ /sgo;
1081 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1082 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1083 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1084 $text =~ s/,?\s+.*$//sgo;
1085 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1090 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1093 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1094 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1095 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1096 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1097 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1098 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1099 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1100 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1103 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1105 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1106 $text =~ s/\pM+//gso;
1107 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1109 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1110 $text =~ s/\s+/ /sgo;
1111 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1112 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1113 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1114 $text =~ s/\b(?:the|an?)\b//sgo;
1115 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1116 $text =~ s/\[.[^\]]+\]//sgo;
1117 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1118 $text =~ s/\s*[;\/\.]*$//sgo;
1119 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1124 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1125 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1126 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1127 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1130 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1132 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1133 $text =~ s/\pM+//gso;
1134 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1136 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1137 $text =~ s/\s+/ /sgo;
1138 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1139 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1140 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1141 $text =~ s/,?\s+.*$//sgo;
1142 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1149 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1153 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1157 my $match_index = 0;
1158 my $block_index = 1;
1159 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1160 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1162 my $block_name_index = 0;
1163 my $block_value_index = 1;
1164 my $block = $fp_mods_xpath[$block_index];
1165 while ( my $part = $$block[$block_value_index] ) {
1167 for my $xpath ( @{ $part->{xpath} } ) {
1168 $text = $mods->findvalue( $xpath );
1172 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1176 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1177 $fp_string .= $text;
1180 $block_name_index += 2;
1181 $block_value_index += 2;
1185 $fp_string =~ s/\W+//gso;
1186 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec
#
# Recomputes the fingerprint and quality of bibliographic records and, when
# either differs from the stored value, writes the new values back.  Unless
# the api_name contains "nomap" the record's metarecord mapping is rebuilt:
# existing source maps for the record are deleted, a metarecord that ends
# up with no source maps is deleted, and a metarecord matching the new
# fingerprint is looked up (a new one is created with this record as its
# master) and linked through a new source map.  Each processed record id is
# sent to the caller with $client->respond.  The failure path logs the
# error; the transaction is committed or rolled back only when $commit is
# set.
# NOTE(review): presumably the metarecord is created only when the
# fingerprint search finds none -- confirm.
1196 sub refingerprint_bibrec {
1202 if (!OpenILS::Application::WoRM->in_transaction) {
1203 OpenILS::Application::WoRM->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1209 my $bibs = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): a lexical named $b shadows the special sort variable;
# harmless as long as no sort block is used inside this loop -- confirm.
1210 for my $b (@$bibs) {
1211 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Skip the update when neither the fingerprint nor the quality changed.
1213 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1215 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1217 OpenILS::Application::WoRM->storage_req(
1218 'open-ils.storage.direct.biblio.record_entry.remote_update',
1220 { fingerprint => $fp->{fingerprint},
1221 quality => $fp->{quality} }
# The registered *.nomap API name skips the metarecord remapping.
1224 if ($self->api_name !~ /nomap/o) {
1225 my $old_source_map = OpenILS::Application::WoRM->storage_req(
1226 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
1231 if (ref($old_source_map) and @$old_source_map) {
# Remember the metarecord the record used to belong to, then unlink it.
1232 for my $m (@$old_source_map) {
1233 $old_mrid = $m->metarecord;
1234 OpenILS::Application::WoRM->storage_req(
1235 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# Check whether the old metarecord still has any sources attached.
1241 my $old_sm = OpenILS::Application::WoRM->storage_req(
1242 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1243 { metarecord => $old_mrid }
# No sources left: the old metarecord is removed.
1246 if (ref($old_sm) and @$old_sm == 0) {
1247 OpenILS::Application::WoRM->storage_req(
1248 'open-ils.storage.direct.metabib.metarecord.delete',
# NOTE(review): this search passes a hash ref, whereas
# wormize_biblio_record passes the bare fingerprint string to the same
# storage method -- confirm which argument form the method expects.
1253 my $mr = OpenILS::Application::WoRM->storage_req(
1254 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1255 { fingerprint => $fp->{fingerprint} }
1259 $mr = Fieldmapper::metabib::metarecord->new;
1260 $mr->fingerprint( $fp->{fingerprint} );
1261 $mr->master_record( $b->id );
1262 $mr->id( OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Link the record to its (found or newly created) metarecord.
1265 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1266 $mr_map->metarecord( $mr->id );
1267 $mr_map->source( $b->id );
1268 OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
# Report the processed record id back to the caller.
1272 $client->respond($b->id);
1276 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1280 OpenILS::Application::WoRM->commit_transaction if ($commit && $success);
1281 OpenILS::Application::WoRM->rollback_transaction if ($commit && !$success);
1284 __PACKAGE__->register_method(
1285 api_name => "open-ils.worm.fingerprint.record.update",
1286 method => "refingerprint_bibrec",
1292 __PACKAGE__->register_method(
1293 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1294 method => "refingerprint_bibrec",
# fingerprint_bibrec: retrieve one biblio record entry ($rec) from storage and
# run the 'open-ils.worm.fingerprint.marc' method over its MARC.
1301 sub fingerprint_bibrec {
# post_init loads the MODS stylesheet and XPath set on first use.
1306 OpenILS::Application::WoRM->post_init();
1307 my $r = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# run() is called in list context; only its first result is kept.
1309 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# NOTE(review): refingerprint_bibrec reads this method's result as a hash ref
# ($fp->{fingerprint}); if that is what comes back here, "[$fp]" logs a
# reference address rather than the fingerprint text - confirm.
1310 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): 'open-ils.worm.fingerprint.record' is registered a second time
# in this file for biblio_fingerprint_record - confirm which one is intended.
1314 __PACKAGE__->register_method(
1315 api_name => "open-ils.worm.fingerprint.record",
1316 method => "fingerprint_bibrec",
# fingerprint_mods: parse a MODS XML string and return the fingerprint that
# _fp_mods computes from its document element.
1322 sub fingerprint_mods {
# post_init loads the MODS stylesheet and XPath set on first use.
1327 OpenILS::Application::WoRM->post_init();
# $xml is parsed with the shared XML::LibXML parser; only the root element is
# handed on.
1328 my $mods = $parser->parse_string($xml)->documentElement;
1330 return _fp_mods( $mods );
1332 __PACKAGE__->register_method(
1333 api_name => "open-ils.worm.fingerprint.mods",
1334 method => "fingerprint_mods",
# fingerprint_marc: transform MARCXML to MODS with the cached stylesheet and
# fingerprint the result via _fp_mods.
1339 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or an XML string.
1344 $xml = $parser->parse_string($xml) unless (ref $xml);
# post_init must run before $mods_sheet is used: it loads the stylesheet.
1346 OpenILS::Application::WoRM->post_init();
1347 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1348 $log->debug("Returning [$fp] as fingerprint", INFO);
# NOTE(review): 'open-ils.worm.fingerprint.marc' is registered a second time in
# this file for biblio_fingerprint - confirm which registration is intended.
1351 __PACKAGE__->register_method(
1352 api_name => "open-ils.worm.fingerprint.marc",
1353 method => "fingerprint_marc",
# biblio_fingerprint_record: retrieve a biblio record entry ($rec) from storage
# and run the 'open-ils.worm.fingerprint.marc' method over its MARC.
1361 sub biblio_fingerprint_record {
# post_init loads the MODS stylesheet and XPath set on first use.
1366 OpenILS::Application::WoRM->post_init();
# NOTE(review): the tail of this statement is not visible; presumably it
# extracts the MARC from the retrieved entry - confirm.
1368 my $marc = OpenILS::Application::WoRM
1369 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# run() is called in list context; only its first result is kept.
1372 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
# NOTE(review): if the fingerprint method returns a hash ref, "[$fp]" logs a
# reference address rather than the fingerprint - confirm.
1373 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): this api_name is also registered for fingerprint_bibrec earlier
# in the file - confirm which registration is intended.
1376 __PACKAGE__->register_method(
1377 api_name => "open-ils.worm.fingerprint.record",
1378 method => "biblio_fingerprint_record",
# biblio_fingerprint: fingerprint a MARC record by running the configured
# 'biblio_fingerprint' script with the MARC and derived MODS as its environment.
# NOTE(review): parts of the sub (argument unpacking, the stylesheet call that
# heads the MODS expression, the return statement) are not visible here.
1384 sub biblio_fingerprint {
# post_init loads the MODS stylesheet and XPath set on first use.
1389 OpenILS::Application::WoRM->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
1391 $marc = $parser->parse_string($marc) unless (ref $marc);
# Build the MODS form of the record and pass it through entityize.
1393 my $mods = OpenILS::Application::WoRM::entityize(
1395 ->transform( $marc )
# From here on $marc is the entityized string form of the root element, not a
# parsed document.
1401 $marc = OpenILS::Application::WoRM::entityize( $marc->documentElement->toString => 'D' );
1404 $log->internal("Got MARC [$marc]");
1405 $log->internal("Created MODS [$mods]");
# Script location and library paths are read from the open-ils.storage
# app_settings section of the settings server.
1408 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1409 my $conf = OpenSRF::Utils::SettingsClient->new;
1411 my $libs = $conf->config_value(@pfx, 'script_path');
1412 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref.
1413 my $script_libs = (ref($libs)) ? $libs : [$libs];
1415 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines; presumably a
# file-level cache with a guard on a hidden line - confirm.
1417 $fp_script = new OpenILS::Utils::ScriptRunner
1418 ( file => $script_file,
1419 paths => $script_libs,
1420 reset_count => 1000 );
1423 $log->debug("Applying environment for biblio fingerprinting...");
# The script sees the record under the name 'environment'.
1425 my $env = {marc => $marc, mods => $mods};
1426 #my $res = {fingerprint => '', quality => '0'};
1428 $fp_script->insert('environment' => $env);
1429 #$fp_script->insert('result' => $res);
1431 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() is logged as a script failure.
# NOTE(review): the early "return 0" only fires when $log->error returns a true
# value, because of the && - confirm the logger's return value.
1433 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
1435 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): this api_name is also registered for fingerprint_marc earlier
# in the file - confirm which registration is intended.
1439 __PACKAGE__->register_method(
1440 api_name => "open-ils.worm.fingerprint.marc",
1441 method => "biblio_fingerprint",
1446 # --------------------------------------------------------------------------------
1460 my $create_source_map;
# %descriptor_code maps each record_descriptor field name to a string of Perl
# source. The ingest subs string-eval each value with $ldr (the MARC leader
# text) and $oo8 (the 008 control field text) in scope, and store the result
# on the descriptor object through the accessor named by the key.
1475 my %descriptor_code = (
1476 item_type => 'substr($ldr,6,1)',
# item_form: 008 offset 29 when leader offset 6 is one of f,g,i,m,o,p,r;
# otherwise 008 offset 23.
1477 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1478 bib_level => 'substr($ldr,7,1)',
1479 control_type => 'substr($ldr,8,1)',
1480 char_encoding => 'substr($ldr,9,1)',
1481 enc_level => 'substr($ldr,17,1)',
1482 cat_form => 'substr($ldr,18,1)',
1483 pub_status => 'substr($ldr,5,1)',
# item_lang is the only three-character value.
1484 item_lang => 'substr($oo8,35,3)',
1485 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1486 audience => 'substr($oo8,22,1)',
# wormize (body): re-ingest biblio records - fingerprint each record, attach it
# to a metarecord, and rebuild its record descriptor, full_rec rows and
# per-class field entries in storage.
# NOTE(review): the "sub wormize {" line, argument unpacking, many closing
# braces and the try scaffolding are not visible in this listing; the comments
# below describe only the statements that are shown.
1496 if ($self->api_name =~ /no_map/o) {
# Storage method handles are kept in variables declared outside the visible
# lines. Where the continuation line is shown, a handle is looked up only when
# it is not already set.
1500 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1502 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1504 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1506 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1508 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1509 unless ($sm_lookup);
1510 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1511 unless ($mr_lookup);
1512 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1513 unless ($mr_update);
1514 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1516 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1517 unless ($update_entry);
1518 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1519 unless ($rm_old_sm);
1520 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1521 unless ($rm_old_rd);
1522 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1523 unless ($rm_old_fr);
1524 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1525 unless ($rm_old_tr);
1526 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1527 unless ($rm_old_ar);
1528 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1529 unless ($rm_old_sr);
1530 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1531 unless ($rm_old_kr);
1532 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1533 unless ($rm_old_ser);
1534 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1535 unless ($mr_create);
1536 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1537 unless ($create_source_map);
1538 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1539 unless ($rd_create);
1540 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1541 unless ($fr_create);
# One batch-create handle per search class, keyed by class name.
1542 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1543 unless ($$create{title});
1544 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1545 unless ($$create{author});
1546 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1547 unless ($$create{subject});
1548 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1549 unless ($$create{keyword});
1550 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1551 unless ($$create{series});
# Ask storage whether a transaction is already open; begin one if not.
# $outer_xact is checked again at the end to decide whether to commit.
1554 my ($outer_xact) = $in_xact->run;
1556 unless ($outer_xact) {
1557 $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1558 my ($r) = $begin->run($client);
1559 unless (defined $r and $r) {
1561 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1564 } catch Error with {
1565 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
# Per-record pass: collect everything to be written for each retrieved entry.
1575 for my $entry ( $lookup->run(@docids) ) {
1576 # step -1: grab the doc from storage
1577 next unless ($entry);
# NOTE(review): the guard around this stylesheet load is not visible; if there
# is none, the XSLT is re-parsed for every record - confirm.
1580 my $xslt_doc = $parser->parse_file(
1581 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
1582 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
# Parse the MARCXML and derive its MODS document.
1585 my $xml = $entry->marc;
1586 my $docid = $entry->id;
1587 my $marcdoc = $parser->parse_string($xml);
1588 my $modsdoc = $mods_sheet->transform($marcdoc);
1590 my $mods = $modsdoc->documentElement;
1591 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# NOTE(review): fingerprint_mods is called here as a plain function with the
# MODS element as its only argument, while the sub of that name in this file
# parses an XML string held in $xml - confirm which routine is meant.
1593 $entry->fingerprint( fingerprint_mods( $mods ) );
1594 push @entry_list, $entry;
1596 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Find the metarecord for this fingerprint, creating one when none exists.
1599 my ($mr) = $mr_lookup->run( $entry->fingerprint );
1600 if (!$mr || !@$mr) {
1601 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1602 $mr = new Fieldmapper::metabib::metarecord;
1603 $mr->fingerprint( $entry->fingerprint );
1604 $mr->master_record( $entry->id );
1605 my ($new_mr) = $mr_create->run($mr);
# NOTE(review): $mr holds the object built above, so "defined $mr" cannot
# detect a failed create unless a hidden line reassigns it; the check was
# probably meant for $new_mr - confirm.
1607 unless (defined $mr) {
1608 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1611 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue a source map row linking this record to its metarecord.
1616 my $sm = new Fieldmapper::metabib::metarecord_source_map;
1617 $sm->metarecord( $mr->id );
1618 $sm->source( $entry->id );
1619 push @source_maps, $sm;
# Leader and 008 text feed the %descriptor_code expressions, which refer to
# them by the names $ldr and $oo8.
1622 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1623 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# Build the record descriptor by string-evaluating each descriptor expression.
1625 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1626 for my $rd_field ( keys %descriptor_code ) {
1627 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1629 $rd_obj->record( $docid );
1630 push @rd_list, $rd_obj;
# Extract the indexed field values from the MODS, keyed by record id.
1632 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
1634 # step 2: build the KOHA rows
1635 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1636 $_->record( $docid ) for (@tmp_list);
1637 push @ns_list, @tmp_list;
# Only API names ending in "batch" process more than the first record.
1641 last unless ($self->api_name =~ /batch$/o);
# Remove previously derived rows for all requested ids. The source map
# delete is skipped when $no_map is set.
1644 $rm_old_rd->run( { record => \@docids } );
1645 $rm_old_fr->run( { record => \@docids } );
1646 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1647 $rm_old_tr->run( { source => \@docids } );
1648 $rm_old_ar->run( { source => \@docids } );
1649 $rm_old_sr->run( { source => \@docids } );
1650 $rm_old_kr->run( { source => \@docids } );
1651 $rm_old_ser->run( { source => \@docids } );
# Write the collected rows; an undefined result from any batch call is fatal.
1654 my ($sm) = $create_source_map->run(@source_maps);
1655 unless (defined $sm) {
1656 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1658 my ($mr) = $mr_update->run(@mr_list);
1659 unless (defined $mr) {
1660 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1664 my ($re) = $update_entry->run(@entry_list);
1665 unless (defined $re) {
1666 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1669 my ($rd) = $rd_create->run(@rd_list);
1670 unless (defined $rd) {
1671 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1674 my ($fr) = $fr_create->run(@ns_list);
1675 unless (defined $fr) {
1676 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1679 # step 5: insert the new metadata
1680 for my $class ( qw/title author subject keyword series/ ) {
# Each @mods_data element is a single-pair hash: record id => extracted data.
1682 for my $doc ( @mods_data ) {
1683 my ($did) = keys %$doc;
1684 my ($data) = values %$doc;
# The Fieldmapper class name is derived from the search class.
1686 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1687 for my $row ( keys %{ $$data{$class} } ) {
1688 next unless (exists $$data{$class}{$row});
# Rows with an empty (false) value are not stored.
1689 next unless ($$data{$class}{$row}{value});
1690 my $fm_obj = $fm_constructor->new;
1691 $fm_obj->value( $$data{$class}{$row}{value} );
1692 $fm_obj->field( $$data{$class}{$row}{field_id} );
1693 $fm_obj->source( $did );
1694 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1696 push @md_list, $fm_obj;
1700 my ($cr) = $$create{$class}->run(@md_list);
1701 unless (defined $cr) {
1702 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only when this call opened the transaction itself.
1706 unless ($outer_xact) {
1707 $log->debug("Commiting transaction started by the WoRM.", INFO);
1708 my ($c) = $commit->run;
1709 unless (defined $c and $c) {
1711 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# The same sub is published under four API names; the body switches on
# "no_map" and a trailing "batch" in the name.
1717 __PACKAGE__->register_method(
1718 api_name => "open-ils.worm.wormize",
1719 method => "wormize",
1723 __PACKAGE__->register_method(
1724 api_name => "open-ils.worm.wormize.no_map",
1725 method => "wormize",
1729 __PACKAGE__->register_method(
1730 api_name => "open-ils.worm.wormize.batch",
1731 method => "wormize",
1735 __PACKAGE__->register_method(
1736 api_name => "open-ils.worm.wormize.no_map.batch",
1737 method => "wormize",
1752 my $acreate_source_map;
# authority_wormize: re-ingest authority records - rebuild the record
# descriptor and full_rec rows for each requested authority record entry.
# NOTE(review): argument unpacking, closing braces and the try scaffolding are
# not visible in this listing; only the shown statements are described.
#
# Fixes relative to the previous revision:
#   * records are fetched through $alookup (authority.record_entry), not the
#     biblio handle $lookup, which is only initialised by wormize;
#   * full_rec rows are stored through $afr_create (authority.full_rec), not
#     the metabib handle $fr_create;
#   * both authority API names are registered against this sub rather than
#     the biblio "wormize" sub, and the "authortiy" misspelling is corrected.
#     Callers that used the misspelled name must switch to the corrected one.
1767 sub authority_wormize {
1774 if ($self->api_name =~ /no_map/o) {
# Storage method handles; where the continuation line is shown, a handle is
# looked up only when it is not already set.
1778 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1780 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1782 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1784 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1786 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1788 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1789 unless ($aupdate_entry);
1790 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1791 unless ($arm_old_rd);
1792 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1793 unless ($arm_old_fr);
1794 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1795 unless ($ard_create);
1796 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1797 unless ($afr_create);
# Ask storage whether a transaction is already open; begin one if not.
1800 my ($outer_xact) = $in_xact->run;
1802 unless ($outer_xact) {
1803 $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1804 my ($r) = $begin->run($client);
1805 unless (defined $r and $r) {
1807 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1810 } catch Error with {
1811 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
# Per-record pass over the authority entries.
1821 for my $entry ( $alookup->run(@docids) ) {
1822 # step -1: grab the doc from storage
1823 next unless ($entry);
1826 # my $xslt_doc = $parser->parse_file(
1827 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
1828 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1831 my $xml = $entry->marc;
1832 my $docid = $entry->id;
1833 my $marcdoc = $parser->parse_string($xml);
1834 #my $madsdoc = $mads_sheet->transform($marcdoc);
1836 #my $mads = $madsdoc->documentElement;
1837 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1839 push @entry_list, $entry;
# Leader and 008 text feed the %descriptor_code expressions, which refer to
# them by the names $ldr and $oo8.
1841 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1842 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1844 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1845 for my $rd_field ( keys %descriptor_code ) {
1846 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1848 $rd_obj->record( $docid );
1849 push @rd_list, $rd_obj;
1851 # step 2: build the KOHA rows
1852 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1853 $_->record( $docid ) for (@tmp_list);
1854 push @ns_list, @tmp_list;
# Only API names ending in "batch" process more than the first record.
1858 last unless ($self->api_name =~ /batch$/o);
# Remove previously derived rows, then write the new ones.
1861 $arm_old_rd->run( { record => \@docids } );
1862 $arm_old_fr->run( { record => \@docids } );
1864 my ($rd) = $ard_create->run(@rd_list);
1865 unless (defined $rd) {
1866 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
1869 my ($fr) = $afr_create->run(@ns_list);
1870 unless (defined $fr) {
1871 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only when this call opened the transaction itself.
1874 unless ($outer_xact) {
1875 $log->debug("Commiting transaction started by the WoRM.", INFO);
1876 my ($c) = $commit->run;
1877 unless (defined $c and $c) {
1879 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1885 __PACKAGE__->register_method(
1886 api_name => "open-ils.worm.authority.wormize",
1887 method => "authority_wormize",
1891 __PACKAGE__->register_method(
1892 api_name => "open-ils.worm.authority.wormize.batch",
1893 method => "authority_wormize",
1899 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows: flatten a parsed MARCXML document into full_rec row
# objects, built from the leader, each controlfield, and the child nodes of
# each datafield.
#   $marcxml - parsed document (its documentElement is walked)
#   $type    - row class to instantiate; defaults to
#              'Fieldmapper::metabib::full_rec'
# Fix: leader and controlfield rows are now built with $type as well. They
# were hard-coded to the metabib class, so a caller passing the authority
# class got a mix of row classes.
# NOTE(review): the lines that fill in the remaining row fields and collect
# and return the rows are not visible in this listing.
1902 sub _marcxml_to_full_rows {
1904 my $marcxml = shift;
1905 my $type = shift || 'Fieldmapper::metabib::full_rec';
1909 my $root = $marcxml->documentElement;
# Leader row.
1911 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1912 next unless $tagline;
1914 my $ns = $type->new;
# Decompose (NFD) and strip combining marks.
1917 my $val = NFD($tagline->textContent);
1918 $val =~ s/(\pM+)//gso;
# Control field rows: tag comes from the element's "tag" attribute.
1924 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1925 next unless $tagline;
1927 my $ns = $type->new;
1929 $ns->tag( $tagline->getAttribute( "tag" ) );
1930 my $val = NFD($tagline->textContent);
1931 $val =~ s/(\pM+)//gso;
# Data field rows: one row per child node, carrying the field's tag and
# indicators.
1937 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1938 next unless $tagline;
1940 my $tag = $tagline->getAttribute( "tag" );
1941 my $ind1 = $tagline->getAttribute( "ind1" );
1942 my $ind2 = $tagline->getAttribute( "ind2" );
1944 for my $data ( $tagline->childNodes ) {
1947 my $ns = $type->new;
1952 $ns->subfield( $data->getAttribute( "code" ) );
1953 my $val = NFD($data->textContent);
1954 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
1955 $ns->value( lc($val) );
# _get_field_value: build one space-separated string from the text of every
# node that $xpath matches under $root, then decompose it (NFD) and strip
# combining marks.
# NOTE(review): the declaration of $string, the branch structure around the
# two append statements and the return are not visible in this listing.
1963 sub _get_field_value {
1965 my( $root, $xpath ) = @_;
1969 # grab the set of matching nodes
1970 my @nodes = $root->findnodes( $xpath );
1971 for my $value (@nodes) {
1973 # grab all children of the node
1974 my @children = $value->childNodes();
1975 for my $child (@children) {
1977 # add the childs content to the growing buffer
# quotemeta makes the child text safe to use as a literal pattern. The match
# is an unanchored substring test against everything gathered so far.
1978 my $content = quotemeta($child->textContent);
1979 next if ($string =~ /$content/); # uniquify the values
1980 $string .= $child->textContent . " ";
# NOTE(review): presumably the fallback for a node without children; the
# enclosing condition is not visible - confirm.
1983 $string .= $value->textContent . " ";
1986 $string = NFD($string);
1987 $string =~ s/(\pM)//gso;
1992 sub modsdoc_to_values {
1993 my( $self, $mods ) = @_;
1995 for my $class (keys %$xpathset) {
1996 $data->{$class} = {};
1997 for my $type (keys %{$xpathset->{$class}}) {
1998 $data->{$class}->{$type} = {};
1999 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};