1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Formats this module knows about, keyed by short name.  Each entry carries the
# XML namespace URI of the format under 'ns'; code later in this file also
# stores a parsed stylesheet under an 'xslt' key for the mods and mods3 entries.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
30 our $log = 'OpenSRF::Utils::Logger';
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
61 my $req = __PACKAGE__->storage_req('open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } });
63 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
64 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
65 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
66 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
81 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
85 # --------------------------------------------------------------------------------
86 # MARC index extraction
88 package OpenILS::Application::Ingest::XPATH;
89 use base qw/OpenILS::Application::Ingest/;
90 use Unicode::Normalize;
92 # give this a MODS documentElement and an XPATH expression
97 my $ns_prefix = shift;
100 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
104 # grab the set of matching nodes
105 my @nodes = $xml->findnodes( $xpath );
106 for my $value (@nodes) {
108 # grab all children of the node
109 my @children = $value->childNodes();
110 for my $child (@children) {
112 # add the childs content to the growing buffer
113 my $content = quotemeta($child->textContent);
114 next if ($unique && $string =~ /$content/); # uniquify the values
115 $string .= $child->textContent . " ";
118 $string .= $value->textContent . " ";
# Builds index field entries from an XML record.  For every requested class and
# every field type configured for that class in $xpathset, the record is run
# through $mods_sheet, xpath_to_string is called on the transformed document,
# the resulting text is cleaned up, and a
# Fieldmapper::metabib::<class>_field_entry object carrying the text and the
# configured field id is sent back through $client->respond.
# Registered as open-ils.ingest.field_entry.class.xml.
124 sub class_index_string_xml {
130 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
131 $xml = $parser->parse_string($xml) unless (ref $xml);
133 for my $class (@classes) {
134 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
135 for my $type ( keys %{ $xpathset->{$class} } ) {
137 my $def = $xpathset->{$class}->{$type};
# NOTE(review): the transform call sits inside the per-type loop, so the
# stylesheet is applied again for every field type -- confirm whether
# $mods_sheet varies per definition before hoisting it.
138 my $value = xpath_to_string(
139 $mods_sheet->transform($xml)->documentElement,
# Namespace URI comes from the format named by the field definition.
141 $supported_formats{$def->{format}}{ns},
# Strip Unicode mark characters (\pM) and "other"/control characters (\pC).
148 $value =~ s/\pM+//sgo;
149 $value =~ s/\pC+//sgo;
150 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character (e.g. initials).
152 $value =~ s/(\w)\./$1/sgo;
155 my $fm = $class_constructor->new;
156 $fm->value( $value );
157 $fm->field( $xpathset->{$class}->{$type}->{id} );
158 $client->respond($fm);
163 __PACKAGE__->register_method(
164 api_name => "open-ils.ingest.field_entry.class.xml",
165 method => "class_index_string_xml",
# Record-id flavour of the field-entry extractor: retrieves the bib record
# identified by $rec from open-ils.cstore, hands its MARC and the requested
# @classes to the open-ils.ingest.field_entry.class.xml method, and relays each
# object that method produces to the caller.
# NOTE(review): this sub is registered under an "open-ils.worm." API name while
# it looks up an "open-ils.ingest." method -- confirm the naming is intentional.
171 sub class_index_string_record {
177 OpenILS::Application::Ingest->post_init();
# A new cstore session is created just for this one retrieve.
178 my $r = OpenSRF::AppSession->create('open-ils.cstore')->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
180 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
182 $client->respond($fm);
186 __PACKAGE__->register_method(
187 api_name => "open-ils.worm.field_entry.class.record",
188 method => "class_index_string_record",
200 OpenILS::Application::Ingest->post_init();
201 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Makes sure the ingest code is running inside a storage transaction and
# returns whatever open-ils.storage.transaction.current reports afterwards.
# Throws OpenSRF::EX::PANIC when the storage service does not return a true
# value for transaction.begin.
204 sub begin_transaction {
208 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is already open.
209 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
213 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
214 #__PACKAGE__->st_sess->connect;
215 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# An undefined or false reply means BEGIN failed: roll back and bail out.
216 unless (defined $r and $r) {
217 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
218 #__PACKAGE__->st_sess->disconnect;
219 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
223 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
226 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the storage transaction the ingest code is working in.  The
# visible lines query the current transaction, issue
# open-ils.storage.transaction.rollback, log when there is no transaction, and
# throw OpenSRF::EX::PANIC on the failure path; the conditions that select
# between those branches are on lines not shown here.
229 sub rollback_transaction {
233 OpenILS::Application::Ingest->post_init();
234 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
238 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
240 $log->debug("Ingest isn't inside a transaction.", INFO);
243 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the storage transaction the ingest code is working in.  If the
# storage service does not return a true value for transaction.commit the
# transaction is rolled back and OpenSRF::EX::PANIC is thrown.
249 sub commit_transaction {
253 OpenILS::Application::Ingest->post_init();
254 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
257 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
259 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# An undefined or false reply means COMMIT failed: undo the work and bail out.
260 unless (defined $r and $r) {
261 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
262 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
264 #__PACKAGE__->st_sess->disconnect;
266 $log->debug("Ingest isn't inside a transaction.", INFO);
269 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
278 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
279 return shift( @res );
# Removes the derived rows for an authority record: the authority.full_rec and
# authority.record_descriptor rows whose record matches $rec are mass-deleted
# inside a 'scrub_authority_record' savepoint.
# Registered as open-ils.worm.scrub.authority.
282 sub scrub_authority_record {
# Open a transaction if the caller has not already done so.
288 if (!OpenILS::Application::Ingest->in_transaction) {
289 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
295 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
297 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
298 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
300 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and undo back to the savepoint.
302 $log->debug('Scrubbing failed : '.shift(), ERROR);
303 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# The transaction is only finished here when $commit is set; $success picks
# between commit and rollback.
307 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
308 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
311 __PACKAGE__->register_method(
312 api_name => "open-ils.worm.scrub.authority",
313 method => "scrub_authority_record",
# Removes the derived metabib rows for a bib record and repairs any metarecord
# that used the record as its master.  Work happens inside a
# 'scrub_metabib_record' savepoint.
# Registered as open-ils.worm.scrub.biblio.
319 sub scrub_metabib_record {
# A hash reference is treated as a search; it is replaced by the result of the
# biblio.record_entry id-list search.
324 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
325 $rec = OpenILS::Application::Ingest->storage_req(
326 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction if the caller has not already done so.
331 if (!OpenILS::Application::Ingest->in_transaction) {
332 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
338 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Delete every derived row: flattened MARC, metarecord mappings, the record
# descriptor, and the five classes of field entries.
340 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
341 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
342 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
343 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
344 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
345 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
346 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
347 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Metarecords that pointed at this record as master need a new master or, when
# no other source remains, removal.
349 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
350 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
352 for my $mr (@$masters) {
353 $log->debug( "Found metarecord whose master is $rec", DEBUG);
354 my $others = OpenILS::Application::Ingest->storage_req(
355 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining mapped source to master and clear the stored mods.
358 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
359 $mr->master_record($others->[0]->source);
360 OpenILS::Application::Ingest->storage_req(
361 'open-ils.storage.direct.metabib.metarecord.remote_update',
363 { master_record => $others->[0]->source, mods => undef }
# NOTE(review): the warn calls below duplicate the adjacent $log->debug lines
# on STDERR -- presumably leftover debugging; confirm before removing.
366 warn "Removing metarecord whose master is $rec";
367 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
368 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
369 warn "Metarecord removed";
370 $log->debug( "Metarecord removed", DEBUG);
374 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo back to the savepoint.
377 $log->debug('Scrubbing failed : '.shift(), ERROR);
378 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# The transaction is only finished here when $commit is set.
382 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
383 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
386 __PACKAGE__->register_method(
387 api_name => "open-ils.worm.scrub.biblio",
388 method => "scrub_metabib_record",
# Re-ingests every bib record mapped to a metarecord: the source map rows for
# $mrec are fetched and wormize_biblio_record is called for each row's source.
# Registered under four open-ils.worm.wormize.metarecord* API names.
393 sub wormize_biblio_metarecord {
398 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
# Called as a plain function, passing $self and $client through explicitly.
404 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): the hashes below read $rec->metarecord, while the other names
# visible in this sub are $mrec, $recs and $r -- confirm $rec is declared on a
# line not shown here (it may have been meant as $r).
406 { record => $r->source,
407 metarecord => $rec->metarecord,
414 { record => $r->source,
415 metarecord => $rec->metarecord,
424 __PACKAGE__->register_method(
425 api_name => "open-ils.worm.wormize.metarecord",
426 method => "wormize_biblio_metarecord",
431 __PACKAGE__->register_method(
432 api_name => "open-ils.worm.wormize.metarecord.nomap",
433 method => "wormize_biblio_metarecord",
438 __PACKAGE__->register_method(
439 api_name => "open-ils.worm.wormize.metarecord.noscrub",
440 method => "wormize_biblio_metarecord",
445 __PACKAGE__->register_method(
446 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
447 method => "wormize_biblio_metarecord",
# Full ingest of one or more bib records.  For each record the MARC is parsed,
# the fingerprint/quality is refreshed, and flattened MARC rows, a record
# descriptor and per-class field entries are collected; the collected objects
# are then written in batches inside a 'wormize_record' savepoint.
# The API name the method was invoked under controls two optional steps:
#   *noscrub* -- skip deleting the existing derived rows first
#   *nomap*   -- skip metarecord lookup/creation and source mapping
# Registered under four open-ils.worm.wormize.biblio* API names.
454 sub wormize_biblio_record {
# A hash reference is treated as a search; it is replaced by the result of the
# biblio.record_entry id-list search.
459 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
460 $rec = OpenILS::Application::Ingest->storage_req(
461 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction if the caller has not already done so.
467 if (!OpenILS::Application::Ingest->in_transaction) {
468 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
475 unless ($self->api_name =~ /noscrub/o) {
476 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
480 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
483 my @rec_descriptor = ();
# Each record gets its own savepoint, named after its id, for the extraction phase.
495 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
497 my $xml = $parser->parse_string($r->marc);
499 #update the fingerprint
500 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# The record row is only updated when the fingerprint or the integer quality
# differs from what is stored (both compared as strings with ne).
501 OpenILS::Application::Ingest->storage_req(
502 'open-ils.storage.direct.biblio.record_entry.remote_update',
504 { fingerprint => $fp->{fingerprint},
505 quality => int($fp->{quality}) }
506 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
# Flattened MARC rows, tagged with the owning record id.
509 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
510 $fr->record( $r->id );
514 # the rec_descriptor stuff
515 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
516 $rd->record( $r->id );
517 push @rec_descriptor, $rd;
519 # the indexing field entry stuff
520 for my $class ( qw/title author subject keyword series/ ) {
521 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
522 $fe->source( $r->id );
523 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: look up a metarecord by fingerprint; the lines below
# build a new one with this record as master and queue a source-map row.
527 unless ($self->api_name =~ /nomap/o) {
528 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
531 $mr = Fieldmapper::metabib::metarecord->new;
532 $mr->fingerprint( $fp->{fingerprint} );
533 $mr->master_record( $r->id );
534 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
537 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
538 $mr_map->metarecord( $mr->id );
539 $mr_map->source( $r->id );
540 push @source_map, $mr_map;
# Remember each touched metarecord once, keyed by id, for the master re-election below.
542 $metarecord{$mr->id} = $mr;
544 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Extraction failure path for this record: log and undo back to its savepoint.
546 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
547 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Write phase: only entered when at least one record descriptor was collected.
552 if (@rec_descriptor) {
553 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
555 OpenILS::Application::Ingest->storage_req(
556 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# Re-elect the master of every touched metarecord: the mapped bib with the
# highest quality wins (descending numeric sort, first element).
560 for my $mr ( values %metarecord ) {
561 my $sources = OpenILS::Application::Ingest->storage_req(
562 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
566 my $bibs = OpenILS::Application::Ingest->storage_req(
567 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
568 [ map { $_->source } @$sources ]
571 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
573 OpenILS::Application::Ingest->storage_req(
574 'open-ils.storage.direct.metabib.metarecord.remote_update',
576 { master_record => $master->id, mods => undef }
# Batch inserts; each is skipped when its list is empty.
580 OpenILS::Application::Ingest->storage_req(
581 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
583 ) if (@rec_descriptor);
585 OpenILS::Application::Ingest->storage_req(
586 'open-ils.storage.direct.metabib.full_rec.batch.create',
590 OpenILS::Application::Ingest->storage_req(
591 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
592 @{ $field_entry{title} }
593 ) if (@{ $field_entry{title} });
595 OpenILS::Application::Ingest->storage_req(
596 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
597 @{ $field_entry{author} }
598 ) if (@{ $field_entry{author} });
600 OpenILS::Application::Ingest->storage_req(
601 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
602 @{ $field_entry{subject} }
603 ) if (@{ $field_entry{subject} });
605 OpenILS::Application::Ingest->storage_req(
606 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
607 @{ $field_entry{keyword} }
608 ) if (@{ $field_entry{keyword} });
610 OpenILS::Application::Ingest->storage_req(
611 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
612 @{ $field_entry{series} }
613 ) if (@{ $field_entry{series} });
615 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Write failure path: log and undo back to the 'wormize_record' savepoint.
621 $log->debug('Wormization failed : '.shift(), ERROR);
622 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# The transaction is only finished here when $commit is set.
626 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
627 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
630 __PACKAGE__->register_method(
631 api_name => "open-ils.worm.wormize.biblio",
632 method => "wormize_biblio_record",
636 __PACKAGE__->register_method(
637 api_name => "open-ils.worm.wormize.biblio.nomap",
638 method => "wormize_biblio_record",
642 __PACKAGE__->register_method(
643 api_name => "open-ils.worm.wormize.biblio.noscrub",
644 method => "wormize_biblio_record",
648 __PACKAGE__->register_method(
649 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
650 method => "wormize_biblio_record",
# Ingest of authority records: the MARC of each record is parsed, flattened
# into full_rec rows tagged with the record id, and the rows are batch-created
# inside a 'wormize_authority_record' savepoint.  Record descriptor handling is
# commented out for authority records.
# Registered as open-ils.worm.wormize.authority and
# open-ils.worm.wormize.authority.noscrub.
655 sub wormize_authority_record {
# Open a transaction if the caller has not already done so.
661 if (!OpenILS::Application::Ingest->in_transaction) {
662 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# Existing derived rows are scrubbed first unless invoked under a *noscrub* API name.
669 unless ($self->api_name =~ /noscrub/o) {
670 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
674 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
677 my @rec_descriptor = ();
679 my $xml = $parser->parse_string($r->marc);
682 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
683 $fr->record( $r->id );
687 # the rec_descriptor stuff -- XXX What does this mean for authority records?
688 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
689 #$rd->record( $r->id );
690 #push @rec_descriptor, $rd;
694 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
696 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
697 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
699 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log and undo back to the savepoint.
702 $log->debug('Wormization failed : '.shift(), ERROR);
703 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# The transaction is only finished here when $commit is set.
707 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
708 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
711 __PACKAGE__->register_method(
712 api_name => "open-ils.worm.wormize.authority",
713 method => "wormize_authority_record",
717 __PACKAGE__->register_method(
718 api_name => "open-ils.worm.wormize.authority.noscrub",
719 method => "wormize_authority_record",
725 # --------------------------------------------------------------------------------
726 # MARC index extraction
728 package OpenILS::Application::Ingest::XPATH;
729 use base qw/OpenILS::Application::Ingest/;
730 use Unicode::Normalize;
732 # give this a MODS documentElement and an XPATH expression
# Evaluates an XPath expression against an XML node and concatenates the text
# of the matches into one space-separated string.  When both a namespace URI
# and a prefix are supplied the prefix is first bound on the node so the
# expression can use it.
733 sub _xpath_to_string {
737 my $ns_prefix = shift;
740 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
744 # grab the set of matching nodes
745 my @nodes = $xml->findnodes( $xpath );
746 for my $value (@nodes) {
748 # grab all children of the node
749 my @children = $value->childNodes();
750 for my $child (@children) {
752 # add the child's content to the growing buffer
# The text is escaped with quotemeta so it is matched literally below.
753 my $content = quotemeta($child->textContent);
# In unique mode a child is skipped when its text already occurs anywhere in
# the buffer -- this is a substring test, so a value contained in an earlier,
# longer value is skipped as well.
754 next if ($unique && $string =~ /$content/); # uniquify the values
755 $string .= $child->textContent . " ";
758 $string .= $value->textContent . " ";
# Builds the index field entries of one class from an XML record.  For every
# field type configured for $class in $xpathset the record is transformed with
# $mods_sheet, the configured XPath is evaluated in the MODS namespace, the
# text is cleaned up, and a Fieldmapper::metabib::<class>_field_entry object
# carrying the text and the configured field id is sent through
# $client->respond.
# Registered as open-ils.worm.field_entry.class.xml.
764 sub class_all_index_string_xml {
770 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
771 $xml = $parser->parse_string($xml) unless (ref $xml);
773 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
774 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the transform is re-run for every field type of the class;
# its input does not change within the visible loop -- confirm before hoisting.
775 my $value = _xpath_to_string(
776 $mods_sheet->transform($xml)->documentElement,
777 $xpathset->{$class}->{$type}->{xpath},
778 "http://www.loc.gov/mods/",
# Strip Unicode mark characters (\pM) and "other"/control characters (\pC).
785 $value =~ s/\pM+//sgo;
786 $value =~ s/\pC+//sgo;
787 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character (e.g. initials).
789 $value =~ s/(\w)\./$1/sgo;
792 my $fm = $class_constructor->new;
793 $fm->value( $value );
794 $fm->field( $xpathset->{$class}->{$type}->{id} );
795 $client->respond($fm);
799 __PACKAGE__->register_method(
800 api_name => "open-ils.worm.field_entry.class.xml",
801 method => "class_all_index_string_xml",
# Record-id flavour of class_all_index_string_xml: retrieves the bib record
# $rec through the storage service, runs open-ils.worm.field_entry.class.xml on
# its MARC for $class, and relays every resulting object to the caller.
# Registered as open-ils.worm.field_entry.class.record.
807 sub class_all_index_string_record {
813 OpenILS::Application::Ingest->post_init();
814 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
816 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
818 $client->respond($fm);
822 __PACKAGE__->register_method(
823 api_name => "open-ils.worm.field_entry.class.record",
824 method => "class_all_index_string_record",
# Returns the index string for a single class/type pair: the record is
# transformed with $mods_sheet and the XPath configured for $class/$type is
# evaluated in the MODS namespace with the "mods" prefix, in unique mode (the
# trailing 1).
# Registered as open-ils.worm.class.type.xml.
# NOTE(review): a sub with this name is also defined earlier in this file under
# the same package statement -- confirm which definition is meant to survive.
831 sub class_index_string_xml {
838 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
839 $xml = $parser->parse_string($xml) unless (ref $xml);
840 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
842 __PACKAGE__->register_method(
843 api_name => "open-ils.worm.class.type.xml",
844 method => "class_index_string_xml",
# Record-id flavour of the class/type extractor: retrieves the bib record $rec
# through the storage service, runs open-ils.worm.class.type.xml on its MARC
# for $class/$type, and logs the extracted string.
# Registered as open-ils.worm.class.type.record.
849 sub class_index_string_record {
856 OpenILS::Application::Ingest->post_init();
857 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by the method is kept.
859 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
860 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
863 __PACKAGE__->register_method(
864 api_name => "open-ils.worm.class.type.record",
865 method => "class_index_string_record",
879 OpenILS::Application::Ingest->post_init();
880 $xml = $parser->parse_string($xml) unless (ref $xml);
881 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
883 __PACKAGE__->register_method(
884 api_name => "open-ils.worm.xpath.xml",
885 method => "xml_xpath",
899 OpenILS::Application::Ingest->post_init();
900 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
902 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
903 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
906 __PACKAGE__->register_method(
907 api_name => "open-ils.worm.xpath.record",
908 method => "record_xpath",
914 # --------------------------------------------------------------------------------
917 package OpenILS::Application::Ingest::Biblio::Leader;
918 use base qw/OpenILS::Application::Ingest/;
919 use Unicode::Normalize;
# Named groups of MARC record types; the entries themselves are on lines not
# shown here.  Group names such as MAP, VIS and BKS are passed to _type_re by
# the descriptor closures further down.
921 our %marc_type_groups = (
# Builds an anchored alternation pattern from the group names passed in @_.
# NOTE(review): $marc_type_groups{@_} is a single-element lookup whose key is
# @_ evaluated in scalar context (the argument count), not a slice over the
# names -- a slice would be written @marc_type_groups{@_}.  Also, in
# '^a|b|c$' the anchors bind only to the first and last alternatives.
# Confirm the intended behaviour against the callers.
933 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# Dispatch table mapping each record_descriptor field name to a closure that
# computes its value.  The closures read the package variables $ldr (leader)
# and $oo8 (008 control field), which _extract_biblio_descriptors sets with
# local before calling them.  All offsets are zero-based substr positions.
937 our %biblio_descriptor_code = (
938 item_type => sub { substr($ldr,6,1); },
# The record type at leader position 6 decides which 008 position is read:
# position 29 for the MAP/VIS groups, position 23 for BKS/SER/MIX/SCO/REC.
941 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
942 return substr($oo8,29,1);
943 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
944 return substr($oo8,23,1);
948 bib_level => sub { substr($ldr,7,1); },
949 control_type => sub { substr($ldr,8,1); },
950 char_encoding => sub { substr($ldr,9,1); },
951 enc_level => sub { substr($ldr,17,1); },
952 cat_form => sub { substr($ldr,18,1); },
953 pub_status => sub { substr($ldr,5,1); },
954 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 position 33, but only for the BKS and
# VIS groups respectively; otherwise they yield undef.
955 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
956 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
957 audience => sub { substr($oo8,22,1); },
# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document by running every closure in %biblio_descriptor_code and storing its
# result through the accessor of the same name.
960 sub _extract_biblio_descriptors {
# The leader and the 008/007 control fields are located by local-name so the
# lookup works regardless of namespace prefix.  They are set with local so the
# closures in %biblio_descriptor_code see them for the duration of this call.
# None of the closures visible in that table reads $oo7.
963 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
964 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
965 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
967 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
968 for my $rd_field ( keys %biblio_descriptor_code ) {
# $rd_field doubles as the accessor method name on the descriptor object.
969 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# API wrapper around _extract_biblio_descriptors: parses $xml when it is given
# as a string and returns the record descriptor built from it.
# Registered as open-ils.worm.biblio_leader.xml.
975 sub extract_biblio_desc_xml {
980 $xml = $parser->parse_string($xml) unless (ref $xml);
982 return _extract_biblio_descriptors( $xml );
984 __PACKAGE__->register_method(
985 api_name => "open-ils.worm.biblio_leader.xml",
986 method => "extract_biblio_desc_xml",
# Record-id flavour of the descriptor extractor: retrieves the bib record $rec
# through the storage service, runs open-ils.worm.biblio_leader.xml on its
# MARC, and logs the resulting descriptor as JSON.
# Registered as open-ils.worm.biblio_leader.record.
991 sub extract_biblio_desc_record {
996 OpenILS::Application::Ingest->post_init();
997 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by the method is kept.
999 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
# NOTE(review): JSON->perl2JSON is used here but no JSON module import is
# visible in the lines shown -- confirm it is loaded elsewhere.
1000 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1003 __PACKAGE__->register_method(
1004 api_name => "open-ils.worm.biblio_leader.record",
1005 method => "extract_biblio_desc_record",
1010 # --------------------------------------------------------------------------------
1013 package OpenILS::Application::Ingest::FlatMARC;
1014 use base qw/OpenILS::Application::Ingest/;
1015 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper full_rec objects: the
# visible code creates one object per leader, per controlfield, and per
# subfield of each datafield.
# The second argument selects the Fieldmapper namespace and defaults to
# 'metabib', giving Fieldmapper::metabib::full_rec.
1018 sub _marcxml_to_full_rows {
1020 my $marcxml = shift;
1021 my $xmltype = shift || 'metabib';
1023 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first <record> element (matched by local-name) is processed.
# NOTE(review): $root is undef when the document has no record element, and
# the method calls on it below would then die -- confirm callers guarantee one.
1027 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader row(s).
1029 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1030 next unless $tagline;
1032 my $ns = $type->new;
1035 my $val = $tagline->textContent;
# Remove Unicode mark characters from the value.
1037 $val =~ s/(\pM+)//gso;
# Control field rows, one per controlfield element.
1043 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1044 next unless $tagline;
1046 my $ns = $type->new;
1048 $ns->tag( $tagline->getAttribute( "tag" ) );
1049 my $val = $tagline->textContent;
1051 $val =~ s/(\pM+)//gso;
# Data field rows: tag and indicators come from the datafield element, and one
# row is produced for each subfield beneath it.
1057 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1058 next unless $tagline;
1060 my $tag = $tagline->getAttribute( "tag" );
1061 my $ind1 = $tagline->getAttribute( "ind1" );
1062 my $ind2 = $tagline->getAttribute( "ind2" );
1064 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1067 my $ns = $type->new;
1072 $ns->subfield( $data->getAttribute( "code" ) );
1073 my $val = $data->textContent;
1075 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
1076 $ns->value( lc($val) );
1082 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1091 $xml = $parser->parse_string($xml) unless (ref $xml);
1093 my $type = 'metabib';
1094 $type = 'authority' if ($self->api_name =~ /authority/o);
1096 OpenILS::Application::Ingest->post_init();
1098 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1101 __PACKAGE__->register_method(
1102 api_name => "open-ils.worm.flat_marc.authority.xml",
1103 method => "flat_marc_xml",
1108 __PACKAGE__->register_method(
1109 api_name => "open-ils.worm.flat_marc.biblio.xml",
1110 method => "flat_marc_xml",
# Record-id flavour of the MARC flattener: retrieves a record through the
# storage service, runs the matching open-ils.worm.flat_marc.<type>.xml method
# on its MARC, and relays every resulting row to the caller.
# Registered as open-ils.worm.flat_marc.biblio.record_entry and
# open-ils.worm.flat_marc.authority.record_entry.
1116 sub flat_marc_record {
# The record type defaults to biblio and switches to authority when the API
# name this method was invoked under contains "authority".
1121 my $type = 'biblio';
1122 $type = 'authority' if ($self->api_name =~ /authority/o);
1124 OpenILS::Application::Ingest->post_init();
1125 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1127 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1130 __PACKAGE__->register_method(
1131 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1132 method => "flat_marc_record",
1137 __PACKAGE__->register_method(
1138 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1139 method => "flat_marc_record",
1146 # --------------------------------------------------------------------------------
1149 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1150 use base qw/OpenILS::Application::Ingest/;
1151 use Unicode::Normalize;
1152 use OpenSRF::EX qw/:try/;
# Fingerprinting rules, stored as a flat list of pairs: a "match" XPath
# followed by an array reference describing what to extract when that XPath
# matches.  The extraction blocks visible here list candidate XPaths for a
# title or a creator name, followed by a sequence of clean-up substitutions
# applied to $text, with an INTERNAL-level log line after each step.
# NOTE(review): every trim step uses s/^\s*(.+)\s*$/$1/ with a greedy (.+)
# under /s, so trailing whitespace is captured into $1 and kept; only leading
# whitespace is actually removed -- confirm whether that matters downstream.
1154 my @fp_mods_xpath = (
# Rule 1: records whose typeOfResource is "text".
1155 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates: uniform, translated, alternative, then untyped.
1158 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1159 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1160 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1161 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1164 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1166 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove Unicode mark characters.
1167 $text =~ s/\pM+//gso;
1168 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1170 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Collapse runs of whitespace to a single space.
1171 $text =~ s/\s+/ /sgo;
1172 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1173 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1174 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove the whole words "the", "a" and "an" (the match is case-sensitive as written).
1175 $text =~ s/\b(?:the|an?)\b//sgo;
1176 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove square-bracketed text that has at least two characters inside.
1177 $text =~ s/\[.[^\]]+\]//sgo;
1178 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove trailing whitespace followed by any run of ';', '/' or '.' at the end.
1179 $text =~ s/\s*[;\/\.]*$//sgo;
1180 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Creator name candidates: personal creators first, then any creator.
1185 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1186 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1189 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1191 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1192 $text =~ s/\pM+//gso;
1193 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1195 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1196 $text =~ s/\s+/ /sgo;
1197 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1198 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1199 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Cut the name at the first whitespace (and an optional comma before it),
# keeping only the leading token.
1200 $text =~ s/,?\s+.*$//sgo;
1201 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 2: records with a relatedItem whose type is neither "host" nor "series".
1206 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the related item's titles first, then the record's own.
1209 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1210 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1211 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1212 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1213 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1214 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1215 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1216 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Same clean-up sequence as the title steps of rule 1.
1219 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1221 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1222 $text =~ s/\pM+//gso;
1223 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1225 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1226 $text =~ s/\s+/ /sgo;
1227 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1228 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1229 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1230 $text =~ s/\b(?:the|an?)\b//sgo;
1231 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1232 $text =~ s/\[.[^\]]+\]//sgo;
1233 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1234 $text =~ s/\s*[;\/\.]*$//sgo;
1235 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Creator name candidates: the related item's creators first, then the record's own.
1240 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1241 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1242 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1243 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Same clean-up sequence as the name steps of rule 1.
1246 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1248 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1249 $text =~ s/\pM+//gso;
1250 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1252 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1253 $text =~ s/\s+/ /sgo;
1254 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1255 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1256 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1257 $text =~ s/,?\s+.*$//sgo;
1258 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Catch-all rule appended after the list is built: any record with a titleInfo
# reuses the extraction block of rule 1 (element 1 of the list).
1265 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Fingerprint-building body (the enclosing `sub` line is not part of this
# excerpt).  Registers the "mods" prefix on the element, presumably so the
# mods:-prefixed XPath expressions in @fp_mods_xpath resolve against it.
1269 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# @fp_mods_xpath is read as a flat list: slot $match_index holds a "does this
# block apply?" XPath and slot $block_index holds the block that goes with it.
1273 my $match_index = 0;
1274 my $block_index = 1;
1275 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
# Use this block only when the match XPath selects at least one node.
1276 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# The block is itself a flat list: the slot at $block_name_index is used as a
# label in the log message below, the slot at $block_value_index is the part
# definition (a hash with an `xpath` list).
1278 my $block_name_index = 0;
1279 my $block_value_index = 1;
1280 my $block = $fp_mods_xpath[$block_index];
1281 while ( my $part = $$block[$block_value_index] ) {
# Try each candidate XPath of this part in order.
1283 for my $xpath ( @{ $part->{xpath} } ) {
1284 $text = $mods->findvalue( $xpath );
1288 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1292 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
# Append this part's text to the fingerprint being assembled.
1293 $fp_string .= $text;
# Step to the next name/part pair of the block.
1296 $block_name_index += 2;
1297 $block_value_index += 2;
# Drop every run of non-word characters from the assembled fingerprint.
1301 $fp_string =~ s/\W+//gso;
1302 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recomputes the fingerprint and quality of the biblio record entries matched
# by $rec.  When either value differs from what is stored the record entry is
# updated; unless the method was invoked under an api_name matching /nomap/,
# the record's metarecord source map is then rebuilt.  The record id is sent
# back through $client->respond.
1312 sub refingerprint_bibrec {
# Open a storage transaction only when one is not already in progress.
1318 if (!OpenILS::Application::Ingest->in_transaction) {
1319 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# Fetch the matching record entries with one atomic search.
1325 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): `$b` is also the name of Perl's sort-comparator variable; no
# sort block is visible here, but the name is easy to misread.
1326 for my $b (@$bibs) {
# Fingerprint the record's MARC through the registered fingerprint.marc method.
1327 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Write back only when the fingerprint or the quality has changed.
1329 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1331 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1333 OpenILS::Application::Ingest->storage_req(
1334 'open-ils.storage.direct.biblio.record_entry.remote_update',
1336 { fingerprint => $fp->{fingerprint},
1337 quality => $fp->{quality} }
# Metarecord mapping is skipped for the ".nomap" API variant.
1340 if ($self->api_name !~ /nomap/o) {
# Find the source-map rows that currently point at this record.
1341 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1342 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
1347 if (ref($old_source_map) and @$old_source_map) {
# Delete each old mapping, remembering the metarecord it belonged to.
1348 for my $m (@$old_source_map) {
1349 $old_mrid = $m->metarecord;
1350 OpenILS::Application::Ingest->storage_req(
1351 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# See whether the old metarecord still has any sources mapped to it.
1357 my $old_sm = OpenILS::Application::Ingest->storage_req(
1358 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1359 { metarecord => $old_mrid }
# No remaining mappings: the old metarecord itself is deleted.
1362 if (ref($old_sm) and @$old_sm == 0) {
1363 OpenILS::Application::Ingest->storage_req(
1364 'open-ils.storage.direct.metabib.metarecord.delete',
# Look for an existing metarecord carrying the new fingerprint.
1369 my $mr = OpenILS::Application::Ingest->storage_req(
1370 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1371 { fingerprint => $fp->{fingerprint} }
# Build and store a new metarecord with this record as master; presumably only
# when the search above found none (the guarding condition is not part of this
# excerpt).
1375 $mr = Fieldmapper::metabib::metarecord->new;
1376 $mr->fingerprint( $fp->{fingerprint} );
1377 $mr->master_record( $b->id );
1378 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map this record to the metarecord.
1381 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1382 $mr_map->metarecord( $mr->id );
1383 $mr_map->source( $b->id );
1384 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
# Report the processed record id to the caller.
1388 $client->respond($b->id);
# Failure path: the error is logged at ERROR level through the debug call.
1392 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Commit or roll back depending on $commit and $success; where those two flags
# are set is not visible in this excerpt.
1396 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1397 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish refingerprint_bibrec under two API names.  The sub inspects its own
# api_name: the ".nomap" variant skips the metarecord source-map maintenance.
1400 __PACKAGE__->register_method(
1401 api_name => "open-ils.worm.fingerprint.record.update",
1402 method => "refingerprint_bibrec",
1408 __PACKAGE__->register_method(
1409 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1410 method => "refingerprint_bibrec",
# Retrieves the biblio record entry identified by $rec and fingerprints its
# MARC through the registered 'open-ils.worm.fingerprint.marc' method.
1417 sub fingerprint_bibrec {
1422 OpenILS::Application::Ingest->post_init();
1423 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1425 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# NOTE(review): refingerprint_bibrec treats the result of this same method as
# a hash ref ($fp->{fingerprint}); if that holds here, "[$fp]" logs the
# reference address rather than the fingerprint text -- confirm.
1426 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Publish fingerprint_bibrec.
# NOTE(review): "open-ils.worm.fingerprint.record" is registered a second time
# further down in this file for biblio_fingerprint_record -- confirm which
# registration is meant to be live.
1430 __PACKAGE__->register_method(
1431 api_name => "open-ils.worm.fingerprint.record",
1432 method => "fingerprint_bibrec",
# Parses the MODS XML string in $xml and returns the fingerprint that
# _fp_mods computes for its document element.
1438 sub fingerprint_mods {
1443 OpenILS::Application::Ingest->post_init();
1444 my $mods = $parser->parse_string($xml)->documentElement;
1446 return _fp_mods( $mods );
# Publish fingerprint_mods.
1448 __PACKAGE__->register_method(
1449 api_name => "open-ils.worm.fingerprint.mods",
1450 method => "fingerprint_mods",
# Fingerprints a MARCXML record: the document is transformed with $mods_sheet
# and the resulting document element is passed to _fp_mods.
1455 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or an XML string.
1460 $xml = $parser->parse_string($xml) unless (ref $xml);
1462 OpenILS::Application::Ingest->post_init();
# NOTE(review): where $mods_sheet is initialised is not visible in this excerpt.
1463 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1464 $log->debug("Returning [$fp] as fingerprint", INFO);
# Publish fingerprint_marc.
# NOTE(review): "open-ils.worm.fingerprint.marc" is registered a second time
# further down in this file for biblio_fingerprint -- confirm which
# registration is meant to be live.
1467 __PACKAGE__->register_method(
1468 api_name => "open-ils.worm.fingerprint.marc",
1469 method => "fingerprint_marc",
# Retrieves the biblio record entry identified by $rec and runs the registered
# 'open-ils.worm.fingerprint.marc' method on the value stored in $marc.
1477 sub biblio_fingerprint_record {
1482 OpenILS::Application::Ingest->post_init();
# The rest of this method chain is not visible in this excerpt.
1484 my $marc = OpenILS::Application::Ingest
1485 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
1488 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
# NOTE(review): if $fp is a reference, "[$fp]" logs its address, not its content.
1489 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Publish biblio_fingerprint_record.
# NOTE(review): the same api_name is registered earlier in this file for
# fingerprint_bibrec -- confirm which registration is meant to be live.
1492 __PACKAGE__->register_method(
1493 api_name => "open-ils.worm.fingerprint.record",
1494 method => "biblio_fingerprint_record",
# Fingerprints a MARC record by running an external script.  The script is
# handed an environment containing the MARC and the MODS derived from it, and
# its run result is stored in $res.
1500 sub biblio_fingerprint {
1505 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
1507 $marc = $parser->parse_string($marc) unless (ref $marc);
# Derive MODS by transforming the MARC document; the result is passed through
# entityize (part of this call chain is not visible in this excerpt).
1509 my $mods = OpenILS::Application::Ingest::entityize(
1511 ->transform( $marc )
# Replace the parsed MARC document with its entityized string form.
1517 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
1520 $log->internal("Got MARC [$marc]");
1521 $log->internal("Created MODS [$mods]");
# Script location and library paths come from the open-ils.storage
# app_settings section of the settings service.
1524 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1525 my $conf = OpenSRF::Utils::SettingsClient->new;
1527 my $libs = $conf->config_value(@pfx, 'script_path');
1528 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# Normalise script_path to an array ref whether one or several were configured.
1529 my $script_libs = (ref($libs)) ? $libs : [$libs];
1531 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): whether the runner is rebuilt on every call or cached in
# $fp_script is not visible in this excerpt.
1533 $fp_script = new OpenILS::Utils::ScriptRunner
1534 ( file => $script_file,
1535 paths => $script_libs,
1536 reset_count => 1000 );
1539 $log->debug("Applying environment for biblio fingerprinting...");
# Expose both representations of the record to the script.
1541 my $env = {marc => $marc, mods => $mods};
1542 #my $res = {fingerprint => '', quality => '0'};
1544 $fp_script->insert('environment' => $env);
1545 #$fp_script->insert('result' => $res);
1547 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() is logged as an error.
# NOTE(review): the `return 0` only executes if $log->error returns a true
# value, because of the `&&`.
1549 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
1551 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish biblio_fingerprint.
# NOTE(review): the same api_name is registered earlier in this file for
# fingerprint_marc -- confirm which registration is meant to be live.
1555 __PACKAGE__->register_method(
1556 api_name => "open-ils.worm.fingerprint.marc",
1557 method => "biblio_fingerprint",
1562 # --------------------------------------------------------------------------------
# File-scoped slot that caches the looked-up
# metarecord_source_map.batch.create method; it is filled on first use by the
# ingest routine below.
1576 my $create_source_map;
# Maps each record_descriptor field name to a Perl expression, held as a
# string.  The ingest loops string-eval these expressions with $ldr (leader
# text) and $oo8 (value of the 008 controlfield) in scope and store the result
# through the accessor named by the key.
1591 my %descriptor_code = (
1592 item_type => 'substr($ldr,6,1)',
# Leader position 6 decides which 008 offset is read: 29 for f/g/i/m/o/p/r,
# 23 otherwise.
1593 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1594 bib_level => 'substr($ldr,7,1)',
1595 control_type => 'substr($ldr,8,1)',
1596 char_encoding => 'substr($ldr,9,1)',
1597 enc_level => 'substr($ldr,17,1)',
1598 cat_form => 'substr($ldr,18,1)',
1599 pub_status => 'substr($ldr,5,1)',
1600 item_lang => 'substr($oo8,35,3)',
1601 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1602 audience => 'substr($oo8,22,1)',
# Biblio ingest routine body (its `sub` line is not part of this excerpt; the
# name "wormize" is taken from the register_method calls that follow it).
# For each requested record it computes the fingerprint, finds or creates the
# metarecord, and builds the record descriptor, full_rec rows and field
# entries.  It then deletes the old derived rows and batch-creates the new ones.
#
# Fixes relative to the previous revision:
#   * the check after metarecord.create now tests $new_mr (the create result);
#     it used to test $mr, which had just been assigned a new object and was
#     therefore always defined, so a failed create was never detected;
#   * constructors are called as Class->new instead of indirect `new Class`.
#
# The api_name selects the variant: /no_map/ disables source-map handling.
1612 if ($self->api_name =~ /no_map/o) {
# Look up each storage method once and cache it in a file-scoped lexical.
1616 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1618 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1620 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1622 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1624 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1625 unless ($sm_lookup);
1626 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1627 unless ($mr_lookup);
1628 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1629 unless ($mr_update);
1630 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1632 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1633 unless ($update_entry);
1634 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1635 unless ($rm_old_sm);
1636 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1637 unless ($rm_old_rd);
1638 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1639 unless ($rm_old_fr);
1640 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1641 unless ($rm_old_tr);
1642 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1643 unless ($rm_old_ar);
1644 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1645 unless ($rm_old_sr);
1646 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1647 unless ($rm_old_kr);
1648 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1649 unless ($rm_old_ser);
1650 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1651 unless ($mr_create);
1652 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1653 unless ($create_source_map);
1654 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1655 unless ($rd_create);
1656 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1657 unless ($fr_create);
1658 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1659 unless ($$create{title});
1660 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1661 unless ($$create{author});
1662 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1663 unless ($$create{subject});
1664 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1665 unless ($$create{keyword});
1666 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1667 unless ($$create{series});
# Start a transaction only when the caller has not already opened one; the
# same flag decides at the end whether this routine commits.
1670 my ($outer_xact) = $in_xact->run;
1672 unless ($outer_xact) {
1673 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1674 my ($r) = $begin->run($client);
1675 unless (defined $r and $r) {
1677 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1680 } catch Error with {
1681 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
1691 for my $entry ( $lookup->run(@docids) ) {
1692 # step -1: grab the doc from storage
1693 next unless ($entry);
# Parse the MARC-to-MODS stylesheet from the configured xsl directory.
1696 my $xslt_doc = $parser->parse_file(
1697 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
1698 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
# Parse the record's MARCXML and derive its MODS form.
1701 my $xml = $entry->marc;
1702 my $docid = $entry->id;
1703 my $marcdoc = $parser->parse_string($xml);
1704 my $modsdoc = $mods_sheet->transform($marcdoc);
1706 my $mods = $modsdoc->documentElement;
1707 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Store the fingerprint on the entry; the entry is written back in the batch
# update further down.
1709 $entry->fingerprint( fingerprint_mods( $mods ) );
1710 push @entry_list, $entry;
1712 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Find the metarecord for this fingerprint, creating one when none exists.
1715 my ($mr) = $mr_lookup->run( $entry->fingerprint );
1716 if (!$mr || !@$mr) {
1717 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1718 $mr = Fieldmapper::metabib::metarecord->new;
1719 $mr->fingerprint( $entry->fingerprint );
1720 $mr->master_record( $entry->id );
1721 my ($new_mr) = $mr_create->run($mr);
# Test the create result: $mr itself is always defined at this point.
1723 unless (defined $new_mr) {
1724 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1727 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue a source-map row linking this record to its metarecord.
1732 my $sm = Fieldmapper::metabib::metarecord_source_map->new;
1733 $sm->metarecord( $mr->id );
1734 $sm->source( $entry->id );
1735 push @source_maps, $sm;
# Leader text and 008 value feed the %descriptor_code expressions below.
1738 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1739 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# Each descriptor value comes from string-evaluating a constant expression
# defined in this file (not external input) with $ldr and $oo8 in scope.
1741 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1742 for my $rd_field ( keys %descriptor_code ) {
1743 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1745 $rd_obj->record( $docid );
1746 push @rd_list, $rd_obj;
# Collect the per-class field values extracted from the MODS, keyed by record id.
1748 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
1750 # step 2: build the KOHA rows
1751 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1752 $_->record( $docid ) for (@tmp_list);
1753 push @ns_list, @tmp_list;
# Non-batch API variants stop after the first record.
1757 last unless ($self->api_name =~ /batch$/o);
# Remove the rows previously derived from these records.
1760 $rm_old_rd->run( { record => \@docids } );
1761 $rm_old_fr->run( { record => \@docids } );
1762 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1763 $rm_old_tr->run( { source => \@docids } );
1764 $rm_old_ar->run( { source => \@docids } );
1765 $rm_old_sr->run( { source => \@docids } );
1766 $rm_old_kr->run( { source => \@docids } );
1767 $rm_old_ser->run( { source => \@docids } );
# Write the new rows; an undefined result from any batch call is fatal.
1770 my ($sm) = $create_source_map->run(@source_maps);
1771 unless (defined $sm) {
1772 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1774 my ($mr) = $mr_update->run(@mr_list);
1775 unless (defined $mr) {
1776 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1780 my ($re) = $update_entry->run(@entry_list);
1781 unless (defined $re) {
1782 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1785 my ($rd) = $rd_create->run(@rd_list);
1786 unless (defined $rd) {
1787 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1790 my ($fr) = $fr_create->run(@ns_list);
1791 unless (defined $fr) {
1792 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1795 # step 5: insert the new metadata
1796 for my $class ( qw/title author subject keyword series/ ) {
# Each @mods_data element is a one-pair hash: record id => extracted values.
1798 for my $doc ( @mods_data ) {
1799 my ($did) = keys %$doc;
1800 my ($data) = values %$doc;
1802 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1803 for my $row ( keys %{ $$data{$class} } ) {
1804 next unless (exists $$data{$class}{$row});
# Rows with an empty (false) value are not stored.
1805 next unless ($$data{$class}{$row}{value});
1806 my $fm_obj = $fm_constructor->new;
1807 $fm_obj->value( $$data{$class}{$row}{value} );
1808 $fm_obj->field( $$data{$class}{$row}{field_id} );
1809 $fm_obj->source( $did );
1810 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1812 push @md_list, $fm_obj;
1816 my ($cr) = $$create{$class}->run(@md_list);
1817 unless (defined $cr) {
1818 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only when this routine opened the transaction itself.
1822 unless ($outer_xact) {
1823 $log->debug("Commiting transaction started by the Ingest.", INFO);
1824 my ($c) = $commit->run;
1825 unless (defined $c and $c) {
1827 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the biblio ingest routine under four API names.  The routine reads
# its own api_name: a name matching /no_map/ selects the no-map variant, and
# only names ending in "batch" process more than the first record.
1833 __PACKAGE__->register_method(
1834 api_name => "open-ils.worm.wormize",
1835 method => "wormize",
1839 __PACKAGE__->register_method(
1840 api_name => "open-ils.worm.wormize.no_map",
1841 method => "wormize",
1845 __PACKAGE__->register_method(
1846 api_name => "open-ils.worm.wormize.batch",
1847 method => "wormize",
1851 __PACKAGE__->register_method(
1852 api_name => "open-ils.worm.wormize.no_map.batch",
1853 method => "wormize",
# File-scoped slot for an authority source-map create method.
# NOTE(review): it is not referenced by the code visible in this excerpt.
1868 my $acreate_source_map;
# Authority ingest: for each requested authority record it builds a record
# descriptor and the full_rec rows, deletes the old derived rows and
# batch-creates the new ones.
#
# Fixes relative to the previous revision:
#   * records are fetched with $alookup (authority.record_entry.batch.retrieve);
#     the loop used $lookup, the biblio retrieve method cached by the biblio
#     ingest routine;
#   * rows are stored with $afr_create (authority.full_rec.batch.create); the
#     call used $fr_create, the metabib create method, while the error message
#     already named the authority method.
1883 sub authority_wormize {
1890 if ($self->api_name =~ /no_map/o) {
# Look up each storage method once and cache it in a file-scoped lexical.
1894 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1896 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1898 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1900 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1902 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1904 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1905 unless ($aupdate_entry);
1906 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1907 unless ($arm_old_rd);
1908 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1909 unless ($arm_old_fr);
1910 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1911 unless ($ard_create);
1912 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1913 unless ($afr_create);
# Start a transaction only when the caller has not already opened one.
1916 my ($outer_xact) = $in_xact->run;
1918 unless ($outer_xact) {
1919 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1920 my ($r) = $begin->run($client);
1921 unless (defined $r and $r) {
1923 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1926 } catch Error with {
1927 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Iterate the authority records (not the biblio ones).
1937 for my $entry ( $alookup->run(@docids) ) {
1938 # step -1: grab the doc from storage
1939 next unless ($entry);
1942 # my $xslt_doc = $parser->parse_file(
1943 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
1944 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1947 my $xml = $entry->marc;
1948 my $docid = $entry->id;
1949 my $marcdoc = $parser->parse_string($xml);
1950 #my $madsdoc = $mads_sheet->transform($marcdoc);
1952 #my $mads = $madsdoc->documentElement;
1953 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1955 push @entry_list, $entry;
# Leader text and 008 value feed the %descriptor_code expressions below.
1957 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1958 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# Each descriptor value comes from string-evaluating a constant expression
# defined in this file (not external input) with $ldr and $oo8 in scope.
1960 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1961 for my $rd_field ( keys %descriptor_code ) {
1962 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1964 $rd_obj->record( $docid );
1965 push @rd_list, $rd_obj;
1967 # step 2: build the KOHA rows
1968 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1969 $_->record( $docid ) for (@tmp_list);
1970 push @ns_list, @tmp_list;
# Non-batch API variants stop after the first record.
1974 last unless ($self->api_name =~ /batch$/o);
# Remove the rows previously derived from these records.
1977 $arm_old_rd->run( { record => \@docids } );
1978 $arm_old_fr->run( { record => \@docids } );
1980 my ($rd) = $ard_create->run(@rd_list);
1981 unless (defined $rd) {
1982 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Store the rows through the authority full_rec method.
1985 my ($fr) = $afr_create->run(@ns_list);
1986 unless (defined $fr) {
1987 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only when this routine opened the transaction itself.
1990 unless ($outer_xact) {
1991 $log->debug("Commiting transaction started by Ingest.", INFO);
1992 my ($c) = $commit->run;
1993 unless (defined $c and $c) {
1995 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the authority ingest routine.
# FIX: the single-record api_name was misspelled "authortiy", and both
# registrations dispatched to the biblio routine "wormize" instead of
# "authority_wormize".
# NOTE(review): any client that relied on the misspelled name must switch to
# the corrected one.
2001 __PACKAGE__->register_method(
2002 api_name => "open-ils.worm.authority.wormize",
2003 method => "authority_wormize",
2007 __PACKAGE__->register_method(
2008 api_name => "open-ils.worm.authority.wormize.batch",
2009 method => "authority_wormize",
2015 # --------------------------------------------------------------------------------
# Flattens a parsed MARCXML document into row objects: one per leader, one per
# controlfield and one per datafield child node.
#
#   $marcxml - parsed MARCXML document (documentElement is called on it)
#   $type    - class name of the row objects to build; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Row values are NFD-decomposed and stripped of combining marks (\pM).
#
# FIX: leader and controlfield rows are now built with $type->new, as the
# datafield rows already were.  They were hard-wired to
# Fieldmapper::metabib::full_rec, so a caller passing another class (the
# authority ingest does) got rows of mixed classes.  The default is unchanged.
2018 sub _marcxml_to_full_rows {
2020 my $marcxml = shift;
2021 my $type = shift || 'Fieldmapper::metabib::full_rec';
2025 my $root = $marcxml->documentElement;
# Leader row(s).
2027 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2028 next unless $tagline;
2030 my $ns = $type->new;
2033 my $val = NFD($tagline->textContent);
2034 $val =~ s/(\pM+)//gso;
# Controlfield rows, tagged with the element's "tag" attribute.
2040 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2041 next unless $tagline;
2043 my $ns = $type->new;
2045 $ns->tag( $tagline->getAttribute( "tag" ) );
2046 my $val = NFD($tagline->textContent);
2047 $val =~ s/(\pM+)//gso;
# Datafield rows: one per child node, carrying the subfield code.
2053 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2054 next unless $tagline;
2056 my $tag = $tagline->getAttribute( "tag" );
2057 my $ind1 = $tagline->getAttribute( "ind1" );
2058 my $ind2 = $tagline->getAttribute( "ind2" );
2060 for my $data ( $tagline->childNodes ) {
2063 my $ns = $type->new;
2068 $ns->subfield( $data->getAttribute( "code" ) );
2069 my $val = NFD($data->textContent);
2070 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
2071 $ns->value( lc($val) );
# Gathers the text found under every node matched by $xpath (evaluated against
# $root) into one space-separated string, then NFD-decomposes it and strips
# combining marks.
2079 sub _get_field_value {
2081 my( $root, $xpath ) = @_;
2085 # grab the set of matching nodes
2086 my @nodes = $root->findnodes( $xpath );
2087 for my $value (@nodes) {
2089 # grab all children of the node
2090 my @children = $value->childNodes();
2091 for my $child (@children) {
2093 # add the child's content to the growing buffer
# The escaped text is used as a pattern, so this is a literal substring test:
# a child is skipped whenever its text already occurs anywhere in $string,
# not only when an identical value was added before.
2094 my $content = quotemeta($child->textContent);
2095 next if ($string =~ /$content/); # uniquify the values
2096 $string .= $child->textContent . " ";
# NOTE(review): the condition that selects this whole-node branch is not
# visible in this excerpt.
2099 $string .= $value->textContent . " ";
# Decompose, then remove every character in the Unicode Mark category.
2102 $string = NFD($string);
2103 $string =~ s/(\pM)//gso;
2108 sub modsdoc_to_values {
2109 my( $self, $mods ) = @_;
2111 for my $class (keys %$xpathset) {
2112 $data->{$class} = {};
2113 for my $type (keys %{$xpathset->{$class}}) {
2114 $data->{$class}->{$type} = {};
2115 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};