1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
30 our $log = 'OpenSRF::Utils::Logger';
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
61 my $req = OpenSRF::AppSession
62 ->create('open-ils.cstore')
64 'open-ils.cstore.direct.config.metabib_field.search.atomic',
65 { id => { '!=' => undef } }
69 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
70 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
71 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
72 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
87 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
91 # --------------------------------------------------------------------------------
92 # MARC index extraction
94 package OpenILS::Application::Ingest::XPATH;
95 use base qw/OpenILS::Application::Ingest/;
96 use Unicode::Normalize;
98 # give this an XML documentElement and an XPATH expression
103 my $ns_prefix = shift;
106 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
110 # grab the set of matching nodes
111 my @nodes = $xml->findnodes( $xpath );
112 for my $value (@nodes) {
114 # grab all children of the node
115 my @children = $value->childNodes();
116 for my $child (@children) {
119 # add the child's content to the growing buffer
119 my $content = quotemeta($child->textContent);
120 next if ($unique && $string =~ /$content/); # uniquify the values
121 $string .= $child->textContent . " ";
124 $string .= $value->textContent . " ";
# For each requested class, walk every index definition stored in
# $xpathset->{$class}, run xpath_to_string against the XSLT-transformed
# record, normalize the resulting text and stream one
# Fieldmapper::metabib::<class>_field_entry object per definition back to the
# caller via $client->respond.
# NOTE(review): argument unpacking and the assignment of $mods_sheet are not
# visible in this excerpt -- confirm where @classes and $mods_sheet come from.
130 sub class_index_string_xml {
136 OpenILS::Application::Ingest->post_init();
# Accept either a raw XML string or an already-parsed document object.
137 $xml = $parser->parse_string($xml) unless (ref $xml);
139 for my $class (@classes) {
# Class name of the Fieldmapper object built for every hit in this class.
140 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
141 for my $type ( keys %{ $xpathset->{$class} } ) {
143 my $def = $xpathset->{$class}->{$type};
# NOTE(review): the XSLT transform is re-run for every $type; if $mods_sheet
# does not change between iterations it could be hoisted -- confirm.
144 my $value = xpath_to_string(
145 $mods_sheet->transform($xml)->documentElement,
# Namespace URI is looked up from the format named by the definition.
147 $supported_formats{$def->{format}}{ns},
# Strip Unicode mark characters (\pM) ...
154 $value =~ s/\pM+//sgo;
# ... and characters in the Unicode "Other" category (\pC).
155 $value =~ s/\pC+//sgo;
156 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character (e.g. initials).
158 $value =~ s/(\w)\./$1/sgo;
161 my $fm = $class_constructor->new;
162 $fm->value( $value );
163 $fm->field( $xpathset->{$class}->{$type}->{id} );
164 $client->respond($fm);
169 __PACKAGE__->register_method(
170 api_name => "open-ils.ingest.field_entry.class.xml",
171 method => "class_index_string_xml",
# Record-id flavour of open-ils.ingest.field_entry.class.xml: fetch the bib
# record through open-ils.cstore, then delegate to the XML method with the
# record's MARC and the requested classes, relaying every result to the client.
177 sub class_index_string_record {
183 OpenILS::Application::Ingest->post_init();
184 my $r = OpenSRF::AppSession
185 ->create('open-ils.cstore')
187 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec
# NOTE(review): this guard dereferences $r as an array (@$r) but the next
# statement calls $r->marc on it as an object; the fingerprint code checks
# "$r and $r->marc" instead -- confirm which shape $r really has. No
# terminating semicolon is visible on this statement either.
190 return undef unless ($r and @$r)
192 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
194 $client->respond($fm);
198 __PACKAGE__->register_method(
199 api_name => "open-ils.ingest.field_entry.class.record",
200 method => "class_index_string_record",
206 sub all_index_string_xml {
211 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
212 $client->respond($fm);
216 __PACKAGE__->register_method(
217 api_name => "open-ils.ingest.extract.field_entry.all.xml",
218 method => "all_index_string_xml",
# Record-id flavour of the "all classes" extractor: fetch the bib record
# through open-ils.cstore and run open-ils.ingest.field_entry.class.xml over
# its MARC for every class currently loaded in $xpathset.
224 sub all_index_string_record {
229 OpenILS::Application::Ingest->post_init();
230 my $r = OpenSRF::AppSession
231 ->create('open-ils.cstore')
233 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec
# NOTE(review): the guard treats $r as an array ref (@$r) while the loop below
# calls $r->marc on it -- confirm the expected type. No terminating semicolon
# is visible on this statement.
237 return undef unless ($r and @$r)
239 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
241 $client->respond($fm);
245 __PACKAGE__->register_method(
246 api_name => "open-ils.ingest.extract.field_entry.all.record",
247 method => "all_index_string_record",
253 # --------------------------------------------------------------------------------
256 package OpenILS::Application::Ingest::FlatMARC;
257 use base qw/OpenILS::Application::Ingest/;
258 use Unicode::Normalize;
261 sub _marcxml_to_full_rows {
264 my $xmltype = shift || 'metabib';
266 my $type = "Fieldmapper::${xmltype}::full_rec";
270 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
272 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
273 next unless $tagline;
278 my $val = $tagline->textContent;
280 $val =~ s/(\pM+)//gso;
286 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
287 next unless $tagline;
291 $ns->tag( $tagline->getAttribute( "tag" ) );
292 my $val = $tagline->textContent;
294 $val =~ s/(\pM+)//gso;
300 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
301 next unless $tagline;
303 my $tag = $tagline->getAttribute( "tag" );
304 my $ind1 = $tagline->getAttribute( "ind1" );
305 my $ind2 = $tagline->getAttribute( "ind2" );
307 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
315 $ns->subfield( $data->getAttribute( "code" ) );
316 my $val = $data->textContent;
318 $val =~ s/(\pM+)//gso;
319 $ns->value( lc($val) );
325 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
334 $xml = $parser->parse_string($xml) unless (ref $xml);
336 my $type = 'metabib';
337 $type = 'authority' if ($self->api_name =~ /authority/o);
339 OpenILS::Application::Ingest->post_init();
341 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
344 __PACKAGE__->register_method(
345 api_name => "open-ils.ingest.flat_marc.authority.xml",
346 method => "flat_marc_xml",
351 __PACKAGE__->register_method(
352 api_name => "open-ils.ingest.flat_marc.biblio.xml",
353 method => "flat_marc_xml",
359 sub flat_marc_record {
365 $type = 'authority' if ($self->api_name =~ /authority/o);
367 OpenILS::Application::Ingest->post_init();
368 my $r = OpenSRF::AppSession
369 ->create('open-ils.cstore')
370 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec );
372 $client->respond($_) for ($self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc));
375 __PACKAGE__->register_method(
376 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
377 method => "flat_marc_record",
382 __PACKAGE__->register_method(
383 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
384 method => "flat_marc_record",
390 # --------------------------------------------------------------------------------
393 package OpenILS::Application::Ingest::Biblio::Fingerprint;
394 use base qw/OpenILS::Application::Ingest/;
395 use Unicode::Normalize;
396 use OpenSRF::EX qw/:try/;
# Compute the fingerprint of a stored bib record: look the record up by id and
# hand its MARC to the fingerprinting method, logging the result.
398 sub biblio_fingerprint_record {
403 OpenILS::Application::Ingest->post_init();
# NOTE(review): the session is opened against open-ils.cstore but the method
# name requested is an open-ils.storage one -- likely a leftover from the
# storage-based code; confirm the intended service. The value returned by
# ->request() is also used directly as a record below; no gather/recv step is
# visible here.
405 my $r = OpenSRF::AppSession
406 ->create('open-ils.cstore')
407 ->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
409 return undef unless ($r and $r->marc);
# NOTE(review): 'open-ils.worm.fingerprint.marc' is not registered anywhere in
# the visible part of this file (the registration shown for biblio_fingerprint
# is "open-ils.ingest.fingerprint.xml") -- verify the lookup resolves.
411 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# Logged through debug() with an INFO level argument.
412 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
415 __PACKAGE__->register_method(
416 api_name => "open-ils.worm.fingerprint.record",
417 method => "biblio_fingerprint_record",
# Fingerprint a MARC record by running the configured fingerprinting script
# through OpenILS::Utils::ScriptRunner with the record placed in the script's
# 'environment'.
423 sub biblio_fingerprint {
# The whole MARC document is interpolated into this log line.
428 $log->internal("Got MARC [$xml]");
# Script location and library paths are read from the open-ils.storage
# application settings, not from an ingest-specific section.
431 my @pfx = ( "apps", "open-ils.storage","app_settings" );
432 my $conf = OpenSRF::Utils::SettingsClient->new;
434 my $libs = $conf->config_value(@pfx, 'script_path');
435 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# config_value may hand back a single path or a list; always pass an array ref.
436 my $script_libs = (ref($libs)) ? $libs : [$libs];
438 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): indirect-object "new Class (...)" syntax; Class->new(...) is
# the unambiguous spelling. $fp_script is not declared in the visible lines.
440 $fp_script = new OpenILS::Utils::ScriptRunner
441 ( file => $script_file,
442 paths => $script_libs,
443 reset_count => 1000 );
# NOTE(review): $marc is inserted here while the log line above prints $xml;
# the declaration of $marc is not visible -- confirm it is populated.
446 $fp_script->insert('environment' => {marc => $marc} => 1);
# On a false result from run(), the early return only fires if $log->error()
# itself returns a true value (it sits on the right of &&).
448 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
449 $log->debug("Script for biblio fingerprinting completed successfully...");
453 __PACKAGE__->register_method(
454 api_name => "open-ils.ingest.fingerprint.xml",
455 method => "biblio_fingerprint",
466 OpenILS::Application::Ingest->post_init();
467 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
470 sub begin_transaction {
474 OpenILS::Application::Ingest->post_init();
475 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
479 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
480 #__PACKAGE__->st_sess->connect;
481 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
482 unless (defined $r and $r) {
483 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
484 #__PACKAGE__->st_sess->disconnect;
485 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
489 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
492 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
495 sub rollback_transaction {
499 OpenILS::Application::Ingest->post_init();
500 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
504 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
506 $log->debug("Ingest isn't inside a transaction.", INFO);
509 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
515 sub commit_transaction {
519 OpenILS::Application::Ingest->post_init();
520 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
523 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
525 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
526 unless (defined $r and $r) {
527 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
528 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
530 #__PACKAGE__->st_sess->disconnect;
532 $log->debug("Ingest isn't inside a transaction.", INFO);
535 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
544 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
545 return shift( @res );
548 sub scrub_authority_record {
554 if (!OpenILS::Application::Ingest->in_transaction) {
555 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
561 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
563 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
564 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
566 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
568 $log->debug('Scrubbing failed : '.shift(), ERROR);
569 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
573 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
574 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
577 __PACKAGE__->register_method(
578 api_name => "open-ils.worm.scrub.authority",
579 method => "scrub_authority_record",
# Remove all derived (metabib) data for a bib record and repair any metarecord
# that used it as master. Registered as open-ils.worm.scrub.biblio.
# NOTE(review): argument unpacking, the try/catch framing and the definitions
# of $commit / $success are not visible in this excerpt.
585 sub scrub_metabib_record {
# A hash argument is treated as a search clause and replaced by the result of
# an id_list search.
# NOTE(review): $rec is later interpolated into log messages; if the search
# returns a reference those messages will not show ids -- confirm.
590 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
591 $rec = OpenILS::Application::Ingest->storage_req(
592 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction only if the caller has not already done so.
597 if (!OpenILS::Application::Ingest->in_transaction) {
598 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# Everything below runs inside the 'scrub_metabib_record' savepoint.
604 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Mass-delete flattened MARC rows, source mappings, the record descriptor and
# the per-class field entries. Note the filter key differs by table: 'record'
# for full_rec / record_descriptor, 'source' for the others.
606 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
607 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
608 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
609 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
610 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
611 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
612 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
613 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Metarecords that pointed at this record as master need a new master or must
# be removed.
615 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
616 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
618 for my $mr (@$masters) {
619 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source mappings for this metarecord (this record's own mappings
# were deleted above).
620 my $others = OpenILS::Application::Ingest->storage_req(
621 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the cached mods.
624 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
625 $mr->master_record($others->[0]->source);
626 OpenILS::Application::Ingest->storage_req(
627 'open-ils.storage.direct.metabib.metarecord.remote_update',
629 { master_record => $others->[0]->source, mods => undef }
# NOTE(review): the warn calls duplicate the debug log lines and write to
# STDERR -- they look like leftover debugging output.
632 warn "Removing metarecord whose master is $rec";
633 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
634 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
635 warn "Metarecord removed";
636 $log->debug( "Metarecord removed", DEBUG);
640 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo back to the savepoint.
643 $log->debug('Scrubbing failed : '.shift(), ERROR);
644 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# Commit or roll back the transaction according to $commit and $success.
648 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
649 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
652 __PACKAGE__->register_method(
653 api_name => "open-ils.worm.scrub.biblio",
654 method => "scrub_metabib_record",
# Re-ingest every bib record mapped to a metarecord: the source-map rows for
# $mrec are fetched and wormize_biblio_record is called for each source.
# NOTE(review): only fragments of this sub are visible; the loop framing and
# the structures the hash fragments below belong to are not shown.
659 sub wormize_biblio_metarecord {
664 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
# Called as a plain function with $self and $client passed explicitly.
670 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is not declared in the visible lines (the visible
# variables are $mrec, $recs and $r) -- $rec->metarecord may be a typo for
# $r->metarecord; confirm against the full sub.
672 { record => $r->source,
673 metarecord => $rec->metarecord,
680 { record => $r->source,
681 metarecord => $rec->metarecord,
690 __PACKAGE__->register_method(
691 api_name => "open-ils.worm.wormize.metarecord",
692 method => "wormize_biblio_metarecord",
697 __PACKAGE__->register_method(
698 api_name => "open-ils.worm.wormize.metarecord.nomap",
699 method => "wormize_biblio_metarecord",
704 __PACKAGE__->register_method(
705 api_name => "open-ils.worm.wormize.metarecord.noscrub",
706 method => "wormize_biblio_metarecord",
711 __PACKAGE__->register_method(
712 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
713 method => "wormize_biblio_metarecord",
720 sub wormize_biblio_record {
725 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
726 $rec = OpenILS::Application::Ingest->storage_req(
727 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
733 if (!OpenILS::Application::Ingest->in_transaction) {
734 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
741 unless ($self->api_name =~ /noscrub/o) {
742 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
746 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
749 my @rec_descriptor = ();
761 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
763 my $xml = $parser->parse_string($r->marc);
765 #update the fingerprint
766 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
767 OpenILS::Application::Ingest->storage_req(
768 'open-ils.storage.direct.biblio.record_entry.remote_update',
770 { fingerprint => $fp->{fingerprint},
771 quality => int($fp->{quality}) }
772 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
775 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
776 $fr->record( $r->id );
780 # the rec_descriptor stuff
781 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
782 $rd->record( $r->id );
783 push @rec_descriptor, $rd;
785 # the indexing field entry stuff
786 for my $class ( qw/title author subject keyword series/ ) {
787 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
788 $fe->source( $r->id );
789 push @{$field_entry{$class}}, $fe;
793 unless ($self->api_name =~ /nomap/o) {
794 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
797 $mr = Fieldmapper::metabib::metarecord->new;
798 $mr->fingerprint( $fp->{fingerprint} );
799 $mr->master_record( $r->id );
800 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
803 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
804 $mr_map->metarecord( $mr->id );
805 $mr_map->source( $r->id );
806 push @source_map, $mr_map;
808 $metarecord{$mr->id} = $mr;
810 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
812 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
813 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
818 if (@rec_descriptor) {
819 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
821 OpenILS::Application::Ingest->storage_req(
822 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
826 for my $mr ( values %metarecord ) {
827 my $sources = OpenILS::Application::Ingest->storage_req(
828 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
832 my $bibs = OpenILS::Application::Ingest->storage_req(
833 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
834 [ map { $_->source } @$sources ]
837 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
839 OpenILS::Application::Ingest->storage_req(
840 'open-ils.storage.direct.metabib.metarecord.remote_update',
842 { master_record => $master->id, mods => undef }
846 OpenILS::Application::Ingest->storage_req(
847 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
849 ) if (@rec_descriptor);
851 OpenILS::Application::Ingest->storage_req(
852 'open-ils.storage.direct.metabib.full_rec.batch.create',
856 OpenILS::Application::Ingest->storage_req(
857 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
858 @{ $field_entry{title} }
859 ) if (@{ $field_entry{title} });
861 OpenILS::Application::Ingest->storage_req(
862 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
863 @{ $field_entry{author} }
864 ) if (@{ $field_entry{author} });
866 OpenILS::Application::Ingest->storage_req(
867 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
868 @{ $field_entry{subject} }
869 ) if (@{ $field_entry{subject} });
871 OpenILS::Application::Ingest->storage_req(
872 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
873 @{ $field_entry{keyword} }
874 ) if (@{ $field_entry{keyword} });
876 OpenILS::Application::Ingest->storage_req(
877 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
878 @{ $field_entry{series} }
879 ) if (@{ $field_entry{series} });
881 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
887 $log->debug('Wormization failed : '.shift(), ERROR);
888 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
892 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
893 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
896 __PACKAGE__->register_method(
897 api_name => "open-ils.worm.wormize.biblio",
898 method => "wormize_biblio_record",
902 __PACKAGE__->register_method(
903 api_name => "open-ils.worm.wormize.biblio.nomap",
904 method => "wormize_biblio_record",
908 __PACKAGE__->register_method(
909 api_name => "open-ils.worm.wormize.biblio.noscrub",
910 method => "wormize_biblio_record",
914 __PACKAGE__->register_method(
915 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
916 method => "wormize_biblio_record",
921 sub wormize_authority_record {
927 if (!OpenILS::Application::Ingest->in_transaction) {
928 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
935 unless ($self->api_name =~ /noscrub/o) {
936 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
940 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
943 my @rec_descriptor = ();
945 my $xml = $parser->parse_string($r->marc);
948 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
949 $fr->record( $r->id );
953 # the rec_descriptor stuff -- XXX What does this mean for authority records?
954 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
955 #$rd->record( $r->id );
956 #push @rec_descriptor, $rd;
960 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
962 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
963 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
965 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
968 $log->debug('Wormization failed : '.shift(), ERROR);
969 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
973 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
974 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
977 __PACKAGE__->register_method(
978 api_name => "open-ils.worm.wormize.authority",
979 method => "wormize_authority_record",
983 __PACKAGE__->register_method(
984 api_name => "open-ils.worm.wormize.authority.noscrub",
985 method => "wormize_authority_record",
991 # --------------------------------------------------------------------------------
992 # MARC index extraction
994 package OpenILS::Application::Ingest::XPATH;
995 use base qw/OpenILS::Application::Ingest/;
996 use Unicode::Normalize;
998 # give this a MODS documentElement and an XPATH expression
# Evaluate $xpath against $xml and return the text of the matching nodes as a
# single space-separated string in Unicode NFD form.
# If both a namespace URI and prefix are supplied they are registered on $xml
# first so the prefix can be used inside the XPath expression.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri and $unique, the
# initialisation of $string and the branch structure around the two append
# sites are not visible in this excerpt.
999 sub _xpath_to_string {
1003 my $ns_prefix = shift;
1006 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1010 # grab the set of matching nodes
1011 my @nodes = $xml->findnodes( $xpath );
1012 for my $value (@nodes) {
1014 # grab all children of the node
1015 my @children = $value->childNodes();
1016 for my $child (@children) {
1018 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern below.
1019 my $content = quotemeta($child->textContent);
# The uniqueness test is a substring match against everything collected so
# far, so a value that merely occurs inside an earlier value is also skipped.
1020 next if ($unique && $string =~ /$content/); # uniquify the values
1021 $string .= $child->textContent . " ";
1024 $string .= $value->textContent . " ";
# Normalize to canonical decomposition (NFD).
1027 return NFD($string);
1030 sub class_all_index_string_xml {
1036 OpenILS::Application::Ingest->post_init();
1037 $xml = $parser->parse_string($xml) unless (ref $xml);
1039 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1040 for my $type ( keys %{ $xpathset->{$class} } ) {
1041 my $value = _xpath_to_string(
1042 $mods_sheet->transform($xml)->documentElement,
1043 $xpathset->{$class}->{$type}->{xpath},
1044 "http://www.loc.gov/mods/",
1051 $value =~ s/\pM+//sgo;
1052 $value =~ s/\pC+//sgo;
1053 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
1055 $value =~ s/(\w)\./$1/sgo;
1056 $value = lc($value);
1058 my $fm = $class_constructor->new;
1059 $fm->value( $value );
1060 $fm->field( $xpathset->{$class}->{$type}->{id} );
1061 $client->respond($fm);
1065 __PACKAGE__->register_method(
1066 api_name => "open-ils.worm.field_entry.class.xml",
1067 method => "class_all_index_string_xml",
1073 sub class_all_index_string_record {
1079 OpenILS::Application::Ingest->post_init();
1080 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1082 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1084 $client->respond($fm);
1088 __PACKAGE__->register_method(
1089 api_name => "open-ils.worm.field_entry.class.record",
1090 method => "class_all_index_string_record",
1097 sub class_index_string_xml {
1104 OpenILS::Application::Ingest->post_init();
1105 $xml = $parser->parse_string($xml) unless (ref $xml);
1106 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1108 __PACKAGE__->register_method(
1109 api_name => "open-ils.worm.class.type.xml",
1110 method => "class_index_string_xml",
1115 sub class_index_string_record {
1122 OpenILS::Application::Ingest->post_init();
1123 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1125 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1126 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1129 __PACKAGE__->register_method(
1130 api_name => "open-ils.worm.class.type.record",
1131 method => "class_index_string_record",
1145 OpenILS::Application::Ingest->post_init();
1146 $xml = $parser->parse_string($xml) unless (ref $xml);
1147 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1149 __PACKAGE__->register_method(
1150 api_name => "open-ils.worm.xpath.xml",
1151 method => "xml_xpath",
1165 OpenILS::Application::Ingest->post_init();
1166 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1168 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1169 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1172 __PACKAGE__->register_method(
1173 api_name => "open-ils.worm.xpath.record",
1174 method => "record_xpath",
1180 # --------------------------------------------------------------------------------
1183 package OpenILS::Application::Ingest::Biblio::Leader;
1184 use base qw/OpenILS::Application::Ingest/;
1185 use Unicode::Normalize;
1187 our %marc_type_groups = (
1190 VIS => q/[gkro]{1}/,
# Build an anchored alternation from the patterns of every requested MARC
# type group (the arguments in @_ are keys of %marc_type_groups).
# A hash slice is required here: the former scalar subscript
# $marc_type_groups{@_} evaluated @_ in scalar context and therefore looked up
# the *number of arguments* as the key, never the requested groups.
# The alternation is wrapped in (?:...) so that ^ and $ anchor every branch
# rather than only the first and the last one.
1199 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
1203 our %biblio_descriptor_code = (
1204 item_type => sub { substr($ldr,6,1); },
1207 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1208 return substr($oo8,29,1);
1209 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1210 return substr($oo8,23,1);
1214 bib_level => sub { substr($ldr,7,1); },
1215 control_type => sub { substr($ldr,8,1); },
1216 char_encoding => sub { substr($ldr,9,1); },
1217 enc_level => sub { substr($ldr,17,1); },
1218 cat_form => sub { substr($ldr,18,1); },
1219 pub_status => sub { substr($ldr,5,1); },
1220 item_lang => sub { substr($oo8,35,3); },
1221 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1222 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1223 audience => sub { substr($oo8,22,1); },
# Build a Fieldmapper::metabib::record_descriptor from a MARC XML document by
# running every extractor in %biblio_descriptor_code and storing each result
# in the accessor of the same name.
# NOTE(review): the unpacking of $xml and the return statement are not visible
# in this excerpt.
1226 sub _extract_biblio_descriptors {
# The extractor coderefs in %biblio_descriptor_code read $ldr and $oo8 as
# package variables, so they are set with local for the duration of this call.
1229 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1230 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# NOTE(review): no visible extractor reads $oo7 -- possibly reserved for
# future use; confirm.
1231 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1233 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1234 for my $rd_field ( keys %biblio_descriptor_code ) {
# Each hash key doubles as the name of the setter on the descriptor object.
1235 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1241 sub extract_biblio_desc_xml {
1246 $xml = $parser->parse_string($xml) unless (ref $xml);
1248 return _extract_biblio_descriptors( $xml );
1250 __PACKAGE__->register_method(
1251 api_name => "open-ils.worm.biblio_leader.xml",
1252 method => "extract_biblio_desc_xml",
1257 sub extract_biblio_desc_record {
1262 OpenILS::Application::Ingest->post_init();
1263 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1265 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1266 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1269 __PACKAGE__->register_method(
1270 api_name => "open-ils.worm.biblio_leader.record",
1271 method => "extract_biblio_desc_record",
1276 # --------------------------------------------------------------------------------
1279 package OpenILS::Application::Ingest::FlatMARC;
1280 use base qw/OpenILS::Application::Ingest/;
1281 use Unicode::Normalize;
1284 sub _marcxml_to_full_rows {
1286 my $marcxml = shift;
1287 my $xmltype = shift || 'metabib';
1289 my $type = "Fieldmapper::${xmltype}::full_rec";
1293 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1295 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1296 next unless $tagline;
1298 my $ns = $type->new;
1301 my $val = $tagline->textContent;
1303 $val =~ s/(\pM+)//gso;
1309 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1310 next unless $tagline;
1312 my $ns = $type->new;
1314 $ns->tag( $tagline->getAttribute( "tag" ) );
1315 my $val = $tagline->textContent;
1317 $val =~ s/(\pM+)//gso;
1323 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1324 next unless $tagline;
1326 my $tag = $tagline->getAttribute( "tag" );
1327 my $ind1 = $tagline->getAttribute( "ind1" );
1328 my $ind2 = $tagline->getAttribute( "ind2" );
1330 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1333 my $ns = $type->new;
1338 $ns->subfield( $data->getAttribute( "code" ) );
1339 my $val = $data->textContent;
1341 $val =~ s/(\pM+)//gso;
1342 $ns->value( lc($val) );
1348 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# flat_marc_xml body: streams the flattened full_rec rows for a MARCXML
# document back to the client, one response per row.
# Accept either raw XML text or an already-parsed document.
1357 $xml = $parser->parse_string($xml) unless (ref $xml);
# The row family follows the API name this sub was invoked under.
1359 my $type = 'metabib';
1360 $type = 'authority' if ($self->api_name =~ /authority/o);
1362 OpenILS::Application::Ingest->post_init();
1364 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is published for authority and bibliographic MARCXML.
1367 __PACKAGE__->register_method(
1368 api_name => "open-ils.worm.flat_marc.authority.xml",
1369 method => "flat_marc_xml",
1374 __PACKAGE__->register_method(
1375 api_name => "open-ils.worm.flat_marc.biblio.xml",
1376 method => "flat_marc_xml",
# flat_marc_record: retrieves a stored record entry and streams its flattened
# MARC rows to the client by delegating to the matching flat_marc ".xml" method.
1382 sub flat_marc_record {
# $type selects both the storage class and the delegate method name below.
1387 my $type = 'biblio';
1388 $type = 'authority' if ($self->api_name =~ /authority/o);
1390 OpenILS::Application::Ingest->post_init();
1391 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
# Relay every row produced by the XML-level method as its own response.
1393 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
# Published once per record family.
1396 __PACKAGE__->register_method(
1397 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1398 method => "flat_marc_record",
1403 __PACKAGE__->register_method(
1404 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1405 method => "flat_marc_record",
1412 # --------------------------------------------------------------------------------
1415 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1416 use base qw/OpenILS::Application::Ingest/;
1417 use Unicode::Normalize;
1418 use OpenSRF::EX qw/:try/;
# @fp_mods_xpath: fingerprinting rules as a flat list of pairs. Each pair is a
# "match" XPath followed by an array reference describing what to extract when
# that XPath matches. Inside a block the candidate XPaths are listed in
# priority order, followed by the clean-up steps applied to the extracted text.
# NOTE(review): the lines wrapping each group (part names, hash/sub delimiters)
# are not visible here - confirm the exact nesting against the full file.
1420 my @fp_mods_xpath = (
# Rule 1: records whose typeOfResource is "text".
1421 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates: uniform, translated, alternative, then untyped.
1424 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1425 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1426 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1427 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title clean-up; the text is logged at INTERNAL level between steps.
1430 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1432 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove combining marks.
1433 $text =~ s/\pM+//gso;
1434 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1436 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Collapse every whitespace run to a single space.
1437 $text =~ s/\s+/ /sgo;
1438 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Strip leading whitespace (the greedy capture keeps any trailing whitespace).
1439 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1440 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove the standalone words "the", "a" and "an".
1441 $text =~ s/\b(?:the|an?)\b//sgo;
1442 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove square-bracketed segments.
1443 $text =~ s/\[.[^\]]+\]//sgo;
1444 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove trailing whitespace and any trailing ';', '/' or '.' characters.
1445 $text =~ s/\s*[;\/\.]*$//sgo;
1446 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal creator names first, then any creator name.
1451 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1452 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author clean-up.
1455 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1457 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1458 $text =~ s/\pM+//gso;
1459 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1461 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1462 $text =~ s/\s+/ /sgo;
1463 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1464 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1465 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Drop everything from the first whitespace (and a comma directly before it).
1466 $text =~ s/,?\s+.*$//sgo;
1467 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 2: records with a relatedItem whose type is neither "host" nor "series".
1472 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the related item's titles first, then the record's own.
1475 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1476 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1477 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1478 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1479 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1480 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1481 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1482 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title clean-up: the same substitutions as in rule 1.
1485 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1487 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1488 $text =~ s/\pM+//gso;
1489 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1491 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1492 $text =~ s/\s+/ /sgo;
1493 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1494 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1495 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1496 $text =~ s/\b(?:the|an?)\b//sgo;
1497 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1498 $text =~ s/\[.[^\]]+\]//sgo;
1499 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1500 $text =~ s/\s*[;\/\.]*$//sgo;
1501 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: the related item's creators first, then the record's own.
1506 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1507 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1508 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1509 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author clean-up: the same substitutions as in rule 1.
1512 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1514 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1515 $text =~ s/\pM+//gso;
1516 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1518 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1519 $text =~ s/\s+/ /sgo;
1520 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1521 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1522 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1523 $text =~ s/,?\s+.*$//sgo;
1524 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Extra rule: any record with a titleInfo reuses the block of rule 1 (list index 1).
1531 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# _fp_mods body: builds a fingerprint string for a MODS element by walking the
# (match XPath, block) pairs of @fp_mods_xpath.
# Bind the "mods" prefix used by every XPath in the rule table.
1535 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# $match_index addresses the match XPath of a pair, $block_index its block.
1539 my $match_index = 0;
1540 my $block_index = 1;
1541 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
# The block is only processed when its match XPath finds at least one node.
1542 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Within a block, names sit at even positions and their parts at odd ones.
1544 my $block_name_index = 0;
1545 my $block_value_index = 1;
1546 my $block = $fp_mods_xpath[$block_index];
1547 while ( my $part = $$block[$block_value_index] ) {
# Evaluate the part's candidate XPaths in their listed order.
1549 for my $xpath ( @{ $part->{xpath} } ) {
1550 $text = $mods->findvalue( $xpath );
1554 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1558 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
# Append this part's text to the fingerprint being assembled.
1559 $fp_string .= $text;
# Advance to the next (name, part) pair of the block.
1562 $block_name_index += 2;
1563 $block_value_index += 2;
# The final fingerprint keeps word characters only.
1567 $fp_string =~ s/\W+//gso;
1568 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recomputes fingerprint and quality for bibliographic
# records, stores changed values and, unless invoked under a "nomap" API name,
# rebuilds the metarecord source map of each changed record. The id of every
# processed record is sent to the client.
1578 sub refingerprint_bibrec {
# Open a transaction only when the caller has not already done so.
1584 if (!OpenILS::Application::Ingest->in_transaction) {
1585 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1591 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1592 for my $b (@$bibs) {
# Compute a fresh fingerprint/quality pair from the record's MARC.
1593 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Only touch storage when the fingerprint or the quality actually changed.
1595 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1597 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1599 OpenILS::Application::Ingest->storage_req(
1600 'open-ils.storage.direct.biblio.record_entry.remote_update',
1602 { fingerprint => $fp->{fingerprint},
1603 quality => $fp->{quality} }
# Metarecord mapping is skipped for the ".nomap" variant of the API.
1606 if ($self->api_name !~ /nomap/o) {
# Look up the record's existing source-map rows.
1607 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1608 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Delete each old mapping, remembering the metarecord it pointed at.
1613 if (ref($old_source_map) and @$old_source_map) {
1614 for my $m (@$old_source_map) {
1615 $old_mrid = $m->metarecord;
1616 OpenILS::Application::Ingest->storage_req(
1617 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# Check whether the old metarecord still has any mapped sources.
1623 my $old_sm = OpenILS::Application::Ingest->storage_req(
1624 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1625 { metarecord => $old_mrid }
# A metarecord left without sources is deleted.
1628 if (ref($old_sm) and @$old_sm == 0) {
1629 OpenILS::Application::Ingest->storage_req(
1630 'open-ils.storage.direct.metabib.metarecord.delete',
# Find a metarecord that already carries the new fingerprint.
1635 my $mr = OpenILS::Application::Ingest->storage_req(
1636 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1637 { fingerprint => $fp->{fingerprint} }
# Build a new metarecord with this record as master; its id is the value
# returned by the create call.
1641 $mr = Fieldmapper::metabib::metarecord->new;
1642 $mr->fingerprint( $fp->{fingerprint} );
1643 $mr->master_record( $b->id );
1644 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map the record to its metarecord.
1647 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1648 $mr_map->metarecord( $mr->id );
1649 $mr_map->source( $b->id );
1650 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
# Report the processed record id to the client.
1654 $client->respond($b->id);
1658 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Finish the transaction: commit on success, roll back otherwise; both are
# gated on $commit.
1662 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1663 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published with and without metarecord mapping.
1666 __PACKAGE__->register_method(
1667 api_name => "open-ils.worm.fingerprint.record.update",
1668 method => "refingerprint_bibrec",
1674 __PACKAGE__->register_method(
1675 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1676 method => "refingerprint_bibrec",
# fingerprint_bibrec: retrieves a bibliographic record entry from storage and
# computes its fingerprint via the "open-ils.worm.fingerprint.marc" method.
1683 sub fingerprint_bibrec {
1688 OpenILS::Application::Ingest->post_init();
1689 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# Only the first value returned by the MARC-level method is kept.
1691 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1692 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): this API name is registered a second time further down, for
# biblio_fingerprint_record - confirm which registration is meant to win.
1696 __PACKAGE__->register_method(
1697 api_name => "open-ils.worm.fingerprint.record",
1698 method => "fingerprint_bibrec",
# fingerprint_mods: parses a MODS XML string and returns the fingerprint
# computed by _fp_mods for its document element.
1704 sub fingerprint_mods {
1709 OpenILS::Application::Ingest->post_init();
1710 my $mods = $parser->parse_string($xml)->documentElement;
1712 return _fp_mods( $mods );
1714 __PACKAGE__->register_method(
1715 api_name => "open-ils.worm.fingerprint.mods",
1716 method => "fingerprint_mods",
# fingerprint_marc: transforms MARCXML to MODS with $mods_sheet and
# fingerprints the resulting document element with _fp_mods.
1721 sub fingerprint_marc {
# Accept either raw XML text or an already-parsed document.
1726 $xml = $parser->parse_string($xml) unless (ref $xml);
1728 OpenILS::Application::Ingest->post_init();
1729 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1730 $log->debug("Returning [$fp] as fingerprint", INFO);
# NOTE(review): this API name is registered a second time further down, for
# biblio_fingerprint - confirm which registration is meant to win.
1733 __PACKAGE__->register_method(
1734 api_name => "open-ils.worm.fingerprint.marc",
1735 method => "fingerprint_marc",
# biblio_fingerprint_record: retrieves a bibliographic record entry from
# storage and fingerprints it via the "open-ils.worm.fingerprint.marc" method.
1743 sub biblio_fingerprint_record {
1748 OpenILS::Application::Ingest->post_init();
# Retrieve the stored record entry for $rec.
1750 my $marc = OpenILS::Application::Ingest
1751 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value returned by the MARC-level method is kept.
1754 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1755 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1758 __PACKAGE__->register_method(
1759 api_name => "open-ils.worm.fingerprint.record",
1760 method => "biblio_fingerprint_record",
# biblio_fingerprint: fingerprints a MARC record by running the configured
# fingerprint script with the record's MARC and MODS forms as its environment.
1766 sub biblio_fingerprint {
1771 OpenILS::Application::Ingest->post_init();
# Accept either raw XML text or an already-parsed document.
1773 $marc = $parser->parse_string($marc) unless (ref $marc);
# Build the MODS form of the record and pass it through entityize.
1775 my $mods = OpenILS::Application::Ingest::entityize(
1777 ->transform( $marc )
# Serialize the MARC document element and pass it through entityize as well.
# NOTE(review): the meaning of the 'D' argument is not visible here - confirm.
1783 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
1786 $log->internal("Got MARC [$marc]");
1787 $log->internal("Created MODS [$mods]");
# Script location and library path come from the open-ils.storage app settings.
1790 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1791 my $conf = OpenSRF::Utils::SettingsClient->new;
1793 my $libs = $conf->config_value(@pfx, 'script_path');
1794 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# Normalize the library path setting to an array reference.
1795 my $script_libs = (ref($libs)) ? $libs : [$libs];
1797 $log->debug("Loading script $script_file for biblio fingerprinting...");
1799 $fp_script = new OpenILS::Utils::ScriptRunner
1800 ( file => $script_file,
1801 paths => $script_libs,
1802 reset_count => 1000 );
1805 $log->debug("Applying environment for biblio fingerprinting...");
# Expose both serializations to the script under the name 'environment'.
1807 my $env = {marc => $marc, mods => $mods};
1808 #my $res = {fingerprint => '', quality => '0'};
1810 $fp_script->insert('environment' => $env);
1811 #$fp_script->insert('result' => $res);
1813 $log->debug("Running script for biblio fingerprinting...");
# A false result from the script is logged as an error; the early "return 0"
# only happens if the logging call itself returns a true value.
1815 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
1817 $log->debug("Script for biblio fingerprinting completed successfully...");
1821 __PACKAGE__->register_method(
1822 api_name => "open-ils.worm.fingerprint.marc",
1823 method => "biblio_fingerprint",
1828 # --------------------------------------------------------------------------------
1842 my $create_source_map;
# %descriptor_code: maps each record_descriptor field name to a snippet of Perl
# source. The wormize routines string-eval each snippet with $ldr (leader text)
# and $oo8 (value of the 008 control field) in scope and store the result on
# the descriptor object through the accessor named by the key.
1857 my %descriptor_code = (
1858 item_type => 'substr($ldr,6,1)',
# The form is read from 008 offset 29 for leader types f, g, i, m, o, p and r,
# and from offset 23 for every other type.
1859 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1860 bib_level => 'substr($ldr,7,1)',
1861 control_type => 'substr($ldr,8,1)',
1862 char_encoding => 'substr($ldr,9,1)',
1863 enc_level => 'substr($ldr,17,1)',
1864 cat_form => 'substr($ldr,18,1)',
1865 pub_status => 'substr($ldr,5,1)',
# Three-character code taken from 008 offsets 35-37.
1866 item_lang => 'substr($oo8,35,3)',
1867 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1868 audience => 'substr($oo8,22,1)',
# wormize body: (re)ingests bibliographic records. For every requested record
# it computes the fingerprint, finds or creates the matching metarecord, and
# builds the record descriptor, the flattened MARC rows and the per-class field
# entries. Old derived rows are then mass-deleted and the new ones
# batch-created. A transaction is opened and committed here only when the
# caller is not already inside one.
# The API name decides whether metarecord mapping is skipped.
1878 if ($self->api_name =~ /no_map/o) {
# Resolve the storage methods used below; the handles live in variables
# declared outside this sub and are looked up only while still unset.
1882 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1884 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1886 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1888 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1890 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
1891 unless ($sm_lookup);
1892 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
1893 unless ($mr_lookup);
1894 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
1895 unless ($mr_update);
1896 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
1898 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
1899 unless ($update_entry);
1900 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
1901 unless ($rm_old_sm);
1902 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
1903 unless ($rm_old_rd);
1904 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
1905 unless ($rm_old_fr);
1906 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
1907 unless ($rm_old_tr);
1908 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
1909 unless ($rm_old_ar);
1910 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
1911 unless ($rm_old_sr);
1912 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
1913 unless ($rm_old_kr);
1914 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
1915 unless ($rm_old_ser);
1916 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
1917 unless ($mr_create);
1918 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
1919 unless ($create_source_map);
1920 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
1921 unless ($rd_create);
1922 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
1923 unless ($fr_create);
1924 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
1925 unless ($$create{title});
1926 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
1927 unless ($$create{author});
1928 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
1929 unless ($$create{subject});
1930 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
1931 unless ($$create{keyword});
1932 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
1933 unless ($$create{series});
# Detect a transaction opened by the caller; start one only when there is none.
1936 my ($outer_xact) = $in_xact->run;
1938 unless ($outer_xact) {
1939 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1940 my ($r) = $begin->run($client);
1941 unless (defined $r and $r) {
1943 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1946 } catch Error with {
1947 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
1957 for my $entry ( $lookup->run(@docids) ) {
1958 # step -1: grab the doc from storage
1959 next unless ($entry);
# Load and compile the MARC -> MODS stylesheet.
1962 my $xslt_doc = $parser->parse_file(
1963 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
1964 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
1967 my $xml = $entry->marc;
1968 my $docid = $entry->id;
1969 my $marcdoc = $parser->parse_string($xml);
1970 my $modsdoc = $mods_sheet->transform($marcdoc);
1972 my $mods = $modsdoc->documentElement;
1973 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Fingerprint the record and queue the entry for the batch update.
1975 $entry->fingerprint( fingerprint_mods( $mods ) );
1976 push @entry_list, $entry;
1978 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Find the metarecord for this fingerprint, creating one when none exists.
1981 my ($mr) = $mr_lookup->run( $entry->fingerprint );
1982 if (!$mr || !@$mr) {
1983 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
1984 $mr = new Fieldmapper::metabib::metarecord;
1985 $mr->fingerprint( $entry->fingerprint );
1986 $mr->master_record( $entry->id );
1987 my ($new_mr) = $mr_create->run($mr);
# Test the value returned by the create call: $mr is the object built just
# above and is always defined, so checking it could never detect a failure.
1989 unless (defined $new_mr) {
1990 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
1993 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue the record -> metarecord mapping.
1998 my $sm = new Fieldmapper::metabib::metarecord_source_map;
1999 $sm->metarecord( $mr->id );
2000 $sm->source( $entry->id );
2001 push @source_maps, $sm;
# Build the record descriptor from the leader and the 008 control field by
# evaluating the %descriptor_code snippets with $ldr and $oo8 in scope.
2004 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2005 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2007 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2008 for my $rd_field ( keys %descriptor_code ) {
2009 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2011 $rd_obj->record( $docid );
2012 push @rd_list, $rd_obj;
# Per-class field values extracted from the MODS, keyed by record id.
2014 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2016 # step 2: build the KOHA rows
2017 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2018 $_->record( $docid ) for (@tmp_list);
2019 push @ns_list, @tmp_list;
# Only API names ending in "batch" go on to further records.
2023 last unless ($self->api_name =~ /batch$/o);
# Mass-delete the previously derived rows for the requested record ids.
2026 $rm_old_rd->run( { record => \@docids } );
2027 $rm_old_fr->run( { record => \@docids } );
2028 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2029 $rm_old_tr->run( { source => \@docids } );
2030 $rm_old_ar->run( { source => \@docids } );
2031 $rm_old_sr->run( { source => \@docids } );
2032 $rm_old_kr->run( { source => \@docids } );
2033 $rm_old_ser->run( { source => \@docids } );
# Write the queued rows; an undefined result is treated as fatal.
2036 my ($sm) = $create_source_map->run(@source_maps);
2037 unless (defined $sm) {
2038 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2040 my ($mr) = $mr_update->run(@mr_list);
2041 unless (defined $mr) {
2042 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2046 my ($re) = $update_entry->run(@entry_list);
2047 unless (defined $re) {
2048 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2051 my ($rd) = $rd_create->run(@rd_list);
2052 unless (defined $rd) {
2053 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2056 my ($fr) = $fr_create->run(@ns_list);
2057 unless (defined $fr) {
2058 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2061 # step 5: insert the new metadata
2062 for my $class ( qw/title author subject keyword series/ ) {
2064 for my $doc ( @mods_data ) {
# Each @mods_data element is a single-pair hash: record id => extracted data.
2065 my ($did) = keys %$doc;
2066 my ($data) = values %$doc;
2068 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2069 for my $row ( keys %{ $$data{$class} } ) {
2070 next unless (exists $$data{$class}{$row});
# Rows with an empty or false value are skipped.
2071 next unless ($$data{$class}{$row}{value});
2072 my $fm_obj = $fm_constructor->new;
2073 $fm_obj->value( $$data{$class}{$row}{value} );
2074 $fm_obj->field( $$data{$class}{$row}{field_id} );
2075 $fm_obj->source( $did );
2076 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2078 push @md_list, $fm_obj;
2082 my ($cr) = $$create{$class}->run(@md_list);
2083 unless (defined $cr) {
2084 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only a transaction that this routine opened itself.
2088 unless ($outer_xact) {
2089 $log->debug("Commiting transaction started by the Ingest.", INFO);
2090 my ($c) = $commit->run;
2091 unless (defined $c and $c) {
2093 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# The wormize sub is published under four API names. The sub checks its own API
# name for "no_map" and for a trailing "batch" to adjust its behavior.
2099 __PACKAGE__->register_method(
2100 api_name => "open-ils.worm.wormize",
2101 method => "wormize",
2105 __PACKAGE__->register_method(
2106 api_name => "open-ils.worm.wormize.no_map",
2107 method => "wormize",
2111 __PACKAGE__->register_method(
2112 api_name => "open-ils.worm.wormize.batch",
2113 method => "wormize",
2117 __PACKAGE__->register_method(
2118 api_name => "open-ils.worm.wormize.no_map.batch",
2119 method => "wormize",
2134 my $acreate_source_map;
# authority_wormize: (re)ingests authority records. For every requested record
# it builds the record descriptor and the flattened MARC rows, mass-deletes the
# previously derived rows and batch-creates the new ones. A transaction is
# opened and committed here only when the caller is not already inside one.
2149 sub authority_wormize {
2156 if ($self->api_name =~ /no_map/o) {
# Resolve the storage methods used below; the handles live in variables
# declared outside this sub.
2160 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2162 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2164 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2166 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2168 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2170 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2171 unless ($aupdate_entry);
2172 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2173 unless ($arm_old_rd);
2174 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2175 unless ($arm_old_fr);
2176 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2177 unless ($ard_create);
2178 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2179 unless ($afr_create);
# Detect a transaction opened by the caller; start one only when there is none.
2182 my ($outer_xact) = $in_xact->run;
2184 unless ($outer_xact) {
2185 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2186 my ($r) = $begin->run($client);
2187 unless (defined $r and $r) {
2189 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2192 } catch Error with {
2193 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Retrieve through the authority handle resolved above ($alookup); $lookup is
# the bibliographic retriever and would fetch the wrong class of record.
2203 for my $entry ( $alookup->run(@docids) ) {
2204 # step -1: grab the doc from storage
2205 next unless ($entry);
2208 # my $xslt_doc = $parser->parse_file(
2209 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2210 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2213 my $xml = $entry->marc;
2214 my $docid = $entry->id;
2215 my $marcdoc = $parser->parse_string($xml);
2216 #my $madsdoc = $mads_sheet->transform($marcdoc);
2218 #my $mads = $madsdoc->documentElement;
2219 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2221 push @entry_list, $entry;
# Build the record descriptor from the leader and the 008 control field by
# evaluating the %descriptor_code snippets with $ldr and $oo8 in scope.
2223 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2224 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2226 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2227 for my $rd_field ( keys %descriptor_code ) {
2228 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2230 $rd_obj->record( $docid );
2231 push @rd_list, $rd_obj;
2233 # step 2: build the KOHA rows
2234 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2235 $_->record( $docid ) for (@tmp_list);
2236 push @ns_list, @tmp_list;
# Only API names ending in "batch" go on to further records.
2240 last unless ($self->api_name =~ /batch$/o);
# Mass-delete the previously derived rows for the requested record ids.
2243 $arm_old_rd->run( { record => \@docids } );
2244 $arm_old_fr->run( { record => \@docids } );
2246 my ($rd) = $ard_create->run(@rd_list);
2247 unless (defined $rd) {
2248 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Write the rows through the authority full_rec creator ($afr_create);
# $fr_create is the metabib creator.
2251 my ($fr) = $afr_create->run(@ns_list);
2252 unless (defined $fr) {
2253 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only a transaction that this routine opened itself.
2256 unless ($outer_xact) {
2257 $log->debug("Commiting transaction started by Ingest.", INFO);
2258 my ($c) = $commit->run;
2259 unless (defined $c and $c) {
2261 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish authority_wormize under its single-record and batch API names.
# Both registrations previously pointed at the bibliographic "wormize" sub, and
# the first API name was misspelled ("authortiy"), unlike its batch sibling.
2267 __PACKAGE__->register_method(
2268 api_name => "open-ils.worm.authority.wormize",
2269 method => "authority_wormize",
2273 __PACKAGE__->register_method(
2274 api_name => "open-ils.worm.authority.wormize.batch",
2275 method => "authority_wormize",
2281 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows: flattens a parsed MARCXML document into full_rec row
# objects.
#   $marcxml - parsed XML document (its documentElement is the record)
#   $type    - class of the row objects to build; defaults to
#              'Fieldmapper::metabib::full_rec'
# Every row, including leader and control-field rows, is built from $type, so
# a caller asking for another class gets a consistent set of objects.
2284 sub _marcxml_to_full_rows {
2286 my $marcxml = shift;
2287 my $type = shift || 'Fieldmapper::metabib::full_rec';
2291 my $root = $marcxml->documentElement;
# Leader: one row object per <leader> element.
2293 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2294 next unless $tagline;
2296 my $ns = $type->new;
# Decompose the text, then remove the combining marks.
2299 my $val = NFD($tagline->textContent);
2300 $val =~ s/(\pM+)//gso;
# Control fields: one row per <controlfield>, tagged with its "tag" attribute.
2306 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2307 next unless $tagline;
2309 my $ns = $type->new;
2311 $ns->tag( $tagline->getAttribute( "tag" ) );
2312 my $val = NFD($tagline->textContent);
2313 $val =~ s/(\pM+)//gso;
# Data fields: tag and indicators are read once per <datafield>, then one row
# is built for each of its child nodes.
2319 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2320 next unless $tagline;
2322 my $tag = $tagline->getAttribute( "tag" );
2323 my $ind1 = $tagline->getAttribute( "ind1" );
2324 my $ind2 = $tagline->getAttribute( "ind2" );
2326 for my $data ( $tagline->childNodes ) {
2329 my $ns = $type->new;
2334 $ns->subfield( $data->getAttribute( "code" ) );
2335 my $val = NFD($data->textContent);
2336 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
2337 $ns->value( lc($val) );
# _get_field_value: collects the text found under every node matching $xpath
# into one space-separated string, skipping child text that is already part of
# the string, then removes combining marks from the result.
#   $root  - node to search from
#   $xpath - XPath expression selecting the nodes of interest
2345 sub _get_field_value {
2347 my( $root, $xpath ) = @_;
2351 # grab the set of matching nodes
2352 my @nodes = $root->findnodes( $xpath );
2353 for my $value (@nodes) {
2355 # grab all children of the node
2356 my @children = $value->childNodes();
2357 for my $child (@children) {
2359 # add the child's content to the growing buffer
2360 my $content = $child->textContent;
# Plain substring test. A regex built from the content would be an empty
# pattern for empty text, and Perl then reuses the last successfully matched
# pattern instead of matching the empty string.
2361 next if (index($string, $content) >= 0); # uniquify the values
2362 $string .= $content . " ";
2365 $string .= $value->textContent . " ";
# Decompose the text, then remove the combining marks.
2368 $string = NFD($string);
2369 $string =~ s/(\pM)//gso;
2374 sub modsdoc_to_values {
2375 my( $self, $mods ) = @_;
2377 for my $class (keys %$xpathset) {
2378 $data->{$class} = {};
2379 for my $type (keys %{$xpathset->{$class}}) {
2380 $data->{$class}->{$type} = {};
2381 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};