1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
30 our $log = 'OpenSRF::Utils::Logger';
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
# One-time initialisation body (logged as "post_init"; the enclosing sub
# header is outside this excerpt).  It only runs while $xpathset has no keys.
#  * Loads the MODS and MODS v3 stylesheets from the configured xsl directory
#    into %supported_formats, each only if not already loaded.
#  * Asks open-ils.cstore for every config.metabib_field row and stores its
#    xpath, id and format under $xpathset->{field_class}{name}.
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Search with "id != NULL", i.e. fetch all field definitions.
# NOTE(review): the end of this call chain is not shown; $req is treated as an
# array reference below.
61 my $req = OpenSRF::AppSession
62 ->create('open-ils.cstore')
63 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
66 if (ref $req and @$req) {
# $f is one returned field definition (the loop header is not shown here).
68 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
69 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
70 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
71 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
87 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
# Read-only ingest of one MARCXML string ($xml; its unpacking from @_ is not
# shown in this excerpt).  Parses the string and runs the flat_marc,
# field_entry and fingerprint methods on it.
# Returns a hashref: full_rec => arrayref, field_entries => arrayref,
# fingerprint => first value returned by the fingerprint method.
91 sub ro_biblio_ingest_single_xml {
96 my $document = $parser->parse_string($xml);
98 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
99 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
# The fingerprint method is handed the raw $xml string, not the parsed document.
100 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
102 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp };
104 __PACKAGE__->register_method(
105 api_name => "open-ils.ingest.full.biblio.xml.readonly",
106 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of a stored bib record.  Retrieves biblio.record_entry $rec
# through open-ils.cstore, runs open-ils.ingest.full.biblio.xml.readonly on
# its marc, then stamps $rec onto every field entry (source) and every
# full_rec row (record).
# Returns undef when the retrieve yields nothing.
# NOTE(review): the unpacking of $rec and the final return are not shown here.
111 sub ro_biblio_ingest_single_record {
116 OpenILS::Application::Ingest->post_init();
117 my $r = OpenSRF::AppSession
118 ->create('open-ils.cstore')
119 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
122 return undef unless ($r and @$r);
124 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
126 $_->source($rec) for (@{$res->{field_entries}});
127 $_->record($rec) for (@{$res->{full_rec}});
131 __PACKAGE__->register_method(
132 api_name => "open-ils.ingest.full.biblio.record.readonly",
133 method => "ro_biblio_ingest_single_record",
# Streaming variant of the single-record read-only ingest.  Reads one message
# at a time from $client (5 second timeout), treats its content as a record
# id, runs open-ils.ingest.full.biblio.record.readonly on it and responds with
# the result.  The loop ends on the first message with undefined content.
# NOTE(review): $ses is created but not used on any line shown in this excerpt.
138 sub ro_biblio_ingest_stream_record {
142 OpenILS::Application::Ingest->post_init();
144 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
146 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
148 my $rec = $resp->content;
149 last unless (defined $rec);
151 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
152 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# ro_biblio_ingest_single_record already sets source/record on these objects;
# the same setters are applied again here.
154 $_->source($rec) for (@{$res->{field_entries}});
155 $_->record($rec) for (@{$res->{full_rec}});
157 $client->respond( $res );
162 __PACKAGE__->register_method(
163 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
164 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the MARCXML read-only ingest.  Reads one message at a
# time from $client (5 second timeout), treats its content as a MARCXML
# string, runs open-ils.ingest.full.biblio.xml.readonly on it and responds
# with the result.  The loop ends on the first message with undefined content.
# NOTE(review): $ses is created but not used on any line shown in this excerpt.
169 sub ro_biblio_ingest_stream_xml {
173 OpenILS::Application::Ingest->post_init();
175 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
177 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
179 my $xml = $resp->content;
180 last unless (defined $xml);
182 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
183 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
185 $client->respond( $res );
190 __PACKAGE__->register_method(
191 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
192 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib objects.  Each message's content is used as an
# object with ->marc and ->id.  Its marc is run through
# open-ils.ingest.full.biblio.xml.readonly, the object's id is stamped onto
# the resulting field entries (source) and full_rec rows (record), and the
# client is sent the total number of objects produced.
# NOTE(review): no storage write appears on the lines shown here, despite the
# "rw"/"import" naming -- confirm against the full file.
197 sub rw_biblio_ingest_stream_import {
201 OpenILS::Application::Ingest->post_init();
203 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
205 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
207 my $bib = $resp->content;
208 last unless (defined $bib);
210 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
211 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
213 $_->source($bib->id) for (@{$res->{field_entries}});
214 $_->record($bib->id) for (@{$res->{full_rec}});
# Both arrays are evaluated in numeric context: the response is the sum of
# their element counts, not the objects themselves.
216 $client->respond( @{$res->{field_entries}} + @{$res->{full_rec}} );
221 __PACKAGE__->register_method(
222 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
223 method => "rw_biblio_ingest_stream_import",
229 # --------------------------------------------------------------------------------
230 # MARC index extraction
232 package OpenILS::Application::Ingest::XPATH;
233 use base qw/OpenILS::Application::Ingest/;
234 use Unicode::Normalize;
236 # give this an XML documentElement and an XPATH expression
# Concatenates the text found under every node matching $xpath into one
# space-separated buffer ($string).  When both a namespace URI and a prefix
# are supplied the namespace is registered on $xml first.  With $unique set,
# a child whose text already occurs in the buffer is skipped.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri and $unique, the
# condition guarding the last append, and the return are not shown here.
237 sub xpath_to_string {
241 my $ns_prefix = shift;
244 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
248 # grab the set of matching nodes
249 my @nodes = $xml->findnodes( $xpath );
250 for my $value (@nodes) {
252 # grab all children of the node
253 my @children = $value->childNodes();
254 for my $child (@children) {
256 # add the child's content to the growing buffer
# quotemeta() makes the text safe to use as a literal pattern below
257 my $content = quotemeta($child->textContent);
258 next if ($unique && $string =~ /$content/); # uniquify the values
259 $string .= $child->textContent . " ";
262 $string .= $value->textContent . " ";
# For each requested class and each field type configured for it in
# $xpathset, extracts the indexable string from the (possibly XSLT
# transformed) record and responds with one
# Fieldmapper::metabib::<class>_field_entry carrying that value and the
# field definition id.  Accepts either an XML string or an already parsed
# document.
# NOTE(review): argument unpacking and the guards around the transform are
# not shown in this excerpt.
268 sub class_index_string_xml {
274 OpenILS::Application::Ingest->post_init();
275 $xml = $parser->parse_string($xml) unless (ref $xml);
279 for my $class (@classes) {
280 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
281 for my $type ( keys %{ $xpathset->{$class} } ) {
283 my $def = $xpathset->{$class}->{$type};
284 my $sf = $supported_formats{$def->{format}};
# Transform once per format and reuse the result for later field types.
289 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
290 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix handed to xpath_to_string.
293 my $value = xpath_to_string(
294 $document->documentElement => $def->{xpath},
295 $sf->{ns} => $def->{format},
# Strip Unicode mark (\pM) and "other"/control (\pC) characters.
301 $value =~ s/\pM+//sgo;
302 $value =~ s/\pC+//sgo;
303 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a full stop that directly follows a word character.
305 $value =~ s/(\w)\./$1/sgo;
308 my $fm = $class_constructor->new;
309 $fm->value( $value );
310 $fm->field( $xpathset->{$class}->{$type}->{id} );
311 $client->respond($fm);
316 __PACKAGE__->register_method(
317 api_name => "open-ils.ingest.field_entry.class.xml",
318 method => "class_index_string_xml",
# Record-id front end for open-ils.ingest.field_entry.class.xml.  Retrieves
# biblio.record_entry $rec through open-ils.cstore and relays every field
# entry produced for the requested @classes to the client.
# Returns undef when the retrieve yields nothing.
# NOTE(review): unpacking of $rec/@classes is not shown in this excerpt.
324 sub class_index_string_record {
330 OpenILS::Application::Ingest->post_init();
331 my $r = OpenSRF::AppSession
332 ->create('open-ils.cstore')
333 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
336 return undef unless ($r and @$r);
338 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
340 $client->respond($fm);
344 __PACKAGE__->register_method(
345 api_name => "open-ils.ingest.field_entry.class.record",
346 method => "class_index_string_record",
# Runs open-ils.ingest.field_entry.class.xml for every class currently known
# to $xpathset and relays each resulting field entry to the client.
352 sub all_index_string_xml {
357 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
358 $client->respond($fm);
362 __PACKAGE__->register_method(
363 api_name => "open-ils.ingest.extract.field_entry.all.xml",
364 method => "all_index_string_xml",
# Record-id front end for the all-classes extraction.  Retrieves
# biblio.record_entry $rec through open-ils.cstore and relays the field
# entries for every class in $xpathset to the client.
# Returns undef when the retrieve yields nothing.
370 sub all_index_string_record {
375 OpenILS::Application::Ingest->post_init();
376 my $r = OpenSRF::AppSession
377 ->create('open-ils.cstore')
378 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
381 return undef unless ($r and @$r);
383 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
385 $client->respond($fm);
389 __PACKAGE__->register_method(
390 api_name => "open-ils.ingest.extract.field_entry.all.record",
391 method => "all_index_string_record",
397 # --------------------------------------------------------------------------------
400 package OpenILS::Application::Ingest::FlatMARC;
401 use base qw/OpenILS::Application::Ingest/;
402 use Unicode::Normalize;
# Flattens a parsed MARCXML document into full_rec row objects of class
# Fieldmapper::<xmltype>::full_rec (xmltype defaults to 'metabib').  Walks the
# leader, the controlfields and every subfield of every datafield under the
# first <record> element found.  Unicode mark characters are removed from each
# value, and subfield values are lower-cased.
# NOTE(review): creation of the row object ($ns), the pushes onto @ns_list and
# the return are not shown in this excerpt.
405 sub _marcxml_to_full_rows {
408 my $xmltype = shift || 'metabib';
410 my $type = "Fieldmapper::${xmltype}::full_rec";
# Match <record> by local name so the lookup works regardless of namespace.
414 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
416 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
417 next unless $tagline;
422 my $val = $tagline->textContent;
424 $val =~ s/(\pM+)//gso;
430 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
431 next unless $tagline;
435 $ns->tag( $tagline->getAttribute( "tag" ) );
436 my $val = $tagline->textContent;
438 $val =~ s/(\pM+)//gso;
444 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
445 next unless $tagline;
447 my $tag = $tagline->getAttribute( "tag" );
448 my $ind1 = $tagline->getAttribute( "ind1" );
449 my $ind2 = $tagline->getAttribute( "ind2" );
451 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
459 $ns->subfield( $data->getAttribute( "code" ) );
460 my $val = $data->textContent;
462 $val =~ s/(\pM+)//gso;
463 $ns->value( lc($val) );
469 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the method registered below as "flat_marc_xml" (its sub header is
# outside this excerpt).  Parses $xml unless it is already a reference,
# selects 'authority' rows when the invoked api_name contains "authority"
# (otherwise 'metabib'), and responds with each row produced by
# _marcxml_to_full_rows.
478 $log->debug("processing [$xml]");
480 $xml = $parser->parse_string($xml) unless (ref $xml);
482 my $type = 'metabib';
483 $type = 'authority' if ($self->api_name =~ /authority/o);
485 OpenILS::Application::Ingest->post_init();
487 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
490 __PACKAGE__->register_method(
491 api_name => "open-ils.ingest.flat_marc.authority.xml",
492 method => "flat_marc_xml",
497 __PACKAGE__->register_method(
498 api_name => "open-ils.ingest.flat_marc.biblio.xml",
499 method => "flat_marc_xml",
# Record-id front end for the flat_marc XML methods.  Retrieves the
# <type>.record_entry $rec through open-ils.cstore, runs
# open-ils.ingest.flat_marc.<type>.xml on its marc and relays each row to the
# client, logging a JSON dump of every row.
# $type becomes 'authority' when the invoked api_name contains "authority".
# Returns undef when the record or its marc is missing.
# NOTE(review): the initial value of $type is not shown in this excerpt.
505 sub flat_marc_record {
511 $type = 'authority' if ($self->api_name =~ /authority/o);
513 OpenILS::Application::Ingest->post_init();
514 my $r = OpenSRF::AppSession
515 ->create('open-ils.cstore')
516 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
520 return undef unless ($r and $r->marc);
522 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
523 for my $row (@rows) {
524 $client->respond($row);
525 $log->debug(JSON->perl2JSON($row), DEBUG);
529 __PACKAGE__->register_method(
530 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
531 method => "flat_marc_record",
536 __PACKAGE__->register_method(
537 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
538 method => "flat_marc_record",
544 # --------------------------------------------------------------------------------
547 package OpenILS::Application::Ingest::Biblio::Fingerprint;
548 use base qw/OpenILS::Application::Ingest/;
549 use Unicode::Normalize;
550 use OpenSRF::EX qw/:try/;
# Record-id front end for open-ils.ingest.fingerprint.xml.  Retrieves
# biblio.record_entry $rec through open-ils.cstore, fingerprints its marc and
# logs the result.
# Returns undef when the record or its marc is missing.
# NOTE(review): the return of $fp is not shown in this excerpt.
552 sub biblio_fingerprint_record {
557 OpenILS::Application::Ingest->post_init();
559 my $r = OpenSRF::AppSession
560 ->create('open-ils.cstore')
561 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
564 return undef unless ($r and $r->marc);
566 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
567 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
570 __PACKAGE__->register_method(
571 api_name => "open-ils.ingest.fingerprint.record",
572 method => "biblio_fingerprint_record",
# Runs the configured biblio_fingerprint script against a MARCXML string.
# The script file and library paths come from the settings under
# apps/open-ils.ingest/app_settings ('scripts' => 'biblio_fingerprint' and
# 'script_path').  The MARC is exposed to the script as environment.marc.
# Returns undef when the script run yields a false value.
# NOTE(review): $fp_script is assigned without "my", so it is presumably a
# file-level variable reused across calls; the guard around its construction
# and the final return are not shown in this excerpt.
578 sub biblio_fingerprint {
583 $log->internal("Got MARC [$xml]");
586 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
587 my $conf = OpenSRF::Utils::SettingsClient->new;
589 my $libs = $conf->config_value(@pfx, 'script_path');
590 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be configured as a single value or as a list
591 my $script_libs = (ref($libs)) ? $libs : [$libs];
593 $log->debug("Loading script $script_file for biblio fingerprinting...");
595 $fp_script = OpenILS::Utils::ScriptRunner->new
596 ( file => $script_file,
597 paths => $script_libs,
598 reset_count => 1000 );
601 $fp_script->insert('environment' => {marc => $xml} => 1);
# Bail out on any false result.  Chaining the return behind
# "$log->error(...) &&" would skip it whenever the logger returns false.
603 my $res = $fp_script->run() or do { $log->error( "Fingerprint script died! $@" ); return undef; };
604 $log->debug("Script for biblio fingerprinting completed successfully...");
608 __PACKAGE__->register_method(
609 api_name => "open-ils.ingest.fingerprint.xml",
610 method => "biblio_fingerprint",
621 OpenILS::Application::Ingest->post_init();
622 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction for ingest work.  Looks up the current
# transaction first, and on the path shown issues
# open-ils.storage.transaction.begin.  If that returns a false value the
# transaction is rolled back and an OpenSRF::EX::PANIC is thrown.
# Returns whatever open-ils.storage.transaction.current reports afterwards.
# NOTE(review): the condition on $outer_xact and the try/catch scaffolding
# around the throw and the error log line are not shown in this excerpt.
625 sub begin_transaction {
629 OpenILS::Application::Ingest->post_init();
630 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
634 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
635 #__PACKAGE__->st_sess->connect;
636 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
637 unless (defined $r and $r) {
638 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
639 #__PACKAGE__->st_sess->disconnect;
640 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
644 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
647 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the current storage transaction via
# open-ils.storage.transaction.rollback.  An OpenSRF::EX::PANIC is thrown on
# one of the paths.
# NOTE(review): the branch conditions are not shown, so which path logs
# "isn't inside a transaction" and which one throws cannot be confirmed here.
650 sub rollback_transaction {
654 OpenILS::Application::Ingest->post_init();
655 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
659 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
661 $log->debug("Ingest isn't inside a transaction.", INFO);
664 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the current storage transaction via
# open-ils.storage.transaction.commit.  If the commit returns a false value
# the transaction is rolled back and an OpenSRF::EX::PANIC is thrown.
# NOTE(review): the branch conditions and try/catch scaffolding are not shown
# in this excerpt.
670 sub commit_transaction {
674 OpenILS::Application::Ingest->post_init();
675 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
678 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
680 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
681 unless (defined $r and $r) {
682 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
683 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
685 #__PACKAGE__->st_sess->disconnect;
687 $log->debug("Ingest isn't inside a transaction.", INFO);
690 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Tail of the helper invoked elsewhere in this file as ->storage_req (its sub
# header and the unpacking of $method are outside this excerpt).  Runs the
# named method with the remaining arguments and returns only the first result.
699 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
700 return shift( @res );
# Deletes the derived rows (authority.full_rec and
# authority.record_descriptor) for authority record $rec, inside a savepoint
# named 'scrub_authority_record'.  A transaction is begun first when none is
# active.  At the end the transaction is committed or rolled back according
# to $success, but only when $commit is set.
# NOTE(review): the assignments to $commit and $success and the try/catch
# around the deletes are not shown in this excerpt.
703 sub scrub_authority_record {
709 if (!OpenILS::Application::Ingest->in_transaction) {
710 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
716 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
718 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
719 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
721 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
723 $log->debug('Scrubbing failed : '.shift(), ERROR);
724 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
728 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
729 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
732 __PACKAGE__->register_method(
733 api_name => "open-ils.worm.scrub.authority",
734 method => "scrub_authority_record",
# Deletes every derived metabib row for bib record(s) $rec inside a savepoint
# named 'scrub_metabib_record': full_rec, metarecord_source_map,
# record_descriptor and the title/author/subject/keyword/series field entries.
# Metarecords whose master record is $rec are then either re-pointed at
# another mapped source or deleted.
# A hash-ref $rec is first resolved to ids via
# open-ils.storage.id_list.biblio.record_entry.search_where.
# At the end the transaction is committed or rolled back according to
# $success, but only when $commit is set.
# NOTE(review): the condition choosing between re-pointing and deleting a
# metarecord, and the try/catch scaffolding, are not shown in this excerpt.
740 sub scrub_metabib_record {
745 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
746 $rec = OpenILS::Application::Ingest->storage_req(
747 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
752 if (!OpenILS::Application::Ingest->in_transaction) {
753 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
759 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
761 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
762 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
763 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
764 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
765 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
766 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
767 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
768 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
770 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
771 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
773 for my $mr (@$masters) {
774 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source maps for this metarecord (this record's own maps were
# mass-deleted above).
775 my $others = OpenILS::Application::Ingest->storage_req(
776 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the cached mods.
779 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
780 $mr->master_record($others->[0]->source);
781 OpenILS::Application::Ingest->storage_req(
782 'open-ils.storage.direct.metabib.metarecord.remote_update',
784 { master_record => $others->[0]->source, mods => undef }
# These warn() calls go to STDERR in addition to the logger.
787 warn "Removing metarecord whose master is $rec";
788 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
789 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
790 warn "Metarecord removed";
791 $log->debug( "Metarecord removed", DEBUG);
795 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
798 $log->debug('Scrubbing failed : '.shift(), ERROR);
799 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
803 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
804 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
807 __PACKAGE__->register_method(
808 api_name => "open-ils.worm.scrub.biblio",
809 method => "scrub_metabib_record",
# Re-ingests every bib record mapped to metarecord $mrec.  Fetches the
# metarecord_source_map rows for $mrec, calls wormize_biblio_record on each
# row's source, and responds with a hash naming the record, its metarecord
# and the outcome.
# The responses read the metarecord from the map row being processed ($r):
# no other variable holding a map row is assigned on the lines visible here.
# NOTE(review): the loop header, try/catch scaffolding and remaining response
# keys are outside this excerpt.
814 sub wormize_biblio_metarecord {
819 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
825 $success = wormize_biblio_record($self => $client => $r->source);
827 { record => $r->source,
828 metarecord => $r->metarecord,
835 { record => $r->source,
836 metarecord => $r->metarecord,
845 __PACKAGE__->register_method(
846 api_name => "open-ils.worm.wormize.metarecord",
847 method => "wormize_biblio_metarecord",
852 __PACKAGE__->register_method(
853 api_name => "open-ils.worm.wormize.metarecord.nomap",
854 method => "wormize_biblio_metarecord",
859 __PACKAGE__->register_method(
860 api_name => "open-ils.worm.wormize.metarecord.noscrub",
861 method => "wormize_biblio_metarecord",
866 __PACKAGE__->register_method(
867 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
868 method => "wormize_biblio_metarecord",
# Full re-ingest of bib record(s) $rec.
#  1. Resolves a hash-ref $rec to ids, begins a transaction if none is
#     active, and scrubs the old derived rows unless the api_name contains
#     "noscrub".
#  2. Per record, inside savepoint 'extract_data<id>': parses the marc,
#     updates fingerprint/quality when they differ, and collects full_rec
#     rows, a record descriptor and the field entries for the five index
#     classes.  Unless the api_name contains "nomap", it also finds or
#     creates the metarecord for the fingerprint and queues a source map.
#  3. Inside savepoint 'wormize_record': batch-creates the source maps,
#     re-elects each touched metarecord's master (highest quality), and
#     batch-creates descriptors, full_rec rows and field entries.
#  4. Commits or rolls back according to $success, but only when $commit is
#     set.
# NOTE(review): loop headers, try/catch scaffolding and several argument
# lists are outside this excerpt.  'open-ils.worm.fingerprint.marc' and
# 'open-ils.worm.flat_marc.biblio.xml' are looked up here but are not
# registered on any line shown -- confirm they exist.
875 sub wormize_biblio_record {
880 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
881 $rec = OpenILS::Application::Ingest->storage_req(
882 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
888 if (!OpenILS::Application::Ingest->in_transaction) {
889 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
896 unless ($self->api_name =~ /noscrub/o) {
897 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
901 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
904 my @rec_descriptor = ();
916 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
918 my $xml = $parser->parse_string($r->marc);
920 #update the fingerprint
921 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
922 OpenILS::Application::Ingest->storage_req(
923 'open-ils.storage.direct.biblio.record_entry.remote_update',
925 { fingerprint => $fp->{fingerprint},
926 quality => int($fp->{quality}) }
# Note: quality is compared as a string ("ne"), not numerically.
927 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
930 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
931 $fr->record( $r->id );
935 # the rec_descriptor stuff
936 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
937 $rd->record( $r->id );
938 push @rec_descriptor, $rd;
940 # the indexing field entry stuff
941 for my $class ( qw/title author subject keyword series/ ) {
942 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
943 $fe->source( $r->id );
944 push @{$field_entry{$class}}, $fe;
948 unless ($self->api_name =~ /nomap/o) {
949 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# Build a new metarecord with this record as master (the guard for "none
# found" is not shown in this excerpt).
952 $mr = Fieldmapper::metabib::metarecord->new;
953 $mr->fingerprint( $fp->{fingerprint} );
954 $mr->master_record( $r->id );
955 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
958 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
959 $mr_map->metarecord( $mr->id );
960 $mr_map->source( $r->id );
961 push @source_map, $mr_map;
# Remember each touched metarecord once, keyed by id, for master re-election.
963 $metarecord{$mr->id} = $mr;
965 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
967 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
968 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Only write anything when at least one record was extracted successfully.
973 if (@rec_descriptor) {
974 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
976 OpenILS::Application::Ingest->storage_req(
977 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
981 for my $mr ( values %metarecord ) {
982 my $sources = OpenILS::Application::Ingest->storage_req(
983 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
987 my $bibs = OpenILS::Application::Ingest->storage_req(
988 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
989 [ map { $_->source } @$sources ]
# The mapped bib with the highest quality becomes the master record.
992 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
994 OpenILS::Application::Ingest->storage_req(
995 'open-ils.storage.direct.metabib.metarecord.remote_update',
997 { master_record => $master->id, mods => undef }
1001 OpenILS::Application::Ingest->storage_req(
1002 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1004 ) if (@rec_descriptor);
1006 OpenILS::Application::Ingest->storage_req(
1007 'open-ils.storage.direct.metabib.full_rec.batch.create',
1011 OpenILS::Application::Ingest->storage_req(
1012 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1013 @{ $field_entry{title} }
1014 ) if (@{ $field_entry{title} });
1016 OpenILS::Application::Ingest->storage_req(
1017 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1018 @{ $field_entry{author} }
1019 ) if (@{ $field_entry{author} });
1021 OpenILS::Application::Ingest->storage_req(
1022 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1023 @{ $field_entry{subject} }
1024 ) if (@{ $field_entry{subject} });
1026 OpenILS::Application::Ingest->storage_req(
1027 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1028 @{ $field_entry{keyword} }
1029 ) if (@{ $field_entry{keyword} });
1031 OpenILS::Application::Ingest->storage_req(
1032 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1033 @{ $field_entry{series} }
1034 ) if (@{ $field_entry{series} });
1036 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1042 $log->debug('Wormization failed : '.shift(), ERROR);
1043 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1047 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1048 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1051 __PACKAGE__->register_method(
1052 api_name => "open-ils.worm.wormize.biblio",
1053 method => "wormize_biblio_record",
1057 __PACKAGE__->register_method(
1058 api_name => "open-ils.worm.wormize.biblio.nomap",
1059 method => "wormize_biblio_record",
1063 __PACKAGE__->register_method(
1064 api_name => "open-ils.worm.wormize.biblio.noscrub",
1065 method => "wormize_biblio_record",
1069 __PACKAGE__->register_method(
1070 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1071 method => "wormize_biblio_record",
# Full re-ingest of authority record(s) $rec.  Begins a transaction if none is
# active, scrubs old derived rows unless the api_name contains "noscrub",
# flattens each record's marc into authority full_rec rows and batch-creates
# them inside savepoint 'wormize_authority_record'.  Commits or rolls back
# according to $success, but only when $commit is set.
# Record-descriptor handling for authority records is commented out.
# NOTE(review): 'open-ils.worm.flat_marc.authority.xml' is looked up here but
# is not registered on any line shown in this excerpt -- confirm it exists.
1076 sub wormize_authority_record {
1082 if (!OpenILS::Application::Ingest->in_transaction) {
1083 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1089 # clean up the cruft
1090 unless ($self->api_name =~ /noscrub/o) {
1091 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1095 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1098 my @rec_descriptor = ();
1099 for my $r (@$bibs) {
1100 my $xml = $parser->parse_string($r->marc);
1102 # the full_rec stuff
1103 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1104 $fr->record( $r->id );
1105 push @full_rec, $fr;
1108 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1109 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1110 #$rd->record( $r->id );
1111 #push @rec_descriptor, $rd;
1115 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1117 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1118 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1120 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1123 $log->debug('Wormization failed : '.shift(), ERROR);
1124 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
1128 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1129 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1132 __PACKAGE__->register_method(
1133 api_name => "open-ils.worm.wormize.authority",
1134 method => "wormize_authority_record",
1138 __PACKAGE__->register_method(
1139 api_name => "open-ils.worm.wormize.authority.noscrub",
1140 method => "wormize_authority_record",
1146 # --------------------------------------------------------------------------------
1147 # MARC index extraction
1149 package OpenILS::Application::Ingest::XPATH;
1150 use base qw/OpenILS::Application::Ingest/;
1151 use Unicode::Normalize;
1153 # give this a MODS documentElement and an XPATH expression
# Concatenates the text found under every node matching $xpath into one
# space-separated buffer and returns it in Unicode NFD form.  When both a
# namespace URI and a prefix are supplied the namespace is registered on $xml
# first.  With $unique set, a child whose text already occurs in the buffer
# is skipped.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri and $unique and the
# condition guarding the last append are not shown in this excerpt.
1154 sub _xpath_to_string {
1158 my $ns_prefix = shift;
1161 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1165 # grab the set of matching nodes
1166 my @nodes = $xml->findnodes( $xpath );
1167 for my $value (@nodes) {
1169 # grab all children of the node
1170 my @children = $value->childNodes();
1171 for my $child (@children) {
1173 # add the child's content to the growing buffer
# quotemeta() makes the text safe to use as a literal pattern below
1174 my $content = quotemeta($child->textContent);
1175 next if ($unique && $string =~ /$content/); # uniquify the values
1176 $string .= $child->textContent . " ";
1179 $string .= $value->textContent . " ";
1182 return NFD($string);
# For one index class, walks every field type configured in
# $xpathset->{$class}, extracts its string from the MODS transform of the
# record, normalises it and responds with one
# Fieldmapper::metabib::<class>_field_entry carrying the value and the field
# definition id.  Normalisation: strip mark and "other"/control characters,
# drop a full stop that follows a word character, lower-case.
# Note that $mods_sheet->transform($xml) is evaluated once per field type.
# NOTE(review): $mods_sheet is not assigned on any line shown in this excerpt.
1185 sub class_all_index_string_xml {
1191 OpenILS::Application::Ingest->post_init();
1192 $xml = $parser->parse_string($xml) unless (ref $xml);
1194 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1195 for my $type ( keys %{ $xpathset->{$class} } ) {
1196 my $value = _xpath_to_string(
1197 $mods_sheet->transform($xml)->documentElement,
1198 $xpathset->{$class}->{$type}->{xpath},
1199 "http://www.loc.gov/mods/",
1206 $value =~ s/\pM+//sgo;
1207 $value =~ s/\pC+//sgo;
1208 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
1210 $value =~ s/(\w)\./$1/sgo;
1211 $value = lc($value);
1213 my $fm = $class_constructor->new;
1214 $fm->value( $value );
1215 $fm->field( $xpathset->{$class}->{$type}->{id} );
1216 $client->respond($fm);
1220 __PACKAGE__->register_method(
1221 api_name => "open-ils.worm.field_entry.class.xml",
1222 method => "class_all_index_string_xml",
# Record-id front end for open-ils.worm.field_entry.class.xml.  Retrieves
# biblio.record_entry $rec through the storage service and relays each field
# entry for $class to the client.
# Note that $r is used without a check that the retrieve succeeded.
1228 sub class_all_index_string_record {
1234 OpenILS::Application::Ingest->post_init();
1235 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1237 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1239 $client->respond($fm);
1243 __PACKAGE__->register_method(
1244 api_name => "open-ils.worm.field_entry.class.record",
1245 method => "class_all_index_string_record",
# Returns the unique-valued string for one $class/$type XPath, taken from the
# MODS transform of the record.  Accepts either an XML string or an already
# parsed document.
# NOTE(review): a sub with this same name appears earlier in this excerpt
# under the same package statement; confirm that only one of the two
# definitions is live in the full file.
1252 sub class_index_string_xml {
1259 OpenILS::Application::Ingest->post_init();
1260 $xml = $parser->parse_string($xml) unless (ref $xml);
1261 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1263 __PACKAGE__->register_method(
1264 api_name => "open-ils.worm.class.type.xml",
1265 method => "class_index_string_xml",
# Record-id front end for open-ils.worm.class.type.xml.  Retrieves
# biblio.record_entry $rec through the storage service, extracts the
# $class/$type string from its marc and logs it.
# NOTE(review): the return of $d is not shown here.  A sub with this same
# name also appears earlier in this excerpt under the same package statement.
1270 sub class_index_string_record {
1277 OpenILS::Application::Ingest->post_init();
1278 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1280 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1281 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1284 __PACKAGE__->register_method(
1285 api_name => "open-ils.worm.class.type.record",
1286 method => "class_index_string_record",
# Body of the method registered below as "xml_xpath" (its sub header and
# argument unpacking are outside this excerpt).  Evaluates an arbitrary
# $xpath against the document element of $xml, passing the namespace
# $uri/$prefix and the $unique flag straight to _xpath_to_string.
1300 OpenILS::Application::Ingest->post_init();
1301 $xml = $parser->parse_string($xml) unless (ref $xml);
1302 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1304 __PACKAGE__->register_method(
1305 api_name => "open-ils.worm.xpath.xml",
1306 method => "xml_xpath",
# Body of the method registered below as "record_xpath" (its sub header and
# argument unpacking are outside this excerpt).  Retrieves
# biblio.record_entry $rec through the storage service, runs
# open-ils.worm.xpath.xml on its marc and logs the result.
# NOTE(review): the return of $d is not shown in this excerpt.
1320 OpenILS::Application::Ingest->post_init();
1321 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1323 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1324 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1327 __PACKAGE__->register_method(
1328 api_name => "open-ils.worm.xpath.record",
1329 method => "record_xpath",
1335 # --------------------------------------------------------------------------------
1338 package OpenILS::Application::Ingest::Biblio::Leader;
1339 use base qw/OpenILS::Application::Ingest/;
1340 use Unicode::Normalize;
1342 our %marc_type_groups = (
1345 VIS => q/[gkro]{1}/,
# Build one anchored alternation from the patterns of every requested type
# group. A hash slice (@hash{...}) is required to fetch one pattern per
# name; $hash{@_} would evaluate @_ in scalar context and look up the
# argument count as a key. The alternation is wrapped in (?:...) so that
# both anchors apply to every alternative, not only the first and last.
1354 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
1358 our %biblio_descriptor_code = (
1359 item_type => sub { substr($ldr,6,1); },
1362 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1363 return substr($oo8,29,1);
1364 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1365 return substr($oo8,23,1);
1369 bib_level => sub { substr($ldr,7,1); },
1370 control_type => sub { substr($ldr,8,1); },
1371 char_encoding => sub { substr($ldr,9,1); },
1372 enc_level => sub { substr($ldr,17,1); },
1373 cat_form => sub { substr($ldr,18,1); },
1374 pub_status => sub { substr($ldr,5,1); },
1375 item_lang => sub { substr($oo8,35,3); },
1376 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1377 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1378 audience => sub { substr($oo8,22,1); },
# Build a Fieldmapper::metabib::record_descriptor from a MARCXML node/document.
# The leader and the 008 and 007 control fields are read with
# namespace-agnostic XPath (local-name()), then every extractor in
# %biblio_descriptor_code is run and its result is stored through the
# accessor that has the same name as the extractor's key.
1381 sub _extract_biblio_descriptors {
# These are localized (dynamically scoped) rather than lexical so that the
# extractor closures in %biblio_descriptor_code, which read $ldr and $oo8,
# see the values for the record currently being processed.
1384 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1385 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1386 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1388 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1389 for my $rd_field ( keys %biblio_descriptor_code ) {
# Call the extractor with no arguments; it works from the localized values.
1390 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
1396 sub extract_biblio_desc_xml {
1401 $xml = $parser->parse_string($xml) unless (ref $xml);
1403 return _extract_biblio_descriptors( $xml );
1405 __PACKAGE__->register_method(
1406 api_name => "open-ils.worm.biblio_leader.xml",
1407 method => "extract_biblio_desc_xml",
1412 sub extract_biblio_desc_record {
1417 OpenILS::Application::Ingest->post_init();
1418 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1420 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1421 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1424 __PACKAGE__->register_method(
1425 api_name => "open-ils.worm.biblio_leader.record",
1426 method => "extract_biblio_desc_record",
1431 # --------------------------------------------------------------------------------
1434 package OpenILS::Application::Ingest::FlatMARC;
1435 use base qw/OpenILS::Application::Ingest/;
1436 use Unicode::Normalize;
# Flatten a parsed MARCXML document into Fieldmapper full_rec row objects.
#   $marcxml - parsed document (findnodes() is called on it)
#   $xmltype - Fieldmapper namespace segment for the row class; defaults to
#              'metabib' (flat_marc_xml passes 'authority' for authority APIs)
1439 sub _marcxml_to_full_rows {
1441 my $marcxml = shift;
1442 my $xmltype = shift || 'metabib';
# Class name of the row objects to instantiate, e.g. Fieldmapper::metabib::full_rec.
1444 my $type = "Fieldmapper::${xmltype}::full_rec";
# Work from the first element named "record", whatever its namespace.
1448 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1450 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1451 next unless $tagline;
1453 my $ns = $type->new;
1456 my $val = $tagline->textContent;
1458 $val =~ s/(\pM+)//gso;
1464 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1465 next unless $tagline;
1467 my $ns = $type->new;
1469 $ns->tag( $tagline->getAttribute( "tag" ) );
1470 my $val = $tagline->textContent;
1472 $val =~ s/(\pM+)//gso;
1478 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1479 next unless $tagline;
1481 my $tag = $tagline->getAttribute( "tag" );
1482 my $ind1 = $tagline->getAttribute( "ind1" );
1483 my $ind2 = $tagline->getAttribute( "ind2" );
1485 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1488 my $ns = $type->new;
1493 $ns->subfield( $data->getAttribute( "code" ) );
1494 my $val = $data->textContent;
1496 $val =~ s/(\pM+)//gso;
1497 $ns->value( lc($val) );
1503 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1512 $xml = $parser->parse_string($xml) unless (ref $xml);
1514 my $type = 'metabib';
1515 $type = 'authority' if ($self->api_name =~ /authority/o);
1517 OpenILS::Application::Ingest->post_init();
1519 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1522 __PACKAGE__->register_method(
1523 api_name => "open-ils.worm.flat_marc.authority.xml",
1524 method => "flat_marc_xml",
1529 __PACKAGE__->register_method(
1530 api_name => "open-ils.worm.flat_marc.biblio.xml",
1531 method => "flat_marc_xml",
1537 sub flat_marc_record {
1542 my $type = 'biblio';
1543 $type = 'authority' if ($self->api_name =~ /authority/o);
1545 OpenILS::Application::Ingest->post_init();
1546 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1548 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1551 __PACKAGE__->register_method(
1552 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1553 method => "flat_marc_record",
1558 __PACKAGE__->register_method(
1559 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1560 method => "flat_marc_record",
1567 # --------------------------------------------------------------------------------
1570 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1571 use base qw/OpenILS::Application::Ingest/;
1572 use Unicode::Normalize;
1573 use OpenSRF::EX qw/:try/;
1575 my @fp_mods_xpath = (
1576 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1579 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1580 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1581 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1582 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1585 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1587 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1588 $text =~ s/\pM+//gso;
1589 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1591 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1592 $text =~ s/\s+/ /sgo;
1593 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1594 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1595 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1596 $text =~ s/\b(?:the|an?)\b//sgo;
1597 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1598 $text =~ s/\[.[^\]]+\]//sgo;
1599 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1600 $text =~ s/\s*[;\/\.]*$//sgo;
1601 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1606 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1607 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1610 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1612 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1613 $text =~ s/\pM+//gso;
1614 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1616 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1617 $text =~ s/\s+/ /sgo;
1618 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1619 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1620 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1621 $text =~ s/,?\s+.*$//sgo;
1622 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1627 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1630 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1631 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1632 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1633 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1634 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1635 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1636 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1637 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1640 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1642 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1643 $text =~ s/\pM+//gso;
1644 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1646 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1647 $text =~ s/\s+/ /sgo;
1648 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1649 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1650 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1651 $text =~ s/\b(?:the|an?)\b//sgo;
1652 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1653 $text =~ s/\[.[^\]]+\]//sgo;
1654 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1655 $text =~ s/\s*[;\/\.]*$//sgo;
1656 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1661 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1662 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1663 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1664 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1667 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1669 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1670 $text =~ s/\pM+//gso;
1671 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1673 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1674 $text =~ s/\s+/ /sgo;
1675 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1676 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1677 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1678 $text =~ s/,?\s+.*$//sgo;
1679 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1686 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1690 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# @fp_mods_xpath is walked as a flat list of pairs: a "match" XPath at
# $match_index and, at $block_index, the block of extraction rules that
# applies when that XPath finds nodes in the MODS document.
1694 my $match_index = 0;
1695 my $block_index = 1;
1696 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1697 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# The block is itself a flat list of (name, part) pairs, hence two cursors
# that both advance in steps of two.
1699 my $block_name_index = 0;
1700 my $block_value_index = 1;
1701 my $block = $fp_mods_xpath[$block_index];
1702 while ( my $part = $$block[$block_value_index] ) {
# Evaluate the candidate XPaths listed for this part against the document.
1704 for my $xpath ( @{ $part->{xpath} } ) {
1705 $text = $mods->findvalue( $xpath );
1709 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1713 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
# Each part's text is appended to the fingerprint being assembled.
1714 $fp_string .= $text;
1717 $block_name_index += 2;
1718 $block_value_index += 2;
# Remove every run of non-word characters from the assembled fingerprint.
1722 $fp_string =~ s/\W+//gso;
1723 $log->debug("Fingerprint is [$fp_string]", INFO);;
1733 sub refingerprint_bibrec {
1739 if (!OpenILS::Application::Ingest->in_transaction) {
1740 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1746 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1747 for my $b (@$bibs) {
1748 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
1750 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1752 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1754 OpenILS::Application::Ingest->storage_req(
1755 'open-ils.storage.direct.biblio.record_entry.remote_update',
1757 { fingerprint => $fp->{fingerprint},
1758 quality => $fp->{quality} }
# Re-map the record to a metarecord, unless the API name this method was
# invoked under contains "nomap".
1761 if ($self->api_name !~ /nomap/o) {
# Fetch the source maps that currently exist for this record.
1762 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1763 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Issue a metarecord_source_map.delete request per existing map, remembering
# the metarecord of the last map seen in $old_mrid.
1768 if (ref($old_source_map) and @$old_source_map) {
1769 for my $m (@$old_source_map) {
1770 $old_mrid = $m->metarecord;
1771 OpenILS::Application::Ingest->storage_req(
1772 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# Look up the maps that still point at the old metarecord; when that search
# returns an empty list, a metarecord.delete request is issued.
1778 my $old_sm = OpenILS::Application::Ingest->storage_req(
1779 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1780 { metarecord => $old_mrid }
1783 if (ref($old_sm) and @$old_sm == 0) {
1784 OpenILS::Application::Ingest->storage_req(
1785 'open-ils.storage.direct.metabib.metarecord.delete',
# Search for a metarecord that carries the newly computed fingerprint.
1790 my $mr = OpenILS::Application::Ingest->storage_req(
1791 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1792 { fingerprint => $fp->{fingerprint} }
# Build and store a new metarecord with this record as its master, keeping
# the value returned by the create request as its id.
# NOTE(review): the condition guarding this creation is not visible in this
# excerpt; presumably it runs only when the search above found nothing.
1796 $mr = Fieldmapper::metabib::metarecord->new;
1797 $mr->fingerprint( $fp->{fingerprint} );
1798 $mr->master_record( $b->id );
1799 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Create the source map that ties this record to the metarecord.
1802 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1803 $mr_map->metarecord( $mr->id );
1804 $mr_map->source( $b->id );
1805 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
1809 $client->respond($b->id);
1813 $log->debug('Fingerprinting failed : '.shift(), ERROR);
1817 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1818 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1821 __PACKAGE__->register_method(
1822 api_name => "open-ils.worm.fingerprint.record.update",
1823 method => "refingerprint_bibrec",
1829 __PACKAGE__->register_method(
1830 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1831 method => "refingerprint_bibrec",
1838 sub fingerprint_bibrec {
1843 OpenILS::Application::Ingest->post_init();
1844 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1846 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1847 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1851 __PACKAGE__->register_method(
1852 api_name => "open-ils.worm.fingerprint.record",
1853 method => "fingerprint_bibrec",
1859 sub fingerprint_mods {
1864 OpenILS::Application::Ingest->post_init();
1865 my $mods = $parser->parse_string($xml)->documentElement;
1867 return _fp_mods( $mods );
1869 __PACKAGE__->register_method(
1870 api_name => "open-ils.worm.fingerprint.mods",
1871 method => "fingerprint_mods",
1876 sub fingerprint_marc {
1881 $xml = $parser->parse_string($xml) unless (ref $xml);
1883 OpenILS::Application::Ingest->post_init();
1884 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1885 $log->debug("Returning [$fp] as fingerprint", INFO);
1888 __PACKAGE__->register_method(
1889 api_name => "open-ils.worm.fingerprint.marc",
1890 method => "fingerprint_marc",
1898 sub biblio_fingerprint_record {
1903 OpenILS::Application::Ingest->post_init();
1905 my $marc = OpenILS::Application::Ingest
1906 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
1909 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1910 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): "open-ils.worm.fingerprint.record" is also registered earlier
# in this package for fingerprint_bibrec. Confirm which of the two
# registrations is meant to serve this api_name.
1913 __PACKAGE__->register_method(
1914 api_name => "open-ils.worm.fingerprint.record",
1915 method => "biblio_fingerprint_record",
1921 sub biblio_fingerprint {
1926 OpenILS::Application::Ingest->post_init();
1928 $marc = $parser->parse_string($marc) unless (ref $marc);
1930 my $mods = OpenILS::Application::Ingest::entityize(
1932 ->transform( $marc )
1938 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
1941 $log->internal("Got MARC [$marc]");
1942 $log->internal("Created MODS [$mods]");
# Settings path under which the fingerprinting script configuration is read.
1945 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1946 my $conf = OpenSRF::Utils::SettingsClient->new;
1948 my $libs = $conf->config_value(@pfx, 'script_path');
1949 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may come back as a single value or as an array ref; normalize
# it to an array ref for the ScriptRunner "paths" argument.
1950 my $script_libs = (ref($libs)) ? $libs : [$libs];
1952 $log->debug("Loading script $script_file for biblio fingerprinting...");
# Create the runner for the configured script file and library paths.
1954 $fp_script = new OpenILS::Utils::ScriptRunner
1955 ( file => $script_file,
1956 paths => $script_libs,
1957 reset_count => 1000 );
1960 $log->debug("Applying environment for biblio fingerprinting...");
# The script receives the MARC and MODS values under the name 'environment'.
1962 my $env = {marc => $marc, mods => $mods};
1963 #my $res = {fingerprint => '', quality => '0'};
1965 $fp_script->insert('environment' => $env);
1966 #$fp_script->insert('result' => $res);
1968 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() is logged as an error and the method returns 0.
1970 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
1972 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): "open-ils.worm.fingerprint.marc" is also registered earlier in
# this package for fingerprint_marc. Confirm which of the two registrations
# is meant to serve this api_name.
1976 __PACKAGE__->register_method(
1977 api_name => "open-ils.worm.fingerprint.marc",
1978 method => "biblio_fingerprint",
1983 # --------------------------------------------------------------------------------
1997 my $create_source_map;
2012 my %descriptor_code = (
2013 item_type => 'substr($ldr,6,1)',
2014 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2015 bib_level => 'substr($ldr,7,1)',
2016 control_type => 'substr($ldr,8,1)',
2017 char_encoding => 'substr($ldr,9,1)',
2018 enc_level => 'substr($ldr,17,1)',
2019 cat_form => 'substr($ldr,18,1)',
2020 pub_status => 'substr($ldr,5,1)',
2021 item_lang => 'substr($oo8,35,3)',
2022 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2023 audience => 'substr($oo8,22,1)',
2033 if ($self->api_name =~ /no_map/o) {
2037 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2039 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2041 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2043 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2045 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2046 unless ($sm_lookup);
2047 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2048 unless ($mr_lookup);
2049 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2050 unless ($mr_update);
2051 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2053 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2054 unless ($update_entry);
2055 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2056 unless ($rm_old_sm);
2057 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2058 unless ($rm_old_rd);
2059 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2060 unless ($rm_old_fr);
2061 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2062 unless ($rm_old_tr);
2063 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2064 unless ($rm_old_ar);
2065 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2066 unless ($rm_old_sr);
2067 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2068 unless ($rm_old_kr);
2069 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2070 unless ($rm_old_ser);
2071 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2072 unless ($mr_create);
2073 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2074 unless ($create_source_map);
2075 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2076 unless ($rd_create);
2077 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2078 unless ($fr_create);
2079 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2080 unless ($$create{title});
2081 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2082 unless ($$create{author});
2083 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2084 unless ($$create{subject});
2085 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2086 unless ($$create{keyword});
2087 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2088 unless ($$create{series});
2091 my ($outer_xact) = $in_xact->run;
2093 unless ($outer_xact) {
2094 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2095 my ($r) = $begin->run($client);
2096 unless (defined $r and $r) {
2098 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2101 } catch Error with {
2102 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2112 for my $entry ( $lookup->run(@docids) ) {
2113 # step -1: grab the doc from storage
2114 next unless ($entry);
2117 my $xslt_doc = $parser->parse_file(
2118 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2119 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2122 my $xml = $entry->marc;
2123 my $docid = $entry->id;
2124 my $marcdoc = $parser->parse_string($xml);
2125 my $modsdoc = $mods_sheet->transform($marcdoc);
2127 my $mods = $modsdoc->documentElement;
2128 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2130 $entry->fingerprint( fingerprint_mods( $mods ) );
2131 push @entry_list, $entry;
2133 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2136 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2137 if (!$mr || !@$mr) {
2138 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2139 $mr = new Fieldmapper::metabib::metarecord;
2140 $mr->fingerprint( $entry->fingerprint );
2141 $mr->master_record( $entry->id );
2142 my ($new_mr) = $mr_create->run($mr);
# Check the value returned by the create call. $mr is the metarecord object
# built just above and is always defined here, so testing it could never
# detect a failed create.
2144 unless (defined $new_mr) {
2145 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2148 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2153 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2154 $sm->metarecord( $mr->id );
2155 $sm->source( $entry->id );
2156 push @source_maps, $sm;
2159 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2160 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2162 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2163 for my $rd_field ( keys %descriptor_code ) {
2164 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2166 $rd_obj->record( $docid );
2167 push @rd_list, $rd_obj;
2169 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2171 # step 2: build the KOHA rows
2172 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2173 $_->record( $docid ) for (@tmp_list);
2174 push @ns_list, @tmp_list;
2178 last unless ($self->api_name =~ /batch$/o);
# Purge the rows previously derived from these records (descriptors, flat
# MARC rows, source maps and the per-class field entries) before the
# replacements are created further down. The descriptor and full_rec
# deletes are keyed on "record", the others on "source".
2181 $rm_old_rd->run( { record => \@docids } );
2182 $rm_old_fr->run( { record => \@docids } );
# Existing source maps are left alone when $no_map is set.
2183 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2184 $rm_old_tr->run( { source => \@docids } );
2185 $rm_old_ar->run( { source => \@docids } );
2186 $rm_old_sr->run( { source => \@docids } );
2187 $rm_old_kr->run( { source => \@docids } );
2188 $rm_old_ser->run( { source => \@docids } );
2191 my ($sm) = $create_source_map->run(@source_maps);
2192 unless (defined $sm) {
2193 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2195 my ($mr) = $mr_update->run(@mr_list);
2196 unless (defined $mr) {
2197 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2201 my ($re) = $update_entry->run(@entry_list);
2202 unless (defined $re) {
2203 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2206 my ($rd) = $rd_create->run(@rd_list);
2207 unless (defined $rd) {
2208 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2211 my ($fr) = $fr_create->run(@ns_list);
2212 unless (defined $fr) {
2213 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2216 # step 5: insert the new metadata
# For every search class, turn the values extracted from each record's MODS
# into <class>_field_entry objects and hand them to that class's batch
# create method.
2217 for my $class ( qw/title author subject keyword series/ ) {
2219 for my $doc ( @mods_data ) {
# Each @mods_data element is a single-pair hash: record id => extracted values.
2220 my ($did) = keys %$doc;
2221 my ($data) = values %$doc;
2223 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2224 for my $row ( keys %{ $$data{$class} } ) {
2225 next unless (exists $$data{$class}{$row});
# Rows whose value is empty (or otherwise false) produce no entry.
2226 next unless ($$data{$class}{$row}{value});
2227 my $fm_obj = $fm_constructor->new;
2228 $fm_obj->value( $$data{$class}{$row}{value} );
2229 $fm_obj->field( $$data{$class}{$row}{field_id} );
2230 $fm_obj->source( $did );
2231 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2233 push @md_list, $fm_obj;
# NOTE(review): the declaration of @md_list is not visible in this excerpt;
# confirm it is reset for each class so that entries built for one class
# are not sent to the next class's create method.
2237 my ($cr) = $$create{$class}->run(@md_list);
# An undefined result from the batch create is treated as fatal.
2238 unless (defined $cr) {
2239 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2243 unless ($outer_xact) {
2244 $log->debug("Commiting transaction started by the Ingest.", INFO);
2245 my ($c) = $commit->run;
2246 unless (defined $c and $c) {
2248 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2254 __PACKAGE__->register_method(
2255 api_name => "open-ils.worm.wormize",
2256 method => "wormize",
2260 __PACKAGE__->register_method(
2261 api_name => "open-ils.worm.wormize.no_map",
2262 method => "wormize",
2266 __PACKAGE__->register_method(
2267 api_name => "open-ils.worm.wormize.batch",
2268 method => "wormize",
2272 __PACKAGE__->register_method(
2273 api_name => "open-ils.worm.wormize.no_map.batch",
2274 method => "wormize",
2289 my $acreate_source_map;
2304 sub authority_wormize {
2311 if ($self->api_name =~ /no_map/o) {
2315 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2317 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2319 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2321 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
# Handles for the authority-side storage methods: batch retrieve and update
# of record entries, mass delete of old descriptor and full_rec rows, and
# batch create of their replacements. The trailing "unless" guards skip the
# lookup when the handle variable is already set.
2323 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2325 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2326 unless ($aupdate_entry);
2327 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2328 unless ($arm_old_rd);
2329 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2330 unless ($arm_old_fr);
2331 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2332 unless ($ard_create);
2333 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2334 unless ($afr_create);
2337 my ($outer_xact) = $in_xact->run;
2339 unless ($outer_xact) {
2340 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2341 my ($r) = $begin->run($client);
2342 unless (defined $r and $r) {
2344 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2347 } catch Error with {
2348 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Retrieve the authority record entries through $alookup, the authority
# batch.retrieve handle set up above; $lookup is the handle that wormize
# binds to the bibliographic record_entry retrieve method.
2358 for my $entry ( $alookup->run(@docids) ) {
2359 # step -1: grab the doc from storage
2360 next unless ($entry);
2363 # my $xslt_doc = $parser->parse_file(
2364 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2365 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2368 my $xml = $entry->marc;
2369 my $docid = $entry->id;
2370 my $marcdoc = $parser->parse_string($xml);
2371 #my $madsdoc = $mads_sheet->transform($marcdoc);
2373 #my $mads = $madsdoc->documentElement;
2374 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2376 push @entry_list, $entry;
2378 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2379 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2381 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2382 for my $rd_field ( keys %descriptor_code ) {
2383 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2385 $rd_obj->record( $docid );
2386 push @rd_list, $rd_obj;
2388 # step 2: build the KOHA rows
2389 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2390 $_->record( $docid ) for (@tmp_list);
2391 push @ns_list, @tmp_list;
2395 last unless ($self->api_name =~ /batch$/o);
2398 $arm_old_rd->run( { record => \@docids } );
2399 $arm_old_fr->run( { record => \@docids } );
2401 my ($rd) = $ard_create->run(@rd_list);
2402 unless (defined $rd) {
2403 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Store the flat MARC rows through $afr_create, the authority full_rec
# batch.create handle set up above; $fr_create is the handle that wormize
# binds to the metabib full_rec create method.
2406 my ($fr) = $afr_create->run(@ns_list);
2407 unless (defined $fr) {
2408 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2411 unless ($outer_xact) {
2412 $log->debug("Commiting transaction started by Ingest.", INFO);
2413 my ($c) = $commit->run;
2414 unless (defined $c and $c) {
2416 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Authority ingest entry points. They must dispatch to authority_wormize;
# binding them to "wormize" would run the bibliographic ingest against
# authority record ids.
# NOTE(review): "authortiy" in the first api_name is misspelled (the batch
# variant spells it "authority"). It is left untouched here because the name
# is part of the published API; confirm no caller depends on the misspelling
# and then correct it.
2422 __PACKAGE__->register_method(
2423 api_name => "open-ils.worm.authortiy.wormize",
2424 method => "authority_wormize",
2428 __PACKAGE__->register_method(
2429 api_name => "open-ils.worm.authority.wormize.batch",
2430 method => "authority_wormize",
2436 # --------------------------------------------------------------------------------
2439 sub _marcxml_to_full_rows {
2441 my $marcxml = shift;
2442 my $type = shift || 'Fieldmapper::metabib::full_rec';
2446 my $root = $marcxml->documentElement;
2448 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2449 next unless $tagline;
# Instantiate the caller-selected row class for the leader row, as the
# subfield rows below already do; hard-coding the metabib class would hand
# metabib rows to callers that asked for another class (e.g. authority).
2451 my $ns = $type->new;
2454 my $val = NFD($tagline->textContent);
2455 $val =~ s/(\pM+)//gso;
2461 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2462 next unless $tagline;
# Instantiate the caller-selected row class for controlfield rows, as the
# subfield rows below already do; hard-coding the metabib class would hand
# metabib rows to callers that asked for another class (e.g. authority).
2464 my $ns = $type->new;
2466 $ns->tag( $tagline->getAttribute( "tag" ) );
2467 my $val = NFD($tagline->textContent);
2468 $val =~ s/(\pM+)//gso;
2474 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2475 next unless $tagline;
2477 my $tag = $tagline->getAttribute( "tag" );
2478 my $ind1 = $tagline->getAttribute( "ind1" );
2479 my $ind2 = $tagline->getAttribute( "ind2" );
2481 for my $data ( $tagline->childNodes ) {
2484 my $ns = $type->new;
2489 $ns->subfield( $data->getAttribute( "code" ) );
2490 my $val = NFD($data->textContent);
2491 $val =~ s/(\pM+)//gso;
2492 $ns->value( lc($val) );
# Collect the text found under every node matching $xpath (evaluated against
# $root) into one space-separated string, then decompose it with NFD and
# strip the combining marks.
#   $root  - node on which findnodes() is called
#   $xpath - XPath expression selecting the nodes of interest
2500 sub _get_field_value {
2502 my( $root, $xpath ) = @_;
2506 # grab the set of matching nodes
2507 my @nodes = $root->findnodes( $xpath );
2508 for my $value (@nodes) {
2510 # grab all children of the node
2511 my @children = $value->childNodes();
2512 for my $child (@children) {
2514 # add the childs content to the growing buffer
# The child's text is escaped and matched as a literal against everything
# gathered so far, so a value is skipped not only when it is an exact
# repeat but also when it occurs as a substring of earlier text.
2515 my $content = quotemeta($child->textContent);
2516 next if ($string =~ /$content/); # uniquify the values
2517 $string .= $child->textContent . " ";
# NOTE(review): the branch structure around the next line is not visible in
# this excerpt; presumably it covers nodes handled without the child loop.
2520 $string .= $value->textContent . " ";
# Decompose to NFD so accents become separate combining marks, then remove
# every character with the Unicode Mark property.
2523 $string = NFD($string);
2524 $string =~ s/(\pM)//gso;
2529 sub modsdoc_to_values {
2530 my( $self, $mods ) = @_;
2532 for my $class (keys %$xpathset) {
2533 $data->{$class} = {};
2534 for my $type (keys %{$xpathset->{$class}}) {
2535 $data->{$class}->{$type} = {};
2536 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};