1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Table of the XML formats this module knows about, keyed by format name.
# Each entry starts out holding only the format's namespace URI under 'ns';
# the initialisation code further down adds a compiled stylesheet under
# 'xslt' for the mods and mods3 entries.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
30 our $log = 'OpenSRF::Utils::Logger';
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
# One-time lazy initialisation: everything below is skipped as soon as
# $xpathset contains at least one key.
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
# Directory containing the XSL stylesheets, looked up as dirs => 'xsl'.
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile the MARC21slim -> MODS stylesheet unless it is already cached.
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet.
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask open-ils.cstore for the config.metabib_field rows whose id is not undef.
# NOTE(review): the end of this statement is not visible here -- confirm how
# the request result is collected into $req.
61 my $req = OpenSRF::AppSession
62 ->create('open-ils.cstore')
63 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Only populate the cache when a non-empty array reference came back.
66 if (ref $req and @$req) {
# Index each field definition by field class and name, remembering its
# XPath expression, database id and source format.
# ($f is presumably the loop variable over @$req -- loop header not shown.)
68 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
69 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
70 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
71 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
87 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
# Read-only ingest of a single MARCXML string.
# Runs the flat-MARC, field-entry and fingerprint methods and returns a hash
# reference with the keys full_rec, field_entries and fingerprint.
# NOTE(review): the argument unpacking ($self, $xml) is not visible here.
91 sub ro_biblio_ingest_single_xml {
# Parse once; the parsed document is shared by the first two method calls.
96 my $document = $parser->parse_string($xml);
98 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
99 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
# The fingerprint method is handed the original string, not the parsed document.
100 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
102 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp };
104 __PACKAGE__->register_method(
105 api_name => "open-ils.ingest.full.biblio.xml.readonly",
106 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of one stored bibliographic record.
# Retrieves the record through open-ils.cstore, runs the XML ingest method on
# its MARC, and stamps the originating record value on every result row.
# NOTE(review): the unpacking of $self / $rec is not visible here.
111 sub ro_biblio_ingest_single_record {
116 OpenILS::Application::Ingest->post_init();
117 my $r = OpenSRF::AppSession
118 ->create('open-ils.cstore')
119 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Give up when nothing usable was retrieved.
# NOTE(review): $r is dereferenced as an array here but used as an object
# ($r->marc) on the next line -- confirm the retrieved object supports both.
122 return undef unless ($r and @$r);
124 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
# Field entries link back through 'source', full_rec rows through 'record'.
126 $_->source($rec) for (@{$res->{field_entries}});
127 $_->record($rec) for (@{$res->{full_rec}});
131 __PACKAGE__->register_method(
132 api_name => "open-ils.ingest.full.biblio.record.readonly",
133 method => "ro_biblio_ingest_single_record",
# Streaming variant of the single-record read-only ingest.
# Receives one message at a time from the client, runs the single-record
# ingest on its content, and responds with each result.
138 sub ro_biblio_ingest_stream_record {
142 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any of the visible lines of this sub.
144 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Wait up to 5 seconds for each message; the loop ends when recv returns nothing.
146 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
148 my $rec = $resp->content;
# An undefined payload terminates the stream.
149 last unless (defined $rec);
151 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
152 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# The single-record method also sets source/record on these rows, so the two
# lines below repeat that assignment with the same value.
154 $_->source($rec) for (@{$res->{field_entries}});
155 $_->record($rec) for (@{$res->{full_rec}});
157 $client->respond( $res );
162 __PACKAGE__->register_method(
163 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
164 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the single-XML read-only ingest.
# Receives one XML payload at a time from the client, runs the XML ingest
# method on it, and responds with each result.
169 sub ro_biblio_ingest_stream_xml {
173 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any of the visible lines of this sub.
175 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Wait up to 5 seconds for each message; the loop ends when recv returns nothing.
177 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
179 my $xml = $resp->content;
# An undefined payload terminates the stream.
180 last unless (defined $xml);
182 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
183 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
185 $client->respond( $res );
190 __PACKAGE__->register_method(
191 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
192 method => "ro_biblio_ingest_stream_xml",
198 # --------------------------------------------------------------------------------
199 # MARC index extraction
201 package OpenILS::Application::Ingest::XPATH;
202 use base qw/OpenILS::Application::Ingest/;
203 use Unicode::Normalize;
205 # Give this an XML documentElement and an XPath expression; it collects the
# text content of the matching nodes into one space-separated string.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri, $unique and the
# declaration of $string are not visible here.
206 sub xpath_to_string {
210 my $ns_prefix = shift;
# Register the namespace on the element only when both URI and prefix are given.
213 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
217 # grab the set of matching nodes
218 my @nodes = $xml->findnodes( $xpath );
219 for my $value (@nodes) {
221 # grab all children of the node
222 my @children = $value->childNodes();
223 for my $child (@children) {
225 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern below.
226 my $content = quotemeta($child->textContent);
# This is a substring test: text already contained anywhere in the buffer
# (even inside a longer value) is skipped when $unique is set.
227 next if ($unique && $string =~ /$content/); # uniquify the values
228 $string .= $child->textContent . " ";
231 $string .= $value->textContent . " ";
# For each requested field class, evaluates every configured XPath of that
# class against the record and responds with one field-entry object per
# definition.
# NOTE(review): a second sub named class_index_string_xml appears later in
# this same package; confirm which definition is meant to be live.
237 sub class_index_string_xml {
243 OpenILS::Application::Ingest->post_init();
# Accept either a raw XML string or an already-parsed document.
244 $xml = $parser->parse_string($xml) unless (ref $xml);
248 for my $class (@classes) {
# Name of the Fieldmapper class used to hold entries of this field class.
249 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
250 for my $type ( keys %{ $xpathset->{$class} } ) {
252 my $def = $xpathset->{$class}->{$type};
253 my $sf = $supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format,
# reusing the cached result on later iterations.
258 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
259 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix passed to xpath_to_string.
262 my $value = xpath_to_string(
263 $document->documentElement => $def->{xpath},
264 $sf->{ns} => $def->{format},
# Strip mark (\pM) and "other"/control (\pC) characters.
270 $value =~ s/\pM+//sgo;
271 $value =~ s/\pC+//sgo;
272 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character.
274 $value =~ s/(\w)\./$1/sgo;
277 my $fm = $class_constructor->new;
278 $fm->value( $value );
279 $fm->field( $xpathset->{$class}->{$type}->{id} );
280 $client->respond($fm);
285 __PACKAGE__->register_method(
286 api_name => "open-ils.ingest.field_entry.class.xml",
287 method => "class_index_string_xml",
293 sub class_index_string_record {
299 OpenILS::Application::Ingest->post_init();
300 my $r = OpenSRF::AppSession
301 ->create('open-ils.cstore')
302 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
305 return undef unless ($r and @$r);
307 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
309 $client->respond($fm);
313 __PACKAGE__->register_method(
314 api_name => "open-ils.ingest.field_entry.class.record",
315 method => "class_index_string_record",
321 sub all_index_string_xml {
326 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
327 $client->respond($fm);
331 __PACKAGE__->register_method(
332 api_name => "open-ils.ingest.extract.field_entry.all.xml",
333 method => "all_index_string_xml",
339 sub all_index_string_record {
344 OpenILS::Application::Ingest->post_init();
345 my $r = OpenSRF::AppSession
346 ->create('open-ils.cstore')
347 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
350 return undef unless ($r and @$r);
352 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
354 $client->respond($fm);
358 __PACKAGE__->register_method(
359 api_name => "open-ils.ingest.extract.field_entry.all.record",
360 method => "all_index_string_record",
366 # --------------------------------------------------------------------------------
369 package OpenILS::Application::Ingest::FlatMARC;
370 use base qw/OpenILS::Application::Ingest/;
371 use Unicode::Normalize;
# Flattens a MARCXML record into full_rec rows, visiting the leader, the
# control fields, and every subfield of every data field.
# NOTE(review): creation of the $ns row objects and of @ns_list is not
# visible in this listing.
374 sub _marcxml_to_full_rows {
# Second argument selects the Fieldmapper namespace; defaults to 'metabib'.
377 my $xmltype = shift || 'metabib';
379 my $type = "Fieldmapper::${xmltype}::full_rec";
# First element named "record", matched by local name regardless of namespace.
383 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
385 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
386 next unless $tagline;
391 my $val = $tagline->textContent;
# Remove mark (\pM) characters from the value.
393 $val =~ s/(\pM+)//gso;
399 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
400 next unless $tagline;
404 $ns->tag( $tagline->getAttribute( "tag" ) );
405 my $val = $tagline->textContent;
407 $val =~ s/(\pM+)//gso;
413 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
414 next unless $tagline;
416 my $tag = $tagline->getAttribute( "tag" );
417 my $ind1 = $tagline->getAttribute( "ind1" );
418 my $ind2 = $tagline->getAttribute( "ind2" );
# One row per subfield of the data field.
420 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
428 $ns->subfield( $data->getAttribute( "code" ) );
429 my $val = $data->textContent;
431 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
432 $ns->value( lc($val) );
438 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
447 $log->debug("processing [$xml]");
449 $xml = $parser->parse_string($xml) unless (ref $xml);
451 my $type = 'metabib';
452 $type = 'authority' if ($self->api_name =~ /authority/o);
454 OpenILS::Application::Ingest->post_init();
456 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
459 __PACKAGE__->register_method(
460 api_name => "open-ils.ingest.flat_marc.authority.xml",
461 method => "flat_marc_xml",
466 __PACKAGE__->register_method(
467 api_name => "open-ils.ingest.flat_marc.biblio.xml",
468 method => "flat_marc_xml",
474 sub flat_marc_record {
480 $type = 'authority' if ($self->api_name =~ /authority/o);
482 OpenILS::Application::Ingest->post_init();
483 my $r = OpenSRF::AppSession
484 ->create('open-ils.cstore')
485 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
489 return undef unless ($r and $r->marc);
491 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
492 for my $row (@rows) {
493 $client->respond($row);
494 $log->debug(JSON->perl2JSON($row), DEBUG);
498 __PACKAGE__->register_method(
499 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
500 method => "flat_marc_record",
505 __PACKAGE__->register_method(
506 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
507 method => "flat_marc_record",
513 # --------------------------------------------------------------------------------
516 package OpenILS::Application::Ingest::Biblio::Fingerprint;
517 use base qw/OpenILS::Application::Ingest/;
518 use Unicode::Normalize;
519 use OpenSRF::EX qw/:try/;
521 sub biblio_fingerprint_record {
526 OpenILS::Application::Ingest->post_init();
528 my $r = OpenSRF::AppSession
529 ->create('open-ils.cstore')
530 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
533 return undef unless ($r and $r->marc);
535 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
536 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
539 __PACKAGE__->register_method(
540 api_name => "open-ils.ingest.fingerprint.record",
541 method => "biblio_fingerprint_record",
# Computes a fingerprint for a MARC record by running the configured
# biblio_fingerprint script with the record exposed as environment.marc.
547 sub biblio_fingerprint {
552 $log->internal("Got MARC [$xml]");
# Settings path prefix: apps / open-ils.ingest / app_settings.
555 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
556 my $conf = OpenSRF::Utils::SettingsClient->new;
558 my $libs = $conf->config_value(@pfx, 'script_path');
559 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array reference.
560 my $script_libs = (ref($libs)) ? $libs : [$libs];
562 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): indirect-object syntax; OpenILS::Utils::ScriptRunner->new(...)
# is the unambiguous spelling. $fp_script is not declared in the visible lines.
564 $fp_script = new OpenILS::Utils::ScriptRunner
565 ( file => $script_file,
566 paths => $script_libs,
567 reset_count => 1000 );
570 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): the early return only happens if $log->error(...) returns a
# true value; otherwise execution continues with a false $res -- confirm.
572 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
573 $log->debug("Script for biblio fingerprinting completed successfully...");
577 __PACKAGE__->register_method(
578 api_name => "open-ils.ingest.fingerprint.xml",
579 method => "biblio_fingerprint",
590 OpenILS::Application::Ingest->post_init();
591 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction for the ingest process and returns whatever
# open-ils.storage.transaction.current reports afterwards.
# NOTE(review): the conditionals and try/catch around these statements are
# not visible in this listing.
594 sub begin_transaction {
598 OpenILS::Application::Ingest->post_init();
# Ask the storage layer whether a transaction is already open.
599 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
603 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
604 #__PACKAGE__->st_sess->connect;
605 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A missing or false reply means BEGIN failed: roll back and raise a PANIC.
606 unless (defined $r and $r) {
607 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
608 #__PACKAGE__->st_sess->disconnect;
609 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
613 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
616 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the current storage transaction.
# NOTE(review): the branch structure around these statements is not visible;
# presumably the rollback is issued only when $outer_xact is set -- confirm.
619 sub rollback_transaction {
623 OpenILS::Application::Ingest->post_init();
# Ask the storage layer whether a transaction is currently open.
624 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
628 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
630 $log->debug("Ingest isn't inside a transaction.", INFO);
633 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the current storage transaction, rolling back and raising a PANIC
# when the commit request does not return a true value.
# NOTE(review): the branch structure around these statements is not visible.
639 sub commit_transaction {
643 OpenILS::Application::Ingest->post_init();
# Ask the storage layer whether a transaction is currently open.
644 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
647 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
649 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A missing or false reply means COMMIT failed.
650 unless (defined $r and $r) {
651 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
652 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
654 #__PACKAGE__->st_sess->disconnect;
656 $log->debug("Ingest isn't inside a transaction.", INFO);
659 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
668 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
669 return shift( @res );
# Deletes the derived rows (full_rec and record_descriptor) of an authority
# record inside a savepoint named 'scrub_authority_record'.
# NOTE(review): the setup of $rec, $commit and $success and the try/catch
# framing are not visible in this listing.
672 sub scrub_authority_record {
# Open a transaction only when none is active yet.
678 if (!OpenILS::Application::Ingest->in_transaction) {
679 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
685 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
687 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
688 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
690 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and undo back to the savepoint.
692 $log->debug('Scrubbing failed : '.shift(), ERROR);
693 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# When $commit is set, finish the transaction according to the outcome.
697 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
698 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
701 __PACKAGE__->register_method(
702 api_name => "open-ils.worm.scrub.authority",
703 method => "scrub_authority_record",
# Deletes the derived metabib rows of a bibliographic record and repairs or
# removes metarecords that used it as their master, all inside a savepoint
# named 'scrub_metabib_record'.
# NOTE(review): the setup of $rec, $commit and $success and the try/catch
# framing are not visible in this listing.
709 sub scrub_metabib_record {
# A hash argument is treated as a search; it is replaced by the id_list result.
714 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
715 $rec = OpenILS::Application::Ingest->storage_req(
716 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction only when none is active yet.
721 if (!OpenILS::Application::Ingest->in_transaction) {
722 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
728 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Remove every derived row; some tables key on 'record', others on 'source'.
730 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
731 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
732 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
733 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
734 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
735 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
736 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
737 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# NOTE(review): after the search_where branch above $rec may no longer be a
# single id, yet it is interpolated into messages and used as a search key.
739 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
740 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
742 for my $mr (@$masters) {
743 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Source-map rows still attached to this metarecord.
744 my $others = OpenILS::Application::Ingest->storage_req(
745 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the stored mods.
748 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
749 $mr->master_record($others->[0]->source);
750 OpenILS::Application::Ingest->storage_req(
751 'open-ils.storage.direct.metabib.metarecord.remote_update',
753 { master_record => $others->[0]->source, mods => undef }
# NOTE(review): the warn calls below duplicate the adjacent debug log lines
# and look like leftover debugging output.
756 warn "Removing metarecord whose master is $rec";
757 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
758 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
759 warn "Metarecord removed";
760 $log->debug( "Metarecord removed", DEBUG);
764 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo back to the savepoint.
767 $log->debug('Scrubbing failed : '.shift(), ERROR);
768 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# When $commit is set, finish the transaction according to the outcome.
772 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
773 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
776 __PACKAGE__->register_method(
777 api_name => "open-ils.worm.scrub.biblio",
778 method => "scrub_metabib_record",
# Re-ingests every bibliographic record mapped to a metarecord by calling
# wormize_biblio_record on each source-map row's source.
# NOTE(review): most of this sub (argument unpacking, loop header, the calls
# these hash fragments belong to) is not visible in this listing.
783 sub wormize_biblio_metarecord {
# All source-map rows belonging to the metarecord $mrec.
788 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
794 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): the visible row variable is $r; $rec is not declared in any
# visible line, so $rec->metarecord below may be a typo -- confirm.
796 { record => $r->source,
797 metarecord => $rec->metarecord,
804 { record => $r->source,
805 metarecord => $rec->metarecord,
814 __PACKAGE__->register_method(
815 api_name => "open-ils.worm.wormize.metarecord",
816 method => "wormize_biblio_metarecord",
821 __PACKAGE__->register_method(
822 api_name => "open-ils.worm.wormize.metarecord.nomap",
823 method => "wormize_biblio_metarecord",
828 __PACKAGE__->register_method(
829 api_name => "open-ils.worm.wormize.metarecord.noscrub",
830 method => "wormize_biblio_metarecord",
835 __PACKAGE__->register_method(
836 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
837 method => "wormize_biblio_metarecord",
# Full ingest of one or more bibliographic records: optionally scrubs old
# derived data, extracts fingerprint, flat MARC rows, record descriptors and
# field entries per record, maps records to metarecords, then writes
# everything in batches inside the 'wormize_record' savepoint.
# NOTE(review): argument unpacking, several declarations (@full_rec,
# %field_entry, @source_map, %metarecord), loop headers and the try/catch
# framing are not visible in this listing.
844 sub wormize_biblio_record {
# A hash argument is treated as a search; it is replaced by the id_list result.
849 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
850 $rec = OpenILS::Application::Ingest->storage_req(
851 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction only when none is active yet.
857 if (!OpenILS::Application::Ingest->in_transaction) {
858 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# API names containing "noscrub" skip the cleanup of existing derived data.
865 unless ($self->api_name =~ /noscrub/o) {
866 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
870 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
873 my @rec_descriptor = ();
# Per-record savepoint so one bad record can be rolled back on its own.
885 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
887 my $xml = $parser->parse_string($r->marc);
889 #update the fingerprint
# NOTE(review): 'open-ils.worm.fingerprint.marc' is not registered in the
# visible source (the visible fingerprint method is
# 'open-ils.ingest.fingerprint.xml') -- confirm it exists.
890 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
891 OpenILS::Application::Ingest->storage_req(
892 'open-ils.storage.direct.biblio.record_entry.remote_update',
894 { fingerprint => $fp->{fingerprint},
895 quality => int($fp->{quality}) }
# Only written when fingerprint or quality changed.
# NOTE(review): quality is compared with the string operator 'ne'.
896 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
# NOTE(review): 'open-ils.worm.flat_marc.biblio.xml' is likewise not
# registered in the visible source -- confirm.
899 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
900 $fr->record( $r->id );
904 # the rec_descriptor stuff
905 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
906 $rd->record( $r->id );
907 push @rec_descriptor, $rd;
909 # the indexing field entry stuff
910 for my $class ( qw/title author subject keyword series/ ) {
911 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
912 $fe->source( $r->id );
913 push @{$field_entry{$class}}, $fe;
# API names containing "nomap" skip metarecord mapping.
917 unless ($self->api_name =~ /nomap/o) {
# Reuse the first metarecord with this fingerprint if one exists.
918 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# Otherwise create a new metarecord with this record as its master.
921 $mr = Fieldmapper::metabib::metarecord->new;
922 $mr->fingerprint( $fp->{fingerprint} );
923 $mr->master_record( $r->id );
924 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
927 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
928 $mr_map->metarecord( $mr->id );
929 $mr_map->source( $r->id );
930 push @source_map, $mr_map;
# Remember each touched metarecord once, keyed by id.
932 $metarecord{$mr->id} = $mr;
934 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record: log and undo to its savepoint.
936 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
937 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Nothing is written unless at least one record produced a descriptor.
942 if (@rec_descriptor) {
943 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
945 OpenILS::Application::Ingest->storage_req(
946 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# Re-elect the master of every touched metarecord.
950 for my $mr ( values %metarecord ) {
951 my $sources = OpenILS::Application::Ingest->storage_req(
952 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
956 my $bibs = OpenILS::Application::Ingest->storage_req(
957 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
958 [ map { $_->source } @$sources ]
# The record with the numerically highest quality becomes the master.
961 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
963 OpenILS::Application::Ingest->storage_req(
964 'open-ils.storage.direct.metabib.metarecord.remote_update',
966 { master_record => $master->id, mods => undef }
970 OpenILS::Application::Ingest->storage_req(
971 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
# This condition repeats the enclosing if (@rec_descriptor) test.
973 ) if (@rec_descriptor);
975 OpenILS::Application::Ingest->storage_req(
976 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch insert per field class, skipped when that class has no entries.
980 OpenILS::Application::Ingest->storage_req(
981 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
982 @{ $field_entry{title} }
983 ) if (@{ $field_entry{title} });
985 OpenILS::Application::Ingest->storage_req(
986 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
987 @{ $field_entry{author} }
988 ) if (@{ $field_entry{author} });
990 OpenILS::Application::Ingest->storage_req(
991 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
992 @{ $field_entry{subject} }
993 ) if (@{ $field_entry{subject} });
995 OpenILS::Application::Ingest->storage_req(
996 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
997 @{ $field_entry{keyword} }
998 ) if (@{ $field_entry{keyword} });
1000 OpenILS::Application::Ingest->storage_req(
1001 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1002 @{ $field_entry{series} }
1003 ) if (@{ $field_entry{series} });
1005 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the batch write: log and undo to the savepoint.
1011 $log->debug('Wormization failed : '.shift(), ERROR);
1012 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# When $commit is set, finish the transaction according to the outcome.
1016 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1017 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1020 __PACKAGE__->register_method(
1021 api_name => "open-ils.worm.wormize.biblio",
1022 method => "wormize_biblio_record",
1026 __PACKAGE__->register_method(
1027 api_name => "open-ils.worm.wormize.biblio.nomap",
1028 method => "wormize_biblio_record",
1032 __PACKAGE__->register_method(
1033 api_name => "open-ils.worm.wormize.biblio.noscrub",
1034 method => "wormize_biblio_record",
1038 __PACKAGE__->register_method(
1039 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1040 method => "wormize_biblio_record",
# Ingest of authority records: optionally scrubs old derived data, flattens
# each record's MARC into full_rec rows, and batch-creates those rows inside
# the 'wormize_authority_record' savepoint.
# NOTE(review): argument unpacking, the @full_rec declaration and the
# try/catch framing are not visible in this listing.
1045 sub wormize_authority_record {
# Open a transaction only when none is active yet.
1051 if (!OpenILS::Application::Ingest->in_transaction) {
1052 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1058 # clean up the cruft
# API names containing "noscrub" skip this cleanup.
1059 unless ($self->api_name =~ /noscrub/o) {
1060 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the variable name, these are authority record entries.
1064 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# Only referenced by the commented-out descriptor code below.
1067 my @rec_descriptor = ();
1068 for my $r (@$bibs) {
1069 my $xml = $parser->parse_string($r->marc);
1071 # the full_rec stuff
# NOTE(review): 'open-ils.worm.flat_marc.authority.xml' is not registered in
# the visible source (the visible name is
# 'open-ils.ingest.flat_marc.authority.xml') -- confirm it exists.
1072 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1073 $fr->record( $r->id );
1074 push @full_rec, $fr;
1077 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1078 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1079 #$rd->record( $r->id );
1080 #push @rec_descriptor, $rd;
1084 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1086 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
# Write all flattened rows in one batch, skipped when there are none.
1087 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1089 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error and undo back to the savepoint.
1092 $log->debug('Wormization failed : '.shift(), ERROR);
1093 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# When $commit is set, finish the transaction according to the outcome.
1097 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1098 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1101 __PACKAGE__->register_method(
1102 api_name => "open-ils.worm.wormize.authority",
1103 method => "wormize_authority_record",
1107 __PACKAGE__->register_method(
1108 api_name => "open-ils.worm.wormize.authority.noscrub",
1109 method => "wormize_authority_record",
1115 # --------------------------------------------------------------------------------
1116 # MARC index extraction
1118 package OpenILS::Application::Ingest::XPATH;
1119 use base qw/OpenILS::Application::Ingest/;
1120 use Unicode::Normalize;
1122 # Give this a MODS documentElement and an XPath expression; it collects the
# text content of the matching nodes into one space-separated string and
# returns that string in Unicode NFD form.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri, $unique and the
# declaration of $string are not visible here.
1123 sub _xpath_to_string {
1127 my $ns_prefix = shift;
# Register the namespace on the element only when both URI and prefix are given.
1130 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1134 # grab the set of matching nodes
1135 my @nodes = $xml->findnodes( $xpath );
1136 for my $value (@nodes) {
1138 # grab all children of the node
1139 my @children = $value->childNodes();
1140 for my $child (@children) {
1142 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern below.
1143 my $content = quotemeta($child->textContent);
# This is a substring test: text already contained anywhere in the buffer
# (even inside a longer value) is skipped when $unique is set.
1144 next if ($unique && $string =~ /$content/); # uniquify the values
1145 $string .= $child->textContent . " ";
1148 $string .= $value->textContent . " ";
1151 return NFD($string);
# For one field class, evaluates every configured XPath of that class against
# the MODS transform of the record and responds with one lower-cased
# field-entry object per definition.
# NOTE(review): argument unpacking and the tail of the _xpath_to_string call
# are not visible in this listing.
1154 sub class_all_index_string_xml {
1160 OpenILS::Application::Ingest->post_init();
# Accept either a raw XML string or an already-parsed document.
1161 $xml = $parser->parse_string($xml) unless (ref $xml);
# Name of the Fieldmapper class used to hold entries of this field class.
1163 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1164 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): $mods_sheet is never assigned in the visible source (the
# visible init code stores stylesheets in %supported_formats) -- confirm.
# The transform is also repeated for every $type.
1165 my $value = _xpath_to_string(
1166 $mods_sheet->transform($xml)->documentElement,
1167 $xpathset->{$class}->{$type}->{xpath},
1168 "http://www.loc.gov/mods/",
# Strip mark (\pM) and "other"/control (\pC) characters.
1175 $value =~ s/\pM+//sgo;
1176 $value =~ s/\pC+//sgo;
1177 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
# Drop a period that directly follows a word character, then lower-case.
1179 $value =~ s/(\w)\./$1/sgo;
1180 $value = lc($value);
1182 my $fm = $class_constructor->new;
1183 $fm->value( $value );
1184 $fm->field( $xpathset->{$class}->{$type}->{id} );
1185 $client->respond($fm);
1189 __PACKAGE__->register_method(
1190 api_name => "open-ils.worm.field_entry.class.xml",
1191 method => "class_all_index_string_xml",
1197 sub class_all_index_string_record {
1203 OpenILS::Application::Ingest->post_init();
1204 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1206 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1208 $client->respond($fm);
1212 __PACKAGE__->register_method(
1213 api_name => "open-ils.worm.field_entry.class.record",
1214 method => "class_all_index_string_record",
1221 sub class_index_string_xml {
1228 OpenILS::Application::Ingest->post_init();
1229 $xml = $parser->parse_string($xml) unless (ref $xml);
1230 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1232 __PACKAGE__->register_method(
1233 api_name => "open-ils.worm.class.type.xml",
1234 method => "class_index_string_xml",
1239 sub class_index_string_record {
1246 OpenILS::Application::Ingest->post_init();
1247 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1249 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1250 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1253 __PACKAGE__->register_method(
1254 api_name => "open-ils.worm.class.type.record",
1255 method => "class_index_string_record",
1269 OpenILS::Application::Ingest->post_init();
1270 $xml = $parser->parse_string($xml) unless (ref $xml);
1271 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1273 __PACKAGE__->register_method(
1274 api_name => "open-ils.worm.xpath.xml",
1275 method => "xml_xpath",
1289 OpenILS::Application::Ingest->post_init();
1290 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1292 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1293 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1296 __PACKAGE__->register_method(
1297 api_name => "open-ils.worm.xpath.record",
1298 method => "record_xpath",
1304 # --------------------------------------------------------------------------------
1307 package OpenILS::Application::Ingest::Biblio::Leader;
1308 use base qw/OpenILS::Application::Ingest/;
1309 use Unicode::Normalize;
1311 our %marc_type_groups = (
1314 VIS => q/[gkro]{1}/,
# Build one anchored pattern matching any of the requested MARC type groups.
# The group names arrive in @_, so a hash *slice* (@marc_type_groups{@_}) is
# needed to fetch all of their patterns; the scalar form $marc_type_groups{@_}
# looks up a single, unrelated key and never yields the group patterns.
# The alternation is wrapped in (?:...) so that ^ and $ apply to every
# alternative rather than only to the first and last one.
1323 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
# Extractor table for record descriptors: each key is a descriptor field name
# and each value a code reference that reads fixed character positions out of
# the package variables $ldr and $oo8, which _extract_biblio_descriptors
# localises before calling these subs.
# NOTE(review): the key owning the if/elsif fragment below (original lines
# 1329-1337) is not visible in this listing.
1327 our %biblio_descriptor_code = (
1328 item_type => sub { substr($ldr,6,1); },
# The position read from $oo8 depends on the type group matched by the
# character at $ldr offset 6.
1331 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1332 return substr($oo8,29,1);
1333 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1334 return substr($oo8,23,1);
1338 bib_level => sub { substr($ldr,7,1); },
1339 control_type => sub { substr($ldr,8,1); },
1340 char_encoding => sub { substr($ldr,9,1); },
1341 enc_level => sub { substr($ldr,17,1); },
1342 cat_form => sub { substr($ldr,18,1); },
1343 pub_status => sub { substr($ldr,5,1); },
1344 item_lang => sub { substr($oo8,35,3); },
# lit_form is only defined for the BKS group, type_mat only for the VIS group;
# both read the same $oo8 offset and yield undef otherwise.
1345 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1346 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1347 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: fills a metabib record_descriptor object from a parsed MARCXML
# node ($xml) by running every closure in %biblio_descriptor_code.
# NOTE(review): the argument unpacking and the return statement are not visible in this excerpt.
1350 sub _extract_biblio_descriptors {
# Leader, 008 and 007 are located with namespace-agnostic XPath (local-name()) and exposed to
# the closures through local()ized package variables.
1353 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1354 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1355 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1357 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each hash key doubles as the name of the setter called on the descriptor object.
1358 for my $rd_field ( keys %biblio_descriptor_code ) {
1359 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API wrapper around _extract_biblio_descriptors for MARCXML input.
# NOTE(review): argument unpacking is not visible in this excerpt.
1365 sub extract_biblio_desc_xml {
# Accept either an already-parsed document (a reference) or raw XML text.
1370 $xml = $parser->parse_string($xml) unless (ref $xml);
1372 return _extract_biblio_descriptors( $xml );
1374 __PACKAGE__->register_method(
1375 api_name => "open-ils.worm.biblio_leader.xml",
1376 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id flavour of the descriptor extraction. Retrieves bib
# record $rec and passes its MARC to "open-ils.worm.biblio_leader.xml".
# NOTE(review): argument unpacking and the return statement are not visible in this excerpt.
1381 sub extract_biblio_desc_record {
1386 OpenILS::Application::Ingest->post_init();
1387 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by run() is kept.
1389 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
# The descriptor is serialized with JSON->perl2JSON purely for the log message.
1390 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1393 __PACKAGE__->register_method(
1394 api_name => "open-ils.worm.biblio_leader.record",
1395 method => "extract_biblio_desc_record",
1400 # --------------------------------------------------------------------------------
1403 package OpenILS::Application::Ingest::FlatMARC;
1404 use base qw/OpenILS::Application::Ingest/;
1405 use Unicode::Normalize;
# _marcxml_to_full_rows: flattens a MARCXML document into full_rec Fieldmapper objects, one
# per leader, per controlfield and per subfield.
#   $marcxml - parsed XML document/node (must support findnodes)
#   $xmltype - Fieldmapper namespace segment; defaults to 'metabib'
# NOTE(review): the list declaration, most setter calls, the pushes and the return statement
# are not visible in this excerpt.
1408 sub _marcxml_to_full_rows {
1410 my $marcxml = shift;
1411 my $xmltype = shift || 'metabib';
# Class name used to construct every row object below.
1413 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first <record> element found (namespace-agnostic) is processed.
1417 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1419 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1420 next unless $tagline;
1422 my $ns = $type->new;
1425 my $val = $tagline->textContent;
# Remove Unicode mark characters (\pM), e.g. combining accents.
1427 $val =~ s/(\pM+)//gso;
1433 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1434 next unless $tagline;
1436 my $ns = $type->new;
1438 $ns->tag( $tagline->getAttribute( "tag" ) );
1439 my $val = $tagline->textContent;
1441 $val =~ s/(\pM+)//gso;
1447 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1448 next unless $tagline;
# Tag and indicators are read once per datafield; a row object is created per subfield.
1450 my $tag = $tagline->getAttribute( "tag" );
1451 my $ind1 = $tagline->getAttribute( "ind1" );
1452 my $ind2 = $tagline->getAttribute( "ind2" );
1454 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1457 my $ns = $type->new;
1462 $ns->subfield( $data->getAttribute( "code" ) );
1463 my $val = $data->textContent;
1465 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
1466 $ns->value( lc($val) );
1472 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of the handler registered below as "flat_marc_xml" (sub line and argument unpacking
# are not shown). It streams one full_rec row per response to the client.
# Accept either an already-parsed document (a reference) or raw XML text.
1481 $xml = $parser->parse_string($xml) unless (ref $xml);
# The row type follows the API name: 'authority' when the name contains it, else 'metabib'.
1483 my $type = 'metabib';
1484 $type = 'authority' if ($self->api_name =~ /authority/o);
1486 OpenILS::Application::Ingest->post_init();
1488 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is published under two API names, one per record type.
1491 __PACKAGE__->register_method(
1492 api_name => "open-ils.worm.flat_marc.authority.xml",
1493 method => "flat_marc_xml",
1498 __PACKAGE__->register_method(
1499 api_name => "open-ils.worm.flat_marc.biblio.xml",
1500 method => "flat_marc_xml",
# flat_marc_record: record-id flavour of flat_marc_xml. Retrieves the biblio or authority
# record $rec and relays every row returned by the matching "...flat_marc.<type>.xml" method.
# NOTE(review): argument unpacking and the return statement are not visible in this excerpt.
1506 sub flat_marc_record {
# $type selects both the storage class and the downstream API name.
1511 my $type = 'biblio';
1512 $type = 'authority' if ($self->api_name =~ /authority/o);
1514 OpenILS::Application::Ingest->post_init();
1515 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1517 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
# The same sub is published under two API names, one per record type.
1520 __PACKAGE__->register_method(
1521 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1522 method => "flat_marc_record",
1527 __PACKAGE__->register_method(
1528 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1529 method => "flat_marc_record",
1536 # --------------------------------------------------------------------------------
1539 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1540 use base qw/OpenILS::Application::Ingest/;
1541 use Unicode::Normalize;
1542 use OpenSRF::EX qw/:try/;
# Fingerprint recipe table, consumed pairwise by the fingerprint builder further down: each
# even slot is an XPath test, each odd slot a block (array ref) of name => part pairs. A part
# carries a list of candidate XPaths under {xpath}; the regex lines below appear to be the
# per-part text clean-up ("fixup") code, whose enclosing sub lines are not shown here.
1544 my @fp_mods_xpath = (
# Recipe used when the record's typeOfResource is "text".
1545 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates, listed uniform, translated, alternative, then untyped.
1548 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1549 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1550 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1551 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1554 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1556 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove Unicode mark characters (combining accents).
1557 $text =~ s/\pM+//gso;
1558 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1560 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Collapse every whitespace run into a single space.
1561 $text =~ s/\s+/ /sgo;
1562 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Trim leading whitespace.
# NOTE(review): the greedy (.+) also swallows trailing whitespace, so the end is not trimmed.
1563 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1564 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove the stand-alone words "the", "a" and "an" (case-sensitive).
1565 $text =~ s/\b(?:the|an?)\b//sgo;
1566 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Remove bracketed text holding at least two characters.
1567 $text =~ s/\[.[^\]]+\]//sgo;
1568 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Strip trailing whitespace followed by any run of ';', '/' or '.'.
1569 $text =~ s/\s*[;\/\.]*$//sgo;
1570 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal creators first, then any creator.
1575 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1576 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1579 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1581 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1582 $text =~ s/\pM+//gso;
1583 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1585 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1586 $text =~ s/\s+/ /sgo;
1587 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1588 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1589 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Cut everything from the first whitespace (and a comma right before it) to the end,
# leaving only the first token of the name.
1590 $text =~ s/,?\s+.*$//sgo;
1591 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Recipe used when a relatedItem exists whose type is neither "host" nor "series"; the
# relatedItem's own titles/names are listed ahead of the record-level ones.
1596 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1599 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1600 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1601 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1602 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1603 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1604 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1605 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1606 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Same title clean-up sequence as in the first recipe.
1609 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1611 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1612 $text =~ s/\pM+//gso;
1613 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1615 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1616 $text =~ s/\s+/ /sgo;
1617 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1618 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1619 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1620 $text =~ s/\b(?:the|an?)\b//sgo;
1621 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1622 $text =~ s/\[.[^\]]+\]//sgo;
1623 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1624 $text =~ s/\s*[;\/\.]*$//sgo;
1625 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1630 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1631 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1632 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1633 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Same author clean-up sequence as in the first recipe.
1636 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1638 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1639 $text =~ s/\pM+//gso;
1640 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1642 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1643 $text =~ s/\s+/ /sgo;
1644 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1645 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1646 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1647 $text =~ s/,?\s+.*$//sgo;
1648 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Extra entry: any record with a titleInfo is paired with the very first block of the table
# (slot 1) by reference.
1655 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Body of the MODS fingerprint builder (presumably _fp_mods, which fingerprint_mods and
# fingerprint_marc call; its sub line, variable declarations and return are not shown).
# Bind the "mods" prefix used by every XPath in @fp_mods_xpath.
1659 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# @fp_mods_xpath is walked as (test XPath, block) pairs: $match_index addresses the test,
# $block_index the block that belongs to it.
# NOTE(review): the lines advancing these two indexes are not visible in this excerpt.
1663 my $match_index = 0;
1664 my $block_index = 1;
1665 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
# A recipe applies when its test XPath selects at least one node.
1666 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Inside a block, names sit at even positions and their part hashes at odd positions.
1668 my $block_name_index = 0;
1669 my $block_value_index = 1;
1670 my $block = $fp_mods_xpath[$block_index];
1671 while ( my $part = $$block[$block_value_index] ) {
# Candidate XPaths of the part are evaluated in the order listed.
1673 for my $xpath ( @{ $part->{xpath} } ) {
1674 $text = $mods->findvalue( $xpath );
1678 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1682 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
# The text obtained for each part is appended to the fingerprint string.
1683 $fp_string .= $text;
# Step to the next name/part pair of the block.
1686 $block_name_index += 2;
1687 $block_value_index += 2;
# Finally drop every non-word character from the assembled fingerprint.
1691 $fp_string =~ s/\W+//gso;
1692 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recomputes fingerprint and quality for the bib records matching $rec
# and, unless the API name contains "nomap", rebuilds their metarecord source maps.
# NOTE(review): argument unpacking, the try/catch framing, several call arguments and the
# assignments to $commit and $success are not visible in this excerpt.
1702 sub refingerprint_bibrec {
# Open a storage transaction only when none is in progress.
1708 if (!OpenILS::Application::Ingest->in_transaction) {
1709 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1715 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1716 for my $b (@$bibs) {
# $fp is used below as a hash ref with "fingerprint" and "quality" keys.
1717 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Storage is only touched when the stored fingerprint or quality differs from the fresh one.
1719 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1721 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1723 OpenILS::Application::Ingest->storage_req(
1724 'open-ils.storage.direct.biblio.record_entry.remote_update',
1726 { fingerprint => $fp->{fingerprint},
1727 quality => $fp->{quality} }
# Source-map maintenance is skipped for the ".nomap" API variant.
1730 if ($self->api_name !~ /nomap/o) {
1731 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1732 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Delete the existing source maps, remembering the metarecord they pointed at.
1737 if (ref($old_source_map) and @$old_source_map) {
1738 for my $m (@$old_source_map) {
1739 $old_mrid = $m->metarecord;
1740 OpenILS::Application::Ingest->storage_req(
1741 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no source maps left, it is deleted as well.
1747 my $old_sm = OpenILS::Application::Ingest->storage_req(
1748 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1749 { metarecord => $old_mrid }
1752 if (ref($old_sm) and @$old_sm == 0) {
1753 OpenILS::Application::Ingest->storage_req(
1754 'open-ils.storage.direct.metabib.metarecord.delete',
# Look for a metarecord carrying the new fingerprint.
1759 my $mr = OpenILS::Application::Ingest->storage_req(
1760 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1761 { fingerprint => $fp->{fingerprint} }
# A new metarecord is built with this bib as master record; the condition guarding this
# creation is not visible in this excerpt.
1765 $mr = Fieldmapper::metabib::metarecord->new;
1766 $mr->fingerprint( $fp->{fingerprint} );
1767 $mr->master_record( $b->id );
1768 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map this bib record onto the metarecord.
1771 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1772 $mr_map->metarecord( $mr->id );
1773 $mr_map->source( $b->id );
1774 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
# Each processed record id is streamed back to the caller.
1778 $client->respond($b->id);
1782 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Commit or roll back only when $commit is set, depending on $success.
1786 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1787 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published twice: the ".nomap" name switches off the source-map maintenance above.
1790 __PACKAGE__->register_method(
1791 api_name => "open-ils.worm.fingerprint.record.update",
1792 method => "refingerprint_bibrec",
1798 __PACKAGE__->register_method(
1799 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1800 method => "refingerprint_bibrec",
# fingerprint_bibrec: retrieves bib record $rec and fingerprints its MARC through
# "open-ils.worm.fingerprint.marc".
# NOTE(review): argument unpacking and the return statement are not visible in this excerpt.
# NOTE(review): "open-ils.worm.fingerprint.record" is also registered for
# biblio_fingerprint_record later in this file — confirm which registration should win.
1807 sub fingerprint_bibrec {
1812 OpenILS::Application::Ingest->post_init();
1813 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# Only the first value produced by run() is kept.
1815 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1816 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1820 __PACKAGE__->register_method(
1821 api_name => "open-ils.worm.fingerprint.record",
1822 method => "fingerprint_bibrec",
# fingerprint_mods: parses a MODS XML string and returns the fingerprint computed by _fp_mods
# for its root element.
# NOTE(review): argument unpacking is not visible in this excerpt.
1828 sub fingerprint_mods {
1833 OpenILS::Application::Ingest->post_init();
# Unlike the sibling handlers, $xml is always parsed here, so it must be a string.
1834 my $mods = $parser->parse_string($xml)->documentElement;
1836 return _fp_mods( $mods );
1838 __PACKAGE__->register_method(
1839 api_name => "open-ils.worm.fingerprint.mods",
1840 method => "fingerprint_mods",
# fingerprint_marc: transforms MARCXML into MODS with $mods_sheet and fingerprints the result
# via _fp_mods.
# NOTE(review): argument unpacking, the origin of $mods_sheet and the return statement are
# not visible in this excerpt.
# NOTE(review): "open-ils.worm.fingerprint.marc" is also registered for biblio_fingerprint
# later in this file — confirm which registration should win.
1845 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or raw XML text.
1850 $xml = $parser->parse_string($xml) unless (ref $xml);
1852 OpenILS::Application::Ingest->post_init();
1853 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1854 $log->debug("Returning [$fp] as fingerprint", INFO);
1857 __PACKAGE__->register_method(
1858 api_name => "open-ils.worm.fingerprint.marc",
1859 method => "fingerprint_marc",
# biblio_fingerprint_record: fetches bib record $rec from storage and fingerprints it through
# "open-ils.worm.fingerprint.marc".
# NOTE(review): argument unpacking, the tail of the storage_req call chain and the return
# statement are not visible in this excerpt.
# NOTE(review): "open-ils.worm.fingerprint.record" is also registered for fingerprint_bibrec
# earlier in this file — confirm which registration should win.
1867 sub biblio_fingerprint_record {
1872 OpenILS::Application::Ingest->post_init();
1874 my $marc = OpenILS::Application::Ingest
1875 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value produced by run() is kept.
1878 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1879 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1882 __PACKAGE__->register_method(
1883 api_name => "open-ils.worm.fingerprint.record",
1884 method => "biblio_fingerprint_record",
# biblio_fingerprint: derives MODS from the given MARC, entityizes both documents and hands
# them to the configured "biblio_fingerprint" script through an OpenILS::Utils::ScriptRunner.
# NOTE(review): argument unpacking, part of the MODS call chain, any guard around the script
# construction and the final return are not visible in this excerpt.
# NOTE(review): "open-ils.worm.fingerprint.marc" is also registered for fingerprint_marc
# earlier in this file — confirm which registration should win.
1890 sub biblio_fingerprint {
1895 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or raw XML text.
1897 $marc = $parser->parse_string($marc) unless (ref $marc);
1899 my $mods = OpenILS::Application::Ingest::entityize(
1901 ->transform( $marc )
# From here on $marc holds the entityized string form of the root element.
1907 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
1910 $log->internal("Got MARC [$marc]");
1911 $log->internal("Created MODS [$mods]");
# Script location and library path are read from the open-ils.storage app settings.
1914 my @pfx = ( "apps", "open-ils.storage","app_settings" );
1915 my $conf = OpenSRF::Utils::SettingsClient->new;
1917 my $libs = $conf->config_value(@pfx, 'script_path');
1918 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# Normalize script_path to an array ref whether one or several paths are configured.
1919 my $script_libs = (ref($libs)) ? $libs : [$libs];
1921 $log->debug("Loading script $script_file for biblio fingerprinting...");
1923 $fp_script = new OpenILS::Utils::ScriptRunner
1924 ( file => $script_file,
1925 paths => $script_libs,
1926 reset_count => 1000 );
1929 $log->debug("Applying environment for biblio fingerprinting...");
# The script receives both documents under the "environment" name.
1931 my $env = {marc => $marc, mods => $mods};
1932 #my $res = {fingerprint => '', quality => '0'};
1934 $fp_script->insert('environment' => $env);
1935 #$fp_script->insert('result' => $res);
1937 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() is logged as an error; the sub then returns 0, provided the
# error() call itself returns a true value.
1939 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
1941 $log->debug("Script for biblio fingerprinting completed successfully...");
1945 __PACKAGE__->register_method(
1946 api_name => "open-ils.worm.fingerprint.marc",
1947 method => "biblio_fingerprint",
1952 # --------------------------------------------------------------------------------
# Cached handle for the metarecord_source_map batch.create storage method; the ingest code
# below fills it in on first use.
1966 my $create_source_map;
# Record-descriptor extraction rules kept as Perl source strings. The ingest loops further
# down run each one through string eval with $ldr (leader) and $oo8 (008 field) in scope.
1981 my %descriptor_code = (
1982 item_type => 'substr($ldr,6,1)',
# 008 offset 29 is used for the listed leader/06 codes, offset 23 for everything else.
1983 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
1984 bib_level => 'substr($ldr,7,1)',
1985 control_type => 'substr($ldr,8,1)',
1986 char_encoding => 'substr($ldr,9,1)',
1987 enc_level => 'substr($ldr,17,1)',
1988 cat_form => 'substr($ldr,18,1)',
1989 pub_status => 'substr($ldr,5,1)',
1990 item_lang => 'substr($oo8,35,3)',
1991 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
1992 audience => 'substr($oo8,22,1)',
# Body of the bibliographic ingest handler registered below as "wormize" (its sub line,
# variable declarations, try/catch framing and closing lines are not shown in this excerpt).
# Visible flow: resolve storage method handles, ensure a transaction, derive fingerprint,
# metarecord, source map, record descriptor, MODS field values and flat MARC rows per record,
# delete the old derived rows, insert the new ones, then commit if the transaction is ours.
# API names containing "no_map" take the branch below (its body is not shown).
2002 if ($self->api_name =~ /no_map/o) {
# Transaction helpers and storage method handles are looked up and kept in variables declared
# outside this body; the visible "unless (...)" guards skip lookups that were already done.
2006 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2008 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2010 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2012 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2014 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2015 unless ($sm_lookup);
2016 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2017 unless ($mr_lookup);
2018 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2019 unless ($mr_update);
2020 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2022 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2023 unless ($update_entry);
# Mass-delete handles for the rows derived from a record.
2024 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2025 unless ($rm_old_sm);
2026 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2027 unless ($rm_old_rd);
2028 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2029 unless ($rm_old_fr);
2030 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2031 unless ($rm_old_tr);
2032 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2033 unless ($rm_old_ar);
2034 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2035 unless ($rm_old_sr);
2036 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2037 unless ($rm_old_kr);
2038 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2039 unless ($rm_old_ser);
# Create handles, including one batch.create per search class stored in %$create.
2040 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2041 unless ($mr_create);
2042 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2043 unless ($create_source_map);
2044 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2045 unless ($rd_create);
2046 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2047 unless ($fr_create);
2048 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2049 unless ($$create{title});
2050 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2051 unless ($$create{author});
2052 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2053 unless ($$create{subject});
2054 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2055 unless ($$create{keyword});
2056 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2057 unless ($$create{series});
# Is a storage transaction already open? If not, begin one here; the commit near the end is
# guarded by the same flag.
2060 my ($outer_xact) = $in_xact->run;
2062 unless ($outer_xact) {
2063 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2064 my ($r) = $begin->run($client);
2065 unless (defined $r and $r) {
2067 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2070 } catch Error with {
2071 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Main loop: one iteration per record entry returned for @docids.
2081 for my $entry ( $lookup->run(@docids) ) {
2082 # step -1: grab the doc from storage
2083 next unless ($entry);
# Load the MARC-to-MODS stylesheet from the configured xsl directory.
# NOTE(review): any guard that would make this a one-time load is not visible here.
2086 my $xslt_doc = $parser->parse_file(
2087 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2088 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2091 my $xml = $entry->marc;
2092 my $docid = $entry->id;
2093 my $marcdoc = $parser->parse_string($xml);
2094 my $modsdoc = $mods_sheet->transform($marcdoc);
2096 my $mods = $modsdoc->documentElement;
2097 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Store the fingerprint on the entry and queue the entry for the batch update below.
2099 $entry->fingerprint( fingerprint_mods( $mods ) );
2100 push @entry_list, $entry;
2102 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Look up a metarecord with this fingerprint; build a new one when none is found.
2105 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2106 if (!$mr || !@$mr) {
2107 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2108 $mr = new Fieldmapper::metabib::metarecord;
2109 $mr->fingerprint( $entry->fingerprint );
2110 $mr->master_record( $entry->id );
2111 my ($new_mr) = $mr_create->run($mr);
# NOTE(review): this tests $mr, which was just assigned a new object above, rather than the
# $new_mr returned by the create call — confirm which was intended.
2113 unless (defined $mr) {
2114 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2117 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue a source map linking this record to its metarecord.
2122 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2123 $sm->metarecord( $mr->id );
2124 $sm->source( $entry->id );
2125 push @source_maps, $sm;
# Leader and 008 feed the string-eval'd rules in %descriptor_code.
2128 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2129 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2131 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2132 for my $rd_field ( keys %descriptor_code ) {
2133 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2135 $rd_obj->record( $docid );
2136 push @rd_list, $rd_obj;
# Per-class field values extracted from the MODS document, keyed by record id.
2138 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2140 # step 2: build the KOHA rows
2141 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2142 $_->record( $docid ) for (@tmp_list);
2143 push @ns_list, @tmp_list;
# API names not ending in "batch" stop after the first record.
2147 last unless ($self->api_name =~ /batch$/o);
# Remove the previously derived rows for all requested ids before inserting fresh ones.
2150 $rm_old_rd->run( { record => \@docids } );
2151 $rm_old_fr->run( { record => \@docids } );
2152 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2153 $rm_old_tr->run( { source => \@docids } );
2154 $rm_old_ar->run( { source => \@docids } );
2155 $rm_old_sr->run( { source => \@docids } );
2156 $rm_old_kr->run( { source => \@docids } );
2157 $rm_old_ser->run( { source => \@docids } );
# Each batch call below is treated as failed when it yields no defined first value.
2160 my ($sm) = $create_source_map->run(@source_maps);
2161 unless (defined $sm) {
2162 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2164 my ($mr) = $mr_update->run(@mr_list);
2165 unless (defined $mr) {
2166 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2170 my ($re) = $update_entry->run(@entry_list);
2171 unless (defined $re) {
2172 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2175 my ($rd) = $rd_create->run(@rd_list);
2176 unless (defined $rd) {
2177 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2180 my ($fr) = $fr_create->run(@ns_list);
2181 unless (defined $fr) {
2182 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2185 # step 5: insert the new metadata
2186 for my $class ( qw/title author subject keyword series/ ) {
2188 for my $doc ( @mods_data ) {
# Every @mods_data element is a single-pair hash: record id => extracted values.
2189 my ($did) = keys %$doc;
2190 my ($data) = values %$doc;
2192 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2193 for my $row ( keys %{ $$data{$class} } ) {
2194 next unless (exists $$data{$class}{$row});
# Rows whose value is empty (or otherwise false) are not stored.
2195 next unless ($$data{$class}{$row}{value});
2196 my $fm_obj = $fm_constructor->new;
2197 $fm_obj->value( $$data{$class}{$row}{value} );
2198 $fm_obj->field( $$data{$class}{$row}{field_id} );
2199 $fm_obj->source( $did );
2200 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2202 push @md_list, $fm_obj;
2206 my ($cr) = $$create{$class}->run(@md_list);
2207 unless (defined $cr) {
2208 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only the transaction opened above; an outer transaction is left to its owner.
2212 unless ($outer_xact) {
2213 $log->debug("Commiting transaction started by the Ingest.", INFO);
2214 my ($c) = $commit->run;
2215 unless (defined $c and $c) {
2217 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Four API names share this handler; "no_map" and a trailing "batch" select the behaviour
# tested through api_name above.
2223 __PACKAGE__->register_method(
2224 api_name => "open-ils.worm.wormize",
2225 method => "wormize",
2229 __PACKAGE__->register_method(
2230 api_name => "open-ils.worm.wormize.no_map",
2231 method => "wormize",
2235 __PACKAGE__->register_method(
2236 api_name => "open-ils.worm.wormize.batch",
2237 method => "wormize",
2241 __PACKAGE__->register_method(
2242 api_name => "open-ils.worm.wormize.no_map.batch",
2243 method => "wormize",
# File-scoped handle declared ahead of authority_wormize; no use of it is visible in this
# excerpt.
2258 my $acreate_source_map;
# authority_wormize: authority-record counterpart of the bibliographic ingest. For each
# record it builds a record descriptor and flat MARC rows, deletes the old derived rows and
# inserts the new ones inside a storage transaction.
# NOTE(review): argument unpacking, variable declarations, try/catch framing and closing
# lines are not visible in this excerpt.
2273 sub authority_wormize {
2280 if ($self->api_name =~ /no_map/o) {
# Transaction helpers and storage method handles (authority variants carry an "a" prefix).
2284 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2286 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2288 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2290 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2292 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2294 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2295 unless ($aupdate_entry);
2296 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2297 unless ($arm_old_rd);
2298 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2299 unless ($arm_old_fr);
2300 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2301 unless ($ard_create);
2302 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2303 unless ($afr_create);
# Begin a transaction only when none is open; the commit at the end uses the same flag.
2306 my ($outer_xact) = $in_xact->run;
2308 unless ($outer_xact) {
2309 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2310 my ($r) = $begin->run($client);
2311 unless (defined $r and $r) {
2313 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2316 } catch Error with {
2317 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# NOTE(review): this iterates $lookup, while the authority retrieve handle initialised above
# is $alookup — looks like the wrong handle; confirm.
2327 for my $entry ( $lookup->run(@docids) ) {
2328 # step -1: grab the doc from storage
2329 next unless ($entry);
2332 # my $xslt_doc = $parser->parse_file(
2333 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2334 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2337 my $xml = $entry->marc;
2338 my $docid = $entry->id;
2339 my $marcdoc = $parser->parse_string($xml);
2340 #my $madsdoc = $mads_sheet->transform($marcdoc);
2342 #my $mads = $madsdoc->documentElement;
2343 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2345 push @entry_list, $entry;
# Leader and 008 feed the string-eval'd rules of %descriptor_code, the same table the
# bibliographic ingest uses.
2347 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2348 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2350 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2351 for my $rd_field ( keys %descriptor_code ) {
2352 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2354 $rd_obj->record( $docid );
2355 push @rd_list, $rd_obj;
2357 # step 2: build the KOHA rows
2358 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2359 $_->record( $docid ) for (@tmp_list);
2360 push @ns_list, @tmp_list;
# API names not ending in "batch" stop after the first record.
2364 last unless ($self->api_name =~ /batch$/o);
# Remove the previously derived rows before inserting fresh ones.
2367 $arm_old_rd->run( { record => \@docids } );
2368 $arm_old_fr->run( { record => \@docids } );
2370 my ($rd) = $ard_create->run(@rd_list);
2371 unless (defined $rd) {
2372 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# NOTE(review): this calls $fr_create although the error text and the handle initialised
# above ($afr_create) refer to the authority full_rec method — confirm.
2375 my ($fr) = $fr_create->run(@ns_list);
2376 unless (defined $fr) {
2377 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only the transaction opened above.
2380 unless ($outer_xact) {
2381 $log->debug("Commiting transaction started by Ingest.", INFO);
2382 my ($c) = $commit->run;
2383 unless (defined $c and $c) {
2385 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# NOTE(review): the API name below is spelled "authortiy", and both registrations point at
# "wormize" rather than "authority_wormize" — confirm whether either is intentional.
2391 __PACKAGE__->register_method(
2392 api_name => "open-ils.worm.authortiy.wormize",
2393 method => "wormize",
2397 __PACKAGE__->register_method(
2398 api_name => "open-ils.worm.authority.wormize.batch",
2399 method => "wormize",
2405 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows: flattens a parsed MARCXML document into full_rec Fieldmapper
# objects for the leader, the controlfields and the children of each datafield.
#   $marcxml - parsed XML document (documentElement is used as the record root)
#   $type    - class name for the row objects; defaults to Fieldmapper::metabib::full_rec
# NOTE(review): the list declaration, most setter calls, the pushes and the return statement
# are not visible in this excerpt.
2408 sub _marcxml_to_full_rows {
2410 my $marcxml = shift;
2411 my $type = shift || 'Fieldmapper::metabib::full_rec';
2415 my $root = $marcxml->documentElement;
2417 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2418 next unless $tagline;
# NOTE(review): leader and controlfield rows are always metabib::full_rec, ignoring $type,
# whereas the datafield rows below honour it — confirm.
2420 my $ns = new Fieldmapper::metabib::full_rec;
# Decompose (NFD) and then drop the mark characters, leaving the base characters.
2423 my $val = NFD($tagline->textContent);
2424 $val =~ s/(\pM+)//gso;
2430 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2431 next unless $tagline;
2433 my $ns = new Fieldmapper::metabib::full_rec;
2435 $ns->tag( $tagline->getAttribute( "tag" ) );
2436 my $val = NFD($tagline->textContent);
2437 $val =~ s/(\pM+)//gso;
2443 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2444 next unless $tagline;
2446 my $tag = $tagline->getAttribute( "tag" );
2447 my $ind1 = $tagline->getAttribute( "ind1" );
2448 my $ind2 = $tagline->getAttribute( "ind2" );
# Iterates all child nodes of the datafield; any filtering of non-subfield nodes would be on
# lines not shown here.
2450 for my $data ( $tagline->childNodes ) {
2453 my $ns = $type->new;
2458 $ns->subfield( $data->getAttribute( "code" ) );
2459 my $val = NFD($data->textContent);
2460 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
2461 $ns->value( lc($val) );
# _get_field_value: collects the text found under the nodes selected by $xpath (relative to
# $root) into one space-separated string with Unicode marks removed.
# NOTE(review): the declaration of $string, the branch structure around the two append
# statements and the return statement are not visible in this excerpt.
2469 sub _get_field_value {
2471 my( $root, $xpath ) = @_;
2475 # grab the set of matching nodes
2476 my @nodes = $root->findnodes( $xpath );
2477 for my $value (@nodes) {
2479 # grab all children of the node
2480 my @children = $value->childNodes();
2481 for my $child (@children) {
2483 # add the childs content to the growing buffer
# quotemeta makes the child's text safe to use as a literal regex pattern.
2484 my $content = quotemeta($child->textContent);
# Substring test: text already contained anywhere in the buffer is skipped.
2485 next if ($string =~ /$content/); # uniquify the values
2486 $string .= $child->textContent . " ";
2489 $string .= $value->textContent . " ";
# Decompose (NFD) and then drop the mark characters, leaving the base characters.
2492 $string = NFD($string);
2493 $string =~ s/(\pM)//gso;
2498 sub modsdoc_to_values {
2499 my( $self, $mods ) = @_;
2501 for my $class (keys %$xpathset) {
2502 $data->{$class} = {};
2503 for my $type (keys %{$xpathset->{$class}}) {
2504 $data->{$class}->{$type} = {};
2505 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};