1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Record formats known to the ingest code, keyed by format name; each entry
# carries the XML namespace URI for that format.
# NOTE(review): the end of this hash is not visible in this view.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
# Logger class name, used as the invocant for the logging calls in this file.
30 our $log = 'OpenSRF::Utils::Logger';
# Shared XML parser and XSLT processor instances.
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
# NOTE(review): fragment -- the enclosing sub header and the closing braces are
# not visible in this view; presumably this is the body of post_init. Confirm.
# The guarded section runs only while $xpathset has no keys.
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
# Directory of XSL stylesheets, taken from the dirs/xsl setting.
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse the MARC21slim2MODS stylesheet and keep it under the mods format,
# unless one is already stored there.
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet, stored under the mods3 format.
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask open-ils.cstore for config.metabib_field rows, filtered on id != undef.
# NOTE(review): the remainder of this call chain is not visible here.
61 my $req = OpenSRF::AppSession
62 ->create('open-ils.cstore')
63 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Store each field definition's xpath, id and format under
# $xpathset->{field_class}{name}.
# NOTE(review): $f is presumably the loop variable over @$req; the loop header
# is not visible in this view.
66 if (ref $req and @$req) {
68 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
69 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
70 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
71 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with a hexadecimal
# numeric character reference of the form &#x<HEX>;.
# NOTE(review): fragment -- the enclosing sub is not visible in this view;
# presumably entityize, which other code in this file calls. Confirm.
87 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
# Ingest a single bib object.
# Returns a hashref with keys full_rec, field_entries, fingerprint and
# descriptor.
# NOTE(review): the argument unpacking that sets $self and $bib is not visible
# in this view.
91 sub ro_biblio_ingest_single_object {
# Entity-encode the record's MARC, then parse it into a document.
95 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
97 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry methods receive the parsed document; the
# fingerprint and descriptor methods receive the XML string.
99 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
100 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
101 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
102 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the bib id onto the produced objects; the descriptor is only touched
# when one was returned.
104 $_->source($bib->id) for (@mXfe);
105 $_->record($bib->id) for (@mfr);
106 $rd->record($bib->id) if ($rd);
108 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
110 __PACKAGE__->register_method(
111 api_name => "open-ils.ingest.full.biblio.object.readonly",
112 method => "ro_biblio_ingest_single_object",
# Ingest a single record supplied as an XML string.
# Returns a hashref with keys full_rec, field_entries, fingerprint and
# descriptor; no record id is stamped onto the results here.
# NOTE(review): the line that sets $self is not visible in this view.
117 sub ro_biblio_ingest_single_xml {
# Entity-encode the next argument, then parse it into a document.
120 my $xml = OpenILS::Application::Ingest::entityize(shift);
122 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry methods receive the parsed document; the
# fingerprint and descriptor methods receive the XML string.
124 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
125 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
126 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
127 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
129 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
131 __PACKAGE__->register_method(
132 api_name => "open-ils.ingest.full.biblio.xml.readonly",
133 method => "ro_biblio_ingest_single_xml",
# Ingest the stored bib record identified by $rec.
# NOTE(review): argument unpacking and the final return are not visible in
# this view.
138 sub ro_biblio_ingest_single_record {
143 OpenILS::Application::Ingest->post_init();
# Retrieve the biblio.record_entry for $rec from open-ils.cstore.
# NOTE(review): the remainder of this call chain is not visible here.
144 my $r = OpenSRF::AppSession
145 ->create('open-ils.cstore')
146 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Bail out unless a non-empty result came back (the check dereferences $r as
# an array).
149 return undef unless ($r and @$r);
# Delegate the actual ingest to the XML variant, passing the record's MARC.
151 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
# Stamp $rec onto the produced objects.
# NOTE(review): the descriptor is dereferenced without a guard here, whereas
# ro_biblio_ingest_single_object only touches it when it is set -- confirm a
# descriptor is always present.
153 $_->source($rec) for (@{$res->{field_entries}});
154 $_->record($rec) for (@{$res->{full_rec}});
155 $res->{descriptor}->record($rec);
# Register the sub above under its API name.
159 __PACKAGE__->register_method(
160 api_name => "open-ils.ingest.full.biblio.record.readonly",
161 method => "ro_biblio_ingest_single_record",
# Streaming variant of the per-record ingest: reads messages from the client
# and responds with one ingest result per message.
# NOTE(review): argument unpacking and loop/sub closings are not visible in
# this view.
166 sub ro_biblio_ingest_stream_record {
170 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible here.
172 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (count => 1, timeout => 5).
174 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# The message content is used as the record argument; undefined content ends
# the loop.
176 my $rec = $resp->content;
177 last unless (defined $rec);
179 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
180 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp $rec onto the field entries and full_rec rows before responding.
182 $_->source($rec) for (@{$res->{field_entries}});
183 $_->record($rec) for (@{$res->{full_rec}});
185 $client->respond( $res );
# Register the sub above under its API name.
190 __PACKAGE__->register_method(
191 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
192 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the XML ingest: reads messages from the client and
# responds with one ingest result per message.
# NOTE(review): argument unpacking and loop/sub closings are not visible in
# this view.
197 sub ro_biblio_ingest_stream_xml {
201 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible here.
203 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (count => 1, timeout => 5).
205 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# The message content is passed on as the XML argument; undefined content
# ends the loop.
207 my $xml = $resp->content;
208 last unless (defined $xml);
210 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
211 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
213 $client->respond( $res );
# Register the sub above under its API name.
218 __PACKAGE__->register_method(
219 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
220 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib objects: reads messages from the client, runs the
# XML ingest on each object's MARC, and responds with the result.
# NOTE(review): argument unpacking and loop/sub closings are not visible in
# this view; no storage write appears in the visible lines.
225 sub rw_biblio_ingest_stream_import {
229 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible here.
231 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (count => 1, timeout => 5).
233 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# The message content is treated as a bib object; undefined content ends the
# loop.
235 my $bib = $resp->content;
236 last unless (defined $bib);
238 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
239 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
# Stamp the bib id onto the field entries and full_rec rows before responding.
241 $_->source($bib->id) for (@{$res->{field_entries}});
242 $_->record($bib->id) for (@{$res->{full_rec}});
244 $client->respond( $res );
# Register the sub above under its API name.
249 __PACKAGE__->register_method(
250 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
251 method => "rw_biblio_ingest_stream_import",
257 # --------------------------------------------------------------------------------
258 # MARC index extraction
260 package OpenILS::Application::Ingest::XPATH;
261 use base qw/OpenILS::Application::Ingest/;
262 use Unicode::Normalize;
264 # give this an XML documentElement and an XPATH expression
# Builds a string from the text content of the nodes matching $xpath, with a
# single space appended after each piece.
# NOTE(review): most of the argument unpacking, the branch structure around
# the two append statements, and the return are not visible in this view.
265 sub xpath_to_string {
269 my $ns_prefix = shift;
# Bind the namespace prefix on the element, but only when both a URI and a
# prefix were supplied.
272 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
276 # grab the set of matching nodes
277 my @nodes = $xml->findnodes( $xpath );
278 for my $value (@nodes) {
280 # grab all children of the node
281 my @children = $value->childNodes();
282 for my $child (@children) {
284 # add the child's content to the growing buffer
# The uniqueness test is an unanchored match of the escaped text against the
# buffer, so a value is also skipped when it occurs inside a longer value
# that was added earlier.
285 my $content = quotemeta($child->textContent);
286 next if ($unique && $string =~ /$content/); # uniquify the values
287 $string .= $child->textContent . " ";
# Append the matched node's whole text content.
290 $string .= $value->textContent . " ";
# For each requested class, extracts one normalized index string per field
# definition in $xpathset and responds with it as a field-entry object.
# NOTE(review): argument unpacking, some statements inside the loops and the
# block closings are not visible in this view.
296 sub class_index_string_xml {
302 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document or a raw XML string; strings are
# entity-encoded and parsed first.
303 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
307 for my $class (@classes) {
# Fieldmapper class used to build the field entries for this class.
308 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
309 for my $type ( keys %{ $xpathset->{$class} } ) {
# Field definition (xpath, id, format) and the matching format entry.
311 my $def = $xpathset->{$class}->{$type};
312 my $sf = $supported_formats{$def->{format}};
# Reuse a previously transformed document for this format when one is cached;
# otherwise run the format's stylesheet and cache the result.
317 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
318 $transform_cache{$def->{format}} = $document;
# Extract the text; the format's namespace URI is passed with the format name
# as its prefix.
321 my $value = xpath_to_string(
322 $document->documentElement => $def->{xpath},
323 $sf->{ns} => $def->{format},
# Normalize: drop Unicode mark characters (\pM) and "other" characters (\pC),
# then remove a period that directly follows a word character.
329 $value =~ s/\pM+//sgo;
330 $value =~ s/\pC+//sgo;
331 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
333 $value =~ s/(\w)\./$1/sgo;
# Build the field entry carrying the value and the field definition's id, and
# send it to the client.
336 my $fm = $class_constructor->new;
337 $fm->value( $value );
338 $fm->field( $xpathset->{$class}->{$type}->{id} );
339 $client->respond($fm);
# Register the sub above under its API name.
344 __PACKAGE__->register_method(
345 api_name => "open-ils.ingest.field_entry.class.xml",
346 method => "class_index_string_xml",
# Record-id variant of the per-class field-entry extraction: fetches the bib
# record for $rec and streams back the field entries for @classes.
# NOTE(review): argument unpacking and the block closings are not visible in
# this view.
352 sub class_index_string_record {
358 OpenILS::Application::Ingest->post_init();
# Retrieve the biblio.record_entry for $rec from open-ils.cstore.
# NOTE(review): the remainder of this call chain is not visible here.
359 my $r = OpenSRF::AppSession
360 ->create('open-ils.cstore')
361 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Bail out unless a non-empty result came back (the check dereferences $r as
# an array).
364 return undef unless ($r and @$r);
# Run the XML variant on the record's MARC and relay each field entry.
366 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
368 $client->respond($fm);
# Register the sub above under its API name.
372 __PACKAGE__->register_method(
373 api_name => "open-ils.ingest.field_entry.class.record",
374 method => "class_index_string_record",
# Extracts field entries for every class present in $xpathset from the given
# XML and relays each one to the client.
# NOTE(review): argument unpacking and the block closings are not visible in
# this view.
380 sub all_index_string_xml {
385 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
386 $client->respond($fm);
# Register the sub above under its API name.
390 __PACKAGE__->register_method(
391 api_name => "open-ils.ingest.extract.field_entry.all.xml",
392 method => "all_index_string_xml",
# Record-id variant of the all-classes extraction: fetches the bib record for
# $rec and relays field entries for every class present in $xpathset.
# NOTE(review): argument unpacking and the block closings are not visible in
# this view.
398 sub all_index_string_record {
403 OpenILS::Application::Ingest->post_init();
# Retrieve the biblio.record_entry for $rec from open-ils.cstore.
# NOTE(review): the remainder of this call chain is not visible here.
404 my $r = OpenSRF::AppSession
405 ->create('open-ils.cstore')
406 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Bail out unless a non-empty result came back (the check dereferences $r as
# an array).
409 return undef unless ($r and @$r);
411 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
413 $client->respond($fm);
# Register the sub above under its API name.
417 __PACKAGE__->register_method(
418 api_name => "open-ils.ingest.extract.field_entry.all.record",
419 method => "all_index_string_record",
425 # --------------------------------------------------------------------------------
428 package OpenILS::Application::Ingest::FlatMARC;
429 use base qw/OpenILS::Application::Ingest/;
430 use Unicode::Normalize;
# Walks a MARCXML document's leader, controlfield and datafield/subfield
# elements to produce full_rec rows.
# NOTE(review): the lines that unpack $marcxml, create $ns and collect into
# @ns_list, as well as the return, are not visible in this view.
433 sub _marcxml_to_full_rows {
# Second argument selects the Fieldmapper namespace; defaults to 'metabib'.
436 my $xmltype = shift || 'metabib';
438 my $type = "Fieldmapper::${xmltype}::full_rec";
# First element whose local name is "record", whatever its namespace.
442 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader: take the text content and strip Unicode mark characters.
444 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
445 next unless $tagline;
450 my $val = $tagline->textContent;
452 $val =~ s/(\pM+)//gso;
# Control fields: record the tag attribute, strip mark characters from the text.
458 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
459 next unless $tagline;
463 $ns->tag( $tagline->getAttribute( "tag" ) );
464 my $val = $tagline->textContent;
466 $val =~ s/(\pM+)//gso;
# Data fields: read tag and both indicators, then visit each subfield.
472 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
473 next unless $tagline;
475 my $tag = $tagline->getAttribute( "tag" );
476 my $ind1 = $tagline->getAttribute( "ind1" );
477 my $ind2 = $tagline->getAttribute( "ind2" );
479 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
# Subfield code, and the value with mark characters stripped and lower-cased.
487 $ns->subfield( $data->getAttribute( "code" ) );
488 my $val = $data->textContent;
490 $val =~ s/(\pM+)//gso;
491 $ns->value( lc($val) );
497 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# NOTE(review): fragment -- the enclosing sub header and argument unpacking
# are not visible in this view; the registrations below name the method
# flat_marc_xml.
506 $log->debug("processing [$xml]");
# Accept either a parsed document or a raw XML string; strings are
# entity-encoded and parsed first.
508 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# Row type is 'metabib' unless the API name being served contains "authority".
510 my $type = 'metabib';
511 $type = 'authority' if ($self->api_name =~ /authority/o);
513 OpenILS::Application::Ingest->post_init();
# Send each generated row to the client.
515 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same method is registered under an authority and a biblio API name.
518 __PACKAGE__->register_method(
519 api_name => "open-ils.ingest.flat_marc.authority.xml",
520 method => "flat_marc_xml",
525 __PACKAGE__->register_method(
526 api_name => "open-ils.ingest.flat_marc.biblio.xml",
527 method => "flat_marc_xml",
# Record-id variant of the flat MARC extraction: fetches the record for $rec
# and relays its flat MARC rows to the client.
# NOTE(review): argument unpacking, the declaration of $type and the block
# closings are not visible in this view.
533 sub flat_marc_record {
# Switch to 'authority' when the API name being served contains "authority".
539 $type = 'authority' if ($self->api_name =~ /authority/o);
541 OpenILS::Application::Ingest->post_init();
# Retrieve the ${type}.record_entry for $rec from open-ils.cstore.
# NOTE(review): the remainder of this call chain is not visible here.
542 my $r = OpenSRF::AppSession
543 ->create('open-ils.cstore')
544 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
# Bail out when no record came back or it has no MARC.
548 return undef unless ($r and $r->marc);
# Run the matching XML variant, then relay each row and log it as JSON.
550 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
551 for my $row (@rows) {
552 $client->respond($row);
553 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same method is registered under a biblio and an authority API name.
557 __PACKAGE__->register_method(
558 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
559 method => "flat_marc_record",
564 __PACKAGE__->register_method(
565 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
566 method => "flat_marc_record",
572 # --------------------------------------------------------------------------------
575 package OpenILS::Application::Ingest::Biblio::Fingerprint;
576 use base qw/OpenILS::Application::Ingest/;
577 use Unicode::Normalize;
578 use OpenSRF::EX qw/:try/;
# Record-id variant of fingerprinting: fetches the bib record for $rec and
# runs the XML fingerprint method on its MARC.
# NOTE(review): argument unpacking and the return are not visible in this view.
580 sub biblio_fingerprint_record {
585 OpenILS::Application::Ingest->post_init();
# Retrieve the biblio.record_entry for $rec from open-ils.cstore.
# NOTE(review): the remainder of this call chain is not visible here.
587 my $r = OpenSRF::AppSession
588 ->create('open-ils.cstore')
589 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Bail out when no record came back or it has no MARC.
592 return undef unless ($r and $r->marc);
594 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# Logged through debug() with the INFO level constant.
595 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Register the sub above under its API name.
598 __PACKAGE__->register_method(
599 api_name => "open-ils.ingest.fingerprint.record",
600 method => "biblio_fingerprint_record",
# Computes a fingerprint for a MARC XML string by running a configured script
# through OpenILS::Utils::ScriptRunner.
# NOTE(review): the line that sets $self/$client, any guard around the script
# setup, and the return are not visible in this view.
606 sub biblio_fingerprint {
# Entity-encode the next argument.
609 my $xml = OpenILS::Application::Ingest::entityize(shift);
611 $log->internal("Got MARC [$xml]");
# Settings are read from apps / open-ils.ingest / app_settings.
614 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
615 my $conf = OpenSRF::Utils::SettingsClient->new;
617 my $libs = $conf->config_value(@pfx, 'script_path');
618 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or an array ref; normalize to an array ref.
619 my $script_libs = (ref($libs)) ? $libs : [$libs];
621 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines; presumably a
# variable declared outside this sub -- confirm.
623 $fp_script = new OpenILS::Utils::ScriptRunner
624 ( file => $script_file,
625 paths => $script_libs,
626 reset_count => 100 );
# Hand the MARC to the script as environment => { marc => ... }.
629 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): on a false run() result, the early return only happens if
# $log->error(...) itself returns a true value -- confirm.
631 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
632 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the sub above under its API name.
636 __PACKAGE__->register_method(
637 api_name => "open-ils.ingest.fingerprint.xml",
638 method => "biblio_fingerprint",
# Extracts a record descriptor from a MARC XML string by running a configured
# script through OpenILS::Utils::ScriptRunner.
# NOTE(review): the line that sets $self/$client, any guard around the script
# setup, and the return are not visible in this view.
644 sub biblio_descriptor {
# Entity-encode the next argument.
647 my $xml = OpenILS::Application::Ingest::entityize(shift);
649 $log->internal("Got MARC [$xml]");
# Settings are read from apps / open-ils.ingest / app_settings.
652 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
653 my $conf = OpenSRF::Utils::SettingsClient->new;
655 my $libs = $conf->config_value(@pfx, 'script_path');
656 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or an array ref; normalize to an array ref.
657 my $script_libs = (ref($libs)) ? $libs : [$libs];
659 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is not declared in the visible lines; presumably a
# variable declared outside this sub -- confirm.
661 $rd_script = new OpenILS::Utils::ScriptRunner
662 ( file => $script_file,
663 paths => $script_libs,
664 reset_count => 100 );
667 $log->debug("Setting up environment for descriptor extraction script...");
# The MARC is inserted under the dotted key 'environment.marc' here, whereas
# biblio_fingerprint inserts 'environment' => { marc => ... }.
668 $rd_script->insert('environment.marc' => $xml => 1);
669 $log->debug("Environment building complete...");
# NOTE(review): on a false run() result, the early return only happens if
# $log->error(...) itself returns a true value -- confirm.
671 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
672 $log->debug("Script for biblio descriptor extraction completed successfully");
# Register the sub above under its API name.
676 __PACKAGE__->register_method(
677 api_name => "open-ils.ingest.descriptor.xml",
678 method => "biblio_descriptor",
# NOTE(review): fragment -- the enclosing sub header is not visible in this
# view; presumably in_transaction, which other code in this file calls.
# Returns whatever the storage 'transaction.current' request yields.
689 OpenILS::Application::Ingest->post_init();
690 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction and returns the result of the
# 'transaction.current' request.
# NOTE(review): argument unpacking, the try/catch and if/else scaffolding and
# the block closings are not visible in this view, so the exact branch each
# statement belongs to cannot be confirmed here.
693 sub begin_transaction {
697 OpenILS::Application::Ingest->post_init();
# Current transaction state as reported by storage.
698 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
702 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
703 #__PACKAGE__->st_sess->connect;
704 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A missing or false result from BEGIN triggers a rollback request and a PANIC.
705 unless (defined $r and $r) {
706 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
707 #__PACKAGE__->st_sess->disconnect;
708 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
712 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
715 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Issues a storage 'transaction.rollback' request.
# NOTE(review): argument unpacking, the conditions selecting between the
# statements below, and the return are not visible in this view.
718 sub rollback_transaction {
722 OpenILS::Application::Ingest->post_init();
# Current transaction state as reported by storage.
723 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
727 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
729 $log->debug("Ingest isn't inside a transaction.", INFO);
732 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Issues a storage 'transaction.commit' request; a missing or false result
# triggers a rollback request and a PANIC.
# NOTE(review): argument unpacking, the conditions selecting between the
# statements below, and the return are not visible in this view.
738 sub commit_transaction {
742 OpenILS::Application::Ingest->post_init();
# Current transaction state as reported by storage.
743 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
746 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
748 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
749 unless (defined $r and $r) {
750 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
751 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
753 #__PACKAGE__->st_sess->disconnect;
755 $log->debug("Ingest isn't inside a transaction.", INFO);
758 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# NOTE(review): fragment -- the enclosing sub header and the line that sets
# $method are not visible in this view; presumably storage_req, which other
# code in this file calls.
# Runs the looked-up method with the remaining arguments and returns only the
# first element of its result list.
767 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
768 return shift( @res );
# Deletes the derived authority rows (full_rec and record_descriptor) for
# $rec, inside a savepoint.
# NOTE(review): argument unpacking, the try/catch scaffolding, the code that
# sets $commit and $success, and the return are not visible in this view.
771 sub scrub_authority_record {
# Start a transaction when none is active; failure to start is a PANIC.
777 if (!OpenILS::Application::Ingest->in_transaction) {
778 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
784 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
786 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
787 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
789 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and roll back to the savepoint.
791 $log->debug('Scrubbing failed : '.shift(), ERROR);
792 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# When $commit is set, finish the transaction according to $success.
796 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
797 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
800 __PACKAGE__->register_method(
801 api_name => "open-ils.worm.scrub.authority",
802 method => "scrub_authority_record",
# Deletes the derived metabib rows for $rec and repairs or removes metarecords
# whose master record is $rec, inside a savepoint.
# NOTE(review): argument unpacking, the try/catch scaffolding, the condition
# choosing between re-mastering and deleting a metarecord, the code that sets
# $commit and $success, and the return are not visible in this view.
808 sub scrub_metabib_record {
# A hash argument is treated as a search and replaced by the result of an
# id_list search_where request.
813 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
814 $rec = OpenILS::Application::Ingest->storage_req(
815 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Start a transaction when none is active; failure to start is a PANIC.
820 if (!OpenILS::Application::Ingest->in_transaction) {
821 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
827 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Mass-delete every derived row keyed on this record.
829 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
830 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
831 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
832 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
833 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
834 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
835 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
836 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Find metarecords that name this record as their master.
838 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
839 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
841 for my $mr (@$masters) {
842 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source-map rows for this metarecord.
843 my $others = OpenILS::Application::Ingest->storage_req(
844 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Re-master the metarecord onto the first remaining source and clear its mods.
847 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
848 $mr->master_record($others->[0]->source);
849 OpenILS::Application::Ingest->storage_req(
850 'open-ils.storage.direct.metabib.metarecord.remote_update',
852 { master_record => $others->[0]->source, mods => undef }
# Delete the metarecord; each step is reported through warn as well as the log.
855 warn "Removing metarecord whose master is $rec";
856 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
857 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
858 warn "Metarecord removed";
859 $log->debug( "Metarecord removed", DEBUG);
863 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and roll back to the savepoint.
866 $log->debug('Scrubbing failed : '.shift(), ERROR);
867 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# When $commit is set, finish the transaction according to $success.
871 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
872 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
875 __PACKAGE__->register_method(
876 api_name => "open-ils.worm.scrub.biblio",
877 method => "scrub_metabib_record",
# Re-ingests the source records of a metarecord by calling
# wormize_biblio_record for each source.
# NOTE(review): heavily truncated in this view -- argument unpacking, the loop
# over $recs, the statements the two hash literals belong to, and the return
# are not visible.
882 sub wormize_biblio_metarecord {
# Source-map rows for the metarecord $mrec.
887 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
# Called as a plain function, passing $self and $client through explicitly.
893 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is not declared in the visible lines, while the
# neighbouring code uses $r and $mrec -- confirm which variable is intended.
895 { record => $r->source,
896 metarecord => $rec->metarecord,
903 { record => $r->source,
904 metarecord => $rec->metarecord,
# The same method is registered under four API names: the base name plus the
# .nomap, .noscrub and .nomap.noscrub variants.
913 __PACKAGE__->register_method(
914 api_name => "open-ils.worm.wormize.metarecord",
915 method => "wormize_biblio_metarecord",
920 __PACKAGE__->register_method(
921 api_name => "open-ils.worm.wormize.metarecord.nomap",
922 method => "wormize_biblio_metarecord",
927 __PACKAGE__->register_method(
928 api_name => "open-ils.worm.wormize.metarecord.noscrub",
929 method => "wormize_biblio_metarecord",
934 __PACKAGE__->register_method(
935 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
936 method => "wormize_biblio_metarecord",
# Rebuilds the derived metabib data for one or more bib records: fingerprint,
# flat MARC rows, record descriptor, per-class field entries and (unless the
# API name contains "nomap") metarecord mappings, then writes them in batches.
# NOTE(review): argument unpacking, the try/catch scaffolding, several
# declarations (@full_rec, %field_entry, @source_map, %metarecord), the loop
# over $bibs, the code that sets $commit and $success, and the return are not
# visible in this view.
943 sub wormize_biblio_record {
# A hash argument is treated as a search and replaced by the result of an
# id_list search_where request.
948 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
949 $rec = OpenILS::Application::Ingest->storage_req(
950 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Start a transaction when none is active; failure to start is a PANIC.
956 if (!OpenILS::Application::Ingest->in_transaction) {
957 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# Scrub existing derived data first, unless the API name contains "noscrub".
964 unless ($self->api_name =~ /noscrub/o) {
965 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Fetch the bib record(s) to process.
969 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
972 my @rec_descriptor = ();
# Per-record savepoint, named after the record id.
984 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
986 my $xml = $parser->parse_string($r->marc);
988 #update the fingerprint
989 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# Only update the stored record when the fingerprint or quality changed.
# NOTE(review): quality is compared with the string operator ne -- confirm
# that string comparison is intended for this value.
990 OpenILS::Application::Ingest->storage_req(
991 'open-ils.storage.direct.biblio.record_entry.remote_update',
993 { fingerprint => $fp->{fingerprint},
994 quality => int($fp->{quality}) }
995 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
# Flat MARC rows, each stamped with the record id.
998 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
999 $fr->record( $r->id );
1000 push @full_rec, $fr;
1003 # the rec_descriptor stuff
1004 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1005 $rd->record( $r->id );
1006 push @rec_descriptor, $rd;
1008 # the indexing field entry stuff
1009 for my $class ( qw/title author subject keyword series/ ) {
1010 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1011 $fe->source( $r->id );
1012 push @{$field_entry{$class}}, $fe;
# Metarecord mapping, skipped when the API name contains "nomap": look up a
# metarecord by fingerprint, create one with this record as master if needed
# (the guarding condition is not visible here), and queue a source-map row.
1016 unless ($self->api_name =~ /nomap/o) {
1017 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1020 $mr = Fieldmapper::metabib::metarecord->new;
1021 $mr->fingerprint( $fp->{fingerprint} );
1022 $mr->master_record( $r->id );
1023 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1026 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1027 $mr_map->metarecord( $mr->id );
1028 $mr_map->source( $r->id );
1029 push @source_map, $mr_map;
1031 $metarecord{$mr->id} = $mr;
1033 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for one record: log and roll back that record's savepoint.
1035 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1036 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Write phase, entered only when at least one descriptor was collected.
1041 if (@rec_descriptor) {
1042 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1044 OpenILS::Application::Ingest->storage_req(
1045 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For each touched metarecord, pick the source bib with the highest quality
# as master and clear the metarecord's mods.
1049 for my $mr ( values %metarecord ) {
1050 my $sources = OpenILS::Application::Ingest->storage_req(
1051 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1055 my $bibs = OpenILS::Application::Ingest->storage_req(
1056 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1057 [ map { $_->source } @$sources ]
1060 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1062 OpenILS::Application::Ingest->storage_req(
1063 'open-ils.storage.direct.metabib.metarecord.remote_update',
1065 { master_record => $master->id, mods => undef }
# Batch-create the collected descriptors, full_rec rows and field entries;
# the field-entry batches are skipped when their list is empty.
1069 OpenILS::Application::Ingest->storage_req(
1070 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1072 ) if (@rec_descriptor);
1074 OpenILS::Application::Ingest->storage_req(
1075 'open-ils.storage.direct.metabib.full_rec.batch.create',
1079 OpenILS::Application::Ingest->storage_req(
1080 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1081 @{ $field_entry{title} }
1082 ) if (@{ $field_entry{title} });
1084 OpenILS::Application::Ingest->storage_req(
1085 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1086 @{ $field_entry{author} }
1087 ) if (@{ $field_entry{author} });
1089 OpenILS::Application::Ingest->storage_req(
1090 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1091 @{ $field_entry{subject} }
1092 ) if (@{ $field_entry{subject} });
1094 OpenILS::Application::Ingest->storage_req(
1095 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1096 @{ $field_entry{keyword} }
1097 ) if (@{ $field_entry{keyword} });
1099 OpenILS::Application::Ingest->storage_req(
1100 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1101 @{ $field_entry{series} }
1102 ) if (@{ $field_entry{series} });
1104 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the write phase: log and roll back to the savepoint.
1110 $log->debug('Wormization failed : '.shift(), ERROR);
1111 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# When $commit is set, finish the transaction according to $success.
1115 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1116 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The same method is registered under four API names; the sub inspects the
# API name for "nomap" and "noscrub" to switch behavior.
1119 __PACKAGE__->register_method(
1120 api_name => "open-ils.worm.wormize.biblio",
1121 method => "wormize_biblio_record",
1125 __PACKAGE__->register_method(
1126 api_name => "open-ils.worm.wormize.biblio.nomap",
1127 method => "wormize_biblio_record",
1131 __PACKAGE__->register_method(
1132 api_name => "open-ils.worm.wormize.biblio.noscrub",
1133 method => "wormize_biblio_record",
1137 __PACKAGE__->register_method(
1138 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1139 method => "wormize_biblio_record",
# Rebuilds the flat MARC (full_rec) rows for one or more authority records.
# NOTE(review): argument unpacking, the try/catch scaffolding, the declaration
# of @full_rec, the code that sets $commit and $success, and the return are
# not visible in this view.
1144 sub wormize_authority_record {
# Start a transaction when none is active; failure to start is a PANIC.
1150 if (!OpenILS::Application::Ingest->in_transaction) {
1151 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1157 # clean up the cruft
1158 unless ($self->api_name =~ /noscrub/o) {
1159 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the variable name, these are authority record entries.
1163 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# Stays empty here: the descriptor extraction below is commented out.
1166 my @rec_descriptor = ();
1167 for my $r (@$bibs) {
1168 my $xml = $parser->parse_string($r->marc);
1170 # the full_rec stuff
1171 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1172 $fr->record( $r->id );
1173 push @full_rec, $fr;
1176 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1177 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1178 #$rd->record( $r->id );
1179 #push @rec_descriptor, $rd;
# Write the collected rows inside a savepoint.
1183 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1185 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1186 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1188 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error and roll back to the savepoint.
1191 $log->debug('Wormization failed : '.shift(), ERROR);
1192 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# When $commit is set, finish the transaction according to $success.
1196 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1197 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The same method is registered with and without the .noscrub suffix.
1200 __PACKAGE__->register_method(
1201 api_name => "open-ils.worm.wormize.authority",
1202 method => "wormize_authority_record",
1206 __PACKAGE__->register_method(
1207 api_name => "open-ils.worm.wormize.authority.noscrub",
1208 method => "wormize_authority_record",
1214 # --------------------------------------------------------------------------------
1215 # MARC index extraction
1217 package OpenILS::Application::Ingest::XPATH;
1218 use base qw/OpenILS::Application::Ingest/;
1219 use Unicode::Normalize;
1221 # give this a MODS documentElement and an XPATH expression
# Builds a string from the text content of the nodes matching $xpath, with a
# single space appended after each piece, and returns it in Unicode
# normalization form D (NFD).
# NOTE(review): most of the argument unpacking and the branch structure around
# the two append statements are not visible in this view.
1222 sub _xpath_to_string {
1226 my $ns_prefix = shift;
# Bind the namespace prefix on the element, but only when both a URI and a
# prefix were supplied.
1229 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1233 # grab the set of matching nodes
1234 my @nodes = $xml->findnodes( $xpath );
1235 for my $value (@nodes) {
1237 # grab all children of the node
1238 my @children = $value->childNodes();
1239 for my $child (@children) {
1241 # add the child's content to the growing buffer
# The uniqueness test is an unanchored match of the escaped text against the
# buffer, so a value is also skipped when it occurs inside a longer value
# that was added earlier.
1242 my $content = quotemeta($child->textContent);
1243 next if ($unique && $string =~ /$content/); # uniquify the values
1244 $string .= $child->textContent . " ";
# Append the matched node's whole text content.
1247 $string .= $value->textContent . " ";
1250 return NFD($string);
# For one class, extracts a normalized, lower-cased index string per field
# definition in $xpathset from the MODS transform of the record and responds
# with it as a field-entry object.
# NOTE(review): argument unpacking, the remaining arguments of the
# _xpath_to_string call, the definition of $mods_sheet and the block closings
# are not visible in this view.
1253 sub class_all_index_string_xml {
1259 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document or a raw XML string.
1260 $xml = $parser->parse_string($xml) unless (ref $xml);
# Fieldmapper class used to build the field entries for this class.
1262 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1263 for my $type ( keys %{ $xpathset->{$class} } ) {
# The stylesheet transform is run again for every field type in this loop.
1264 my $value = _xpath_to_string(
1265 $mods_sheet->transform($xml)->documentElement,
1266 $xpathset->{$class}->{$type}->{xpath},
1267 "http://www.loc.gov/mods/",
# Normalize: drop Unicode mark characters (\pM) and "other" characters (\pC),
# remove a period that directly follows a word character, then lower-case.
1274 $value =~ s/\pM+//sgo;
1275 $value =~ s/\pC+//sgo;
1276 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
1278 $value =~ s/(\w)\./$1/sgo;
1279 $value = lc($value);
# Build the field entry carrying the value and the field definition's id, and
# send it to the client.
1281 my $fm = $class_constructor->new;
1282 $fm->value( $value );
1283 $fm->field( $xpathset->{$class}->{$type}->{id} );
1284 $client->respond($fm);
# Register the sub above under its API name.
1288 __PACKAGE__->register_method(
1289 api_name => "open-ils.worm.field_entry.class.xml",
1290 method => "class_all_index_string_xml",
# Record-id front end for open-ils.worm.field_entry.class.xml: retrieves the
# bibliographic record entry for $rec, runs the XML method on its MARC for
# $class, and relays every result to the client.
1296 sub class_all_index_string_record {
1302 OpenILS::Application::Ingest->post_init();
1303 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1305 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1307 $client->respond($fm);
1311 __PACKAGE__->register_method(
1312 api_name => "open-ils.worm.field_entry.class.record",
1313 method => "class_all_index_string_record",
# Return the index string for a single class/type pair: the MARC is
# transformed to MODS and the XPath configured for $class/$type in $xpathset
# is evaluated against the result.
1320 sub class_index_string_xml {
1327 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
1328 $xml = $parser->parse_string($xml) unless (ref $xml);
# The trailing 1 sits in the argument position that other callers in this
# file fill with their $unique flag.
1329 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1331 __PACKAGE__->register_method(
1332 api_name => "open-ils.worm.class.type.xml",
1333 method => "class_index_string_xml",
# Record-id front end for open-ils.worm.class.type.xml: retrieves the
# bibliographic record entry for $rec and runs the XML method on its MARC
# for the given $class and $type.
1338 sub class_index_string_record {
1345 OpenILS::Application::Ingest->post_init();
1346 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1348 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1349 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1352 __PACKAGE__->register_method(
1353 api_name => "open-ils.worm.class.type.record",
1354 method => "class_index_string_record",
1368 OpenILS::Application::Ingest->post_init();
1369 $xml = $parser->parse_string($xml) unless (ref $xml);
1370 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1372 __PACKAGE__->register_method(
1373 api_name => "open-ils.worm.xpath.xml",
1374 method => "xml_xpath",
1388 OpenILS::Application::Ingest->post_init();
1389 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1391 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1392 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1395 __PACKAGE__->register_method(
1396 api_name => "open-ils.worm.xpath.record",
1397 method => "record_xpath",
1403 # --------------------------------------------------------------------------------
1406 package OpenILS::Application::Ingest::Biblio::Leader;
1407 use base qw/OpenILS::Application::Ingest/;
1408 use Unicode::Normalize;
1410 our %marc_type_groups = (
1413 VIS => q/[gkro]{1}/,
# Build one anchored pattern out of the patterns stored for the requested
# group names. A hash slice (@marc_type_groups{...}) fetches one pattern per
# name; a scalar element lookup ($marc_type_groups{@_}) would not. The
# alternation is wrapped in (?:...) so that ^ and $ apply to every
# alternative instead of only the first and the last.
1422 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
1426 our %biblio_descriptor_code = (
1427 item_type => sub { substr($ldr,6,1); },
1430 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1431 return substr($oo8,29,1);
1432 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1433 return substr($oo8,23,1);
1437 bib_level => sub { substr($ldr,7,1); },
1438 control_type => sub { substr($ldr,8,1); },
1439 char_encoding => sub { substr($ldr,9,1); },
1440 enc_level => sub { substr($ldr,17,1); },
1441 cat_form => sub { substr($ldr,18,1); },
1442 pub_status => sub { substr($ldr,5,1); },
1443 item_lang => sub { substr($oo8,35,3); },
1444 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1445 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1446 audience => sub { substr($oo8,22,1); },
# Build a metabib record_descriptor from a parsed MARC document by running
# every code ref in %biblio_descriptor_code and storing its result in the
# descriptor field of the same name.
1449 sub _extract_biblio_descriptors {
# The code refs read $ldr and $oo8 as package variables, so the values are
# localised here for the duration of this call. The XPath matches on
# local-name() and therefore does not depend on a namespace prefix.
1452 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1453 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1454 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1456 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each hash key doubles as the name of the setter called on the descriptor.
1457 for my $rd_field ( keys %biblio_descriptor_code ) {
1458 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# Return the record descriptor for a MARC document supplied either as a raw
# XML string or as an already-parsed document.
1464 sub extract_biblio_desc_xml {
1469 $xml = $parser->parse_string($xml) unless (ref $xml);
1471 return _extract_biblio_descriptors( $xml );
1473 __PACKAGE__->register_method(
1474 api_name => "open-ils.worm.biblio_leader.xml",
1475 method => "extract_biblio_desc_xml",
# Record-id front end for open-ils.worm.biblio_leader.xml: retrieves the
# bibliographic record entry for $rec and runs the XML method on its MARC.
1480 sub extract_biblio_desc_record {
1485 OpenILS::Application::Ingest->post_init();
1486 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept; it is logged as JSON.
1488 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1489 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1492 __PACKAGE__->register_method(
1493 api_name => "open-ils.worm.biblio_leader.record",
1494 method => "extract_biblio_desc_record",
1499 # --------------------------------------------------------------------------------
1502 package OpenILS::Application::Ingest::FlatMARC;
1503 use base qw/OpenILS::Application::Ingest/;
1504 use Unicode::Normalize;
# Flatten a parsed MARCXML document into full_rec Fieldmapper objects: the
# leader, every controlfield and every datafield subfield each get an object.
# The optional second argument selects the Fieldmapper family and defaults
# to 'metabib'.
1507 sub _marcxml_to_full_rows {
1509 my $marcxml = shift;
1510 my $xmltype = shift || 'metabib';
# Class name used to construct every row object below.
1512 my $type = "Fieldmapper::${xmltype}::full_rec";
# Take the first <record> element found, whatever its namespace prefix.
1516 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
1518 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1519 next unless $tagline;
1521 my $ns = $type->new;
1524 my $val = $tagline->textContent;
# Strip combining marks.
1526 $val =~ s/(\pM+)//gso;
# Control fields: the tag attribute is copied onto the row.
1532 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1533 next unless $tagline;
1535 my $ns = $type->new;
1537 $ns->tag( $tagline->getAttribute( "tag" ) );
1538 my $val = $tagline->textContent;
1540 $val =~ s/(\pM+)//gso;
# Data fields: tag and indicators are read once per field, then each
# <subfield> child gets its own row.
1546 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1547 next unless $tagline;
1549 my $tag = $tagline->getAttribute( "tag" );
1550 my $ind1 = $tagline->getAttribute( "ind1" );
1551 my $ind2 = $tagline->getAttribute( "ind2" );
1553 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1556 my $ns = $type->new;
1561 $ns->subfield( $data->getAttribute( "code" ) );
1562 my $val = $data->textContent;
1564 $val =~ s/(\pM+)//gso;
# Subfield values are stored lowercased.
1565 $ns->value( lc($val) );
1571 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1580 $xml = $parser->parse_string($xml) unless (ref $xml);
1582 my $type = 'metabib';
1583 $type = 'authority' if ($self->api_name =~ /authority/o);
1585 OpenILS::Application::Ingest->post_init();
1587 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1590 __PACKAGE__->register_method(
1591 api_name => "open-ils.worm.flat_marc.authority.xml",
1592 method => "flat_marc_xml",
1597 __PACKAGE__->register_method(
1598 api_name => "open-ils.worm.flat_marc.biblio.xml",
1599 method => "flat_marc_xml",
# Record-id front end for the flat_marc XML methods: retrieves the record
# entry for $rec and relays every row produced from its MARC to the client.
1605 sub flat_marc_record {
# Bibliographic by default; an api_name containing "authority" switches both
# the storage call and the XML method to the authority variants.
1610 my $type = 'biblio';
1611 $type = 'authority' if ($self->api_name =~ /authority/o);
1613 OpenILS::Application::Ingest->post_init();
1614 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1616 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1619 __PACKAGE__->register_method(
1620 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1621 method => "flat_marc_record",
1626 __PACKAGE__->register_method(
1627 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1628 method => "flat_marc_record",
1635 # --------------------------------------------------------------------------------
1638 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1639 use base qw/OpenILS::Application::Ingest/;
1640 use Unicode::Normalize;
1641 use OpenSRF::EX qw/:try/;
1643 my @fp_mods_xpath = (
1644 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1647 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1648 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1649 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1650 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1653 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1655 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1656 $text =~ s/\pM+//gso;
1657 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1659 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1660 $text =~ s/\s+/ /sgo;
1661 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1662 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1663 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1664 $text =~ s/\b(?:the|an?)\b//sgo;
1665 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1666 $text =~ s/\[.[^\]]+\]//sgo;
1667 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1668 $text =~ s/\s*[;\/\.]*$//sgo;
1669 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1674 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1675 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1678 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1680 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1681 $text =~ s/\pM+//gso;
1682 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1684 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1685 $text =~ s/\s+/ /sgo;
1686 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1687 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1688 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1689 $text =~ s/,?\s+.*$//sgo;
1690 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1695 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1698 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1699 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1700 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1701 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1702 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1703 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1704 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1705 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1708 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1710 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1711 $text =~ s/\pM+//gso;
1712 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1714 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1715 $text =~ s/\s+/ /sgo;
1716 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1717 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1718 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1719 $text =~ s/\b(?:the|an?)\b//sgo;
1720 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1721 $text =~ s/\[.[^\]]+\]//sgo;
1722 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1723 $text =~ s/\s*[;\/\.]*$//sgo;
1724 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1729 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1730 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1731 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1732 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1735 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1737 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1738 $text =~ s/\pM+//gso;
1739 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1741 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1742 $text =~ s/\s+/ /sgo;
1743 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1744 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1745 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1746 $text =~ s/,?\s+.*$//sgo;
1747 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1754 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1758 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1762 my $match_index = 0;
1763 my $block_index = 1;
1764 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1765 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1767 my $block_name_index = 0;
1768 my $block_value_index = 1;
1769 my $block = $fp_mods_xpath[$block_index];
1770 while ( my $part = $$block[$block_value_index] ) {
1772 for my $xpath ( @{ $part->{xpath} } ) {
1773 $text = $mods->findvalue( $xpath );
1777 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1781 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1782 $fp_string .= $text;
1785 $block_name_index += 2;
1786 $block_value_index += 2;
1790 $fp_string =~ s/\W+//gso;
1791 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recompute the fingerprint and quality of bibliographic records and, unless
# the api_name contains "nomap", re-point each changed record's
# metarecord_source_map at the metarecord for its new fingerprint.
1801 sub refingerprint_bibrec {
# Open a transaction only when none is active; failure to begin is fatal.
1807 if (!OpenILS::Application::Ingest->in_transaction) {
1808 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1814 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): the lexical $b shadows the $b used by sort comparators;
# harmless here, but a different name would avoid surprises.
1815 for my $b (@$bibs) {
1816 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Only touch records whose stored fingerprint or quality differs from the
# freshly computed one.
1818 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1820 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1822 OpenILS::Application::Ingest->storage_req(
1823 'open-ils.storage.direct.biblio.record_entry.remote_update',
1825 { fingerprint => $fp->{fingerprint},
1826 quality => $fp->{quality} }
# Metarecord remapping is skipped for the "nomap" variant of this method.
1829 if ($self->api_name !~ /nomap/o) {
1830 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1831 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Delete the existing source maps, remembering the metarecord they pointed at.
1836 if (ref($old_source_map) and @$old_source_map) {
1837 for my $m (@$old_source_map) {
1838 $old_mrid = $m->metarecord;
1839 OpenILS::Application::Ingest->storage_req(
1840 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# Look for source maps still attached to the old metarecord.
1846 my $old_sm = OpenILS::Application::Ingest->storage_req(
1847 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1848 { metarecord => $old_mrid }
# An empty result means the old metarecord is no longer referenced, so it is deleted.
1851 if (ref($old_sm) and @$old_sm == 0) {
1852 OpenILS::Application::Ingest->storage_req(
1853 'open-ils.storage.direct.metabib.metarecord.delete',
# Search for a metarecord that already carries the new fingerprint.
1858 my $mr = OpenILS::Application::Ingest->storage_req(
1859 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1860 { fingerprint => $fp->{fingerprint} }
# Build and store a metarecord with this bib as its master record; the value
# returned by the create call is stored as the new object's id.
1864 $mr = Fieldmapper::metabib::metarecord->new;
1865 $mr->fingerprint( $fp->{fingerprint} );
1866 $mr->master_record( $b->id );
1867 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Attach the bib to the metarecord with a fresh source map.
1870 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1871 $mr_map->metarecord( $mr->id );
1872 $mr_map->source( $b->id );
1873 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
# Report the id of each processed record to the caller.
1877 $client->respond($b->id);
1881 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# The transaction is finished here only when $commit is set: committed on
# success, rolled back otherwise.
1885 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1886 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1889 __PACKAGE__->register_method(
1890 api_name => "open-ils.worm.fingerprint.record.update",
1891 method => "refingerprint_bibrec",
1897 __PACKAGE__->register_method(
1898 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1899 method => "refingerprint_bibrec",
# Record-id front end for open-ils.worm.fingerprint.marc: retrieves the
# bibliographic record entry for $rec and fingerprints its MARC.
1906 sub fingerprint_bibrec {
1911 OpenILS::Application::Ingest->post_init();
1912 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
1914 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# NOTE(review): refingerprint_bibrec treats the result of the same method as
# a hash ref ($fp->{fingerprint}); if that holds here, this message prints
# the reference rather than the fingerprint -- confirm.
1915 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1919 __PACKAGE__->register_method(
1920 api_name => "open-ils.worm.fingerprint.record",
1921 method => "fingerprint_bibrec",
# Fingerprint a MODS document supplied as an XML string: the string is
# parsed and its document element is handed to _fp_mods.
1927 sub fingerprint_mods {
1932 OpenILS::Application::Ingest->post_init();
1933 my $mods = $parser->parse_string($xml)->documentElement;
1935 return _fp_mods( $mods );
1937 __PACKAGE__->register_method(
1938 api_name => "open-ils.worm.fingerprint.mods",
1939 method => "fingerprint_mods",
# Fingerprint a MARC document: it is transformed to MODS with $mods_sheet
# and the resulting document element is handed to _fp_mods.
1944 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or a raw XML string.
1949 $xml = $parser->parse_string($xml) unless (ref $xml);
1951 OpenILS::Application::Ingest->post_init();
1952 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1953 $log->debug("Returning [$fp] as fingerprint", INFO);
1956 __PACKAGE__->register_method(
1957 api_name => "open-ils.worm.fingerprint.marc",
1958 method => "fingerprint_marc",
# Record-id front end for open-ils.worm.fingerprint.marc: fetches data for
# the bibliographic record $rec from storage and runs the fingerprint method
# on it.
1966 sub biblio_fingerprint_record {
1971 OpenILS::Application::Ingest->post_init();
1973 my $marc = OpenILS::Application::Ingest
1974 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value returned by the fingerprint method is kept.
1977 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1978 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): the same api_name is registered earlier in this file with
# method fingerprint_bibrec; confirm which registration is meant to win.
1981 __PACKAGE__->register_method(
1982 api_name => "open-ils.worm.fingerprint.record",
1983 method => "biblio_fingerprint_record",
# Compute a bibliographic fingerprint by running an external script
# (configured under the open-ils.storage app settings) with the record's
# MARC and MODS serialisations in its environment.
1989 sub biblio_fingerprint {
1994 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
1996 $marc = $parser->parse_string($marc) unless (ref $marc);
1998 my $mods = OpenILS::Application::Ingest::entityize(
2000 ->transform( $marc )
# From here on $marc holds the entityized string form of the document element.
2006 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2009 $log->internal("Got MARC [$marc]");
2010 $log->internal("Created MODS [$mods]");
# Settings path prefix shared by the lookups below.
2013 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2014 my $conf = OpenSRF::Utils::SettingsClient->new;
2016 my $libs = $conf->config_value(@pfx, 'script_path');
2017 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise it to an array ref.
2018 my $script_libs = (ref($libs)) ? $libs : [$libs];
2020 $log->debug("Loading script $script_file for biblio fingerprinting...");
2022 $fp_script = new OpenILS::Utils::ScriptRunner
2023 ( file => $script_file,
2024 paths => $script_libs,
2025 reset_count => 1000 );
2028 $log->debug("Applying environment for biblio fingerprinting...");
# The script receives both serialisations under the name 'environment'.
2030 my $env = {marc => $marc, mods => $mods};
2031 #my $res = {fingerprint => '', quality => '0'};
2033 $fp_script->insert('environment' => $env);
2034 #$fp_script->insert('result' => $res);
2036 $log->debug("Running script for biblio fingerprinting...");
# NOTE(review): the early "return 0" only happens when $log->error returns a
# true value; otherwise execution continues with a false $res -- confirm the
# logger's return value or split this into separate statements.
2038 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2040 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): the same api_name is registered earlier in this file with
# method fingerprint_marc; confirm which registration is meant to win.
2044 __PACKAGE__->register_method(
2045 api_name => "open-ils.worm.fingerprint.marc",
2046 method => "biblio_fingerprint",
2051 # --------------------------------------------------------------------------------
2065 my $create_source_map;
2080 my %descriptor_code = (
2081 item_type => 'substr($ldr,6,1)',
2082 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2083 bib_level => 'substr($ldr,7,1)',
2084 control_type => 'substr($ldr,8,1)',
2085 char_encoding => 'substr($ldr,9,1)',
2086 enc_level => 'substr($ldr,17,1)',
2087 cat_form => 'substr($ldr,18,1)',
2088 pub_status => 'substr($ldr,5,1)',
2089 item_lang => 'substr($oo8,35,3)',
2090 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2091 audience => 'substr($oo8,22,1)',
2101 if ($self->api_name =~ /no_map/o) {
2105 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2107 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2109 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2111 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2113 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2114 unless ($sm_lookup);
2115 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2116 unless ($mr_lookup);
2117 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2118 unless ($mr_update);
2119 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2121 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2122 unless ($update_entry);
2123 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2124 unless ($rm_old_sm);
2125 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2126 unless ($rm_old_rd);
2127 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2128 unless ($rm_old_fr);
2129 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2130 unless ($rm_old_tr);
2131 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2132 unless ($rm_old_ar);
2133 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2134 unless ($rm_old_sr);
2135 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2136 unless ($rm_old_kr);
2137 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2138 unless ($rm_old_ser);
2139 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2140 unless ($mr_create);
2141 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2142 unless ($create_source_map);
2143 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2144 unless ($rd_create);
2145 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2146 unless ($fr_create);
2147 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2148 unless ($$create{title});
2149 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2150 unless ($$create{author});
2151 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2152 unless ($$create{subject});
2153 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2154 unless ($$create{keyword});
2155 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2156 unless ($$create{series});
2159 my ($outer_xact) = $in_xact->run;
2161 unless ($outer_xact) {
2162 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2163 my ($r) = $begin->run($client);
2164 unless (defined $r and $r) {
2166 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2169 } catch Error with {
2170 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2180 for my $entry ( $lookup->run(@docids) ) {
2181 # step -1: grab the doc from storage
2182 next unless ($entry);
2185 my $xslt_doc = $parser->parse_file(
2186 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2187 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2190 my $xml = $entry->marc;
2191 my $docid = $entry->id;
2192 my $marcdoc = $parser->parse_string($xml);
2193 my $modsdoc = $mods_sheet->transform($marcdoc);
2195 my $mods = $modsdoc->documentElement;
2196 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2198 $entry->fingerprint( fingerprint_mods( $mods ) );
2199 push @entry_list, $entry;
2201 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2204 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2205 if (!$mr || !@$mr) {
2206 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2207 $mr = new Fieldmapper::metabib::metarecord;
2208 $mr->fingerprint( $entry->fingerprint );
2209 $mr->master_record( $entry->id );
2210 my ($new_mr) = $mr_create->run($mr);
2212 unless (defined $mr) {
2213 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2216 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2221 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2222 $sm->metarecord( $mr->id );
2223 $sm->source( $entry->id );
2224 push @source_maps, $sm;
2227 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2228 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2230 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2231 for my $rd_field ( keys %descriptor_code ) {
2232 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2234 $rd_obj->record( $docid );
2235 push @rd_list, $rd_obj;
2237 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2239 # step 2: build the KOHA rows
2240 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2241 $_->record( $docid ) for (@tmp_list);
2242 push @ns_list, @tmp_list;
2246 last unless ($self->api_name =~ /batch$/o);
2249 $rm_old_rd->run( { record => \@docids } );
2250 $rm_old_fr->run( { record => \@docids } );
2251 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2252 $rm_old_tr->run( { source => \@docids } );
2253 $rm_old_ar->run( { source => \@docids } );
2254 $rm_old_sr->run( { source => \@docids } );
2255 $rm_old_kr->run( { source => \@docids } );
2256 $rm_old_ser->run( { source => \@docids } );
2259 my ($sm) = $create_source_map->run(@source_maps);
2260 unless (defined $sm) {
2261 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2263 my ($mr) = $mr_update->run(@mr_list);
2264 unless (defined $mr) {
2265 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2269 my ($re) = $update_entry->run(@entry_list);
2270 unless (defined $re) {
2271 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2274 my ($rd) = $rd_create->run(@rd_list);
2275 unless (defined $rd) {
2276 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2279 my ($fr) = $fr_create->run(@ns_list);
2280 unless (defined $fr) {
2281 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2284 # step 5: insert the new metadata
2285 for my $class ( qw/title author subject keyword series/ ) {
2287 for my $doc ( @mods_data ) {
2288 my ($did) = keys %$doc;
2289 my ($data) = values %$doc;
2291 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2292 for my $row ( keys %{ $$data{$class} } ) {
2293 next unless (exists $$data{$class}{$row});
2294 next unless ($$data{$class}{$row}{value});
2295 my $fm_obj = $fm_constructor->new;
2296 $fm_obj->value( $$data{$class}{$row}{value} );
2297 $fm_obj->field( $$data{$class}{$row}{field_id} );
2298 $fm_obj->source( $did );
2299 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2301 push @md_list, $fm_obj;
2305 my ($cr) = $$create{$class}->run(@md_list);
2306 unless (defined $cr) {
2307 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2311 unless ($outer_xact) {
2312 $log->debug("Commiting transaction started by the Ingest.", INFO);
2313 my ($c) = $commit->run;
2314 unless (defined $c and $c) {
2316 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
2322 __PACKAGE__->register_method(
2323 api_name => "open-ils.worm.wormize",
2324 method => "wormize",
2328 __PACKAGE__->register_method(
2329 api_name => "open-ils.worm.wormize.no_map",
2330 method => "wormize",
2334 __PACKAGE__->register_method(
2335 api_name => "open-ils.worm.wormize.batch",
2336 method => "wormize",
2340 __PACKAGE__->register_method(
2341 api_name => "open-ils.worm.wormize.no_map.batch",
2342 method => "wormize",
2357 my $acreate_source_map;
# Ingest authority records: for each record retrieved from storage, build a
# record descriptor and the flattened full_rec rows, then replace the stored
# descriptor and full_rec data for the given ids inside a transaction.
2372 sub authority_wormize {
2379 if ($self->api_name =~ /no_map/o) {
# Resolve the storage methods this routine uses and keep them in the
# file-level handles.
2383 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2385 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2387 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2389 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2391 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2393 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2394 unless ($aupdate_entry);
2395 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2396 unless ($arm_old_rd);
2397 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2398 unless ($arm_old_fr);
2399 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2400 unless ($ard_create);
2401 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2402 unless ($afr_create);
# Start a transaction only when the caller has not already opened one.
2405 my ($outer_xact) = $in_xact->run;
2407 unless ($outer_xact) {
2408 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2409 my ($r) = $begin->run($client);
2410 unless (defined $r and $r) {
2412 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2415 } catch Error with {
2416 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Read through $alookup, the authority batch-retrieve handle resolved above;
# $lookup is the bibliographic handle and would fetch the wrong record type.
2426 for my $entry ( $alookup->run(@docids) ) {
2427 # step -1: grab the doc from storage
2428 next unless ($entry);
2431 # my $xslt_doc = $parser->parse_file(
2432 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2433 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2436 my $xml = $entry->marc;
2437 my $docid = $entry->id;
2438 my $marcdoc = $parser->parse_string($xml);
2439 #my $madsdoc = $mads_sheet->transform($marcdoc);
2441 #my $mads = $madsdoc->documentElement;
2442 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2444 push @entry_list, $entry;
# Leader and 008 feed the string-eval'd expressions in %descriptor_code.
2446 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2447 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2449 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2450 for my $rd_field ( keys %descriptor_code ) {
2451 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2453 $rd_obj->record( $docid );
2454 push @rd_list, $rd_obj;
2456 # step 2: build the KOHA rows
2457 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2458 $_->record( $docid ) for (@tmp_list);
2459 push @ns_list, @tmp_list;
# Non-batch api names process a single record only.
2463 last unless ($self->api_name =~ /batch$/o);
# Remove the previously stored descriptor and full_rec rows for these ids.
2466 $arm_old_rd->run( { record => \@docids } );
2467 $arm_old_fr->run( { record => \@docids } );
2469 my ($rd) = $ard_create->run(@rd_list);
2470 unless (defined $rd) {
2471 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Create the rows through $afr_create, the authority full_rec handle resolved
# above, which is also the method named in the error message below.
2474 my ($fr) = $afr_create->run(@ns_list);
2475 unless (defined $fr) {
2476 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only a transaction that this routine started itself.
2479 unless ($outer_xact) {
2480 $log->debug("Commiting transaction started by Ingest.", INFO);
2481 my ($c) = $commit->run;
2482 unless (defined $c and $c) {
2484 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the "wormize" handler under two API names: a plain name and a
# ".batch" name.
# NOTE(review): "authortiy" in the first api_name looks like a misspelling of
# "authority" (the batch name below spells it correctly). The api_name is what
# clients call, so confirm nobody depends on the misspelled name before
# renaming it, or register the corrected name alongside it.
2490 __PACKAGE__->register_method(
2491 api_name => "open-ils.worm.authortiy.wormize",
2492 method => "wormize",
# Batch variant: same "wormize" handler, api_name ends in "batch".
2496 __PACKAGE__->register_method(
2497 api_name => "open-ils.worm.authority.wormize.batch",
2498 method => "wormize",
2504 # --------------------------------------------------------------------------------
# Walk a parsed MARCXML document and build one "full_rec" row object per
# leader, controlfield, and datafield child node.
#
# Positional arguments:
#   $marcxml - document object; only ->documentElement is called on it here.
#   $type    - class name used to construct datafield rows; defaults to
#              'Fieldmapper::metabib::full_rec' when omitted or false.
#
# NOTE(review): several lines of this sub (row population, collection, and
# the return statement) are not visible in this excerpt, so the return value
# is deliberately not documented here.
2507 sub _marcxml_to_full_rows {
2509 my $marcxml = shift;
2510 my $type = shift || 'Fieldmapper::metabib::full_rec';
2514 my $root = $marcxml->documentElement;
# Pass 1: <leader> children of the root element.
2516 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2517 next unless $tagline;
# NOTE(review): this row is always a metabib full_rec, ignoring $type, while
# the datafield loop below uses $type->new. Confirm this is intended for
# callers that pass another class (e.g. 'Fieldmapper::authority::full_rec').
2519 my $ns = new Fieldmapper::metabib::full_rec;
# Decompose to NFD, then delete every run of mark characters (\pM).
2522 my $val = NFD($tagline->textContent);
2523 $val =~ s/(\pM+)//gso;
# Pass 2: <controlfield> children; the row's tag comes from the "tag" attribute.
2529 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2530 next unless $tagline;
# NOTE(review): same hard-coded metabib class as in the leader loop above.
2532 my $ns = new Fieldmapper::metabib::full_rec;
2534 $ns->tag( $tagline->getAttribute( "tag" ) );
# Same NFD + mark-stripping normalization as for the leader value.
2535 my $val = NFD($tagline->textContent);
2536 $val =~ s/(\pM+)//gso;
# Pass 3: <datafield> children; tag and both indicators are read once per
# datafield, then each child node yields its own row.
2542 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2543 next unless $tagline;
2545 my $tag = $tagline->getAttribute( "tag" );
2546 my $ind1 = $tagline->getAttribute( "ind1" );
2547 my $ind2 = $tagline->getAttribute( "ind2" );
# childNodes returns every child node of the datafield.
# NOTE(review): any filtering of non-element children happens on lines not
# visible here, if at all -- confirm.
2549 for my $data ( $tagline->childNodes ) {
# Only here is the caller-supplied row class honoured.
2552 my $ns = $type->new;
# Subfield code comes from the child's "code" attribute.
2557 $ns->subfield( $data->getAttribute( "code" ) );
# Value is NFD-decomposed, stripped of mark characters, then lower-cased.
2558 my $val = NFD($data->textContent);
2559 $val =~ s/(\pM+)//gso;
2560 $ns->value( lc($val) );
# Gather the text found at an XPath location into a single space-separated
# string, then strip mark characters from it.
#
# Positional arguments:
#   $root  - node on which ->findnodes is called.
#   $xpath - XPath expression handed to findnodes.
#
# NOTE(review): the declaration of $string and the return statement are on
# lines not visible in this excerpt, so the return value is not documented
# here.
2568 sub _get_field_value {
2570 my( $root, $xpath ) = @_;
2574 # grab the set of matching nodes
2575 my @nodes = $root->findnodes( $xpath );
2576 for my $value (@nodes) {
2578 # grab all children of the node
2579 my @children = $value->childNodes();
2580 for my $child (@children) {
2582 # add the child's content to the growing buffer
# quotemeta escapes the text so it is matched literally when interpolated
# into the pattern below.
2583 my $content = quotemeta($child->textContent);
# NOTE(review): this is an unanchored substring test, so text is also skipped
# when it merely occurs inside a longer value already in $string. Also, if
# $content is empty, Perl treats the empty pattern specially (it reuses the
# last successfully matched pattern) -- confirm both are acceptable.
2584 next if ($string =~ /$content/); # uniquify the values
2585 $string .= $child->textContent . " ";
# Appends the matched node's own text content; the condition guarding this
# line, if any, is on lines not visible here.
2588 $string .= $value->textContent . " ";
# Decompose to NFD, then delete every mark character (\pM).
2591 $string = NFD($string);
2592 $string =~ s/(\pM)//gso;
2597 sub modsdoc_to_values {
2598 my( $self, $mods ) = @_;
2600 for my $class (keys %$xpathset) {
2601 $data->{$class} = {};
2602 for my $type (keys %{$xpathset->{$class}}) {
2603 $data->{$class}->{$type} = {};
2604 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};