1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Table of supported output formats keyed by format name; each entry records
# the XML namespace URI under "ns". Elsewhere in this file an "xslt" entry is
# cached into the mods and mods3 records.
# NOTE(review): the closing of this hash is not visible in this excerpt.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
# Logger class name; logging calls are made as class methods on this string.
30 our $log = 'OpenSRF::Utils::Logger';
# Shared XML parser and XSLT processor instances used throughout the file.
32 our $parser = XML::LibXML->new();
33 our $xslt = XML::LibXSLT->new();
# Body of the one-time initialisation routine (its `sub` header is not visible
# in this excerpt; the log message below calls it post_init).
# Everything is skipped once $xpathset already has keys.
43 unless (keys %$xpathset) {
44 $log->debug("Running post_init", DEBUG);
# Directory containing the XSL stylesheets, read via config_value(dirs => 'xsl').
46 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and cache the MARC21slim -> MODS stylesheet unless it is already cached.
48 unless ($supported_formats{mods}{xslt}) {
49 $log->debug("Loading MODS XSLT", DEBUG);
50 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
51 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet.
54 unless ($supported_formats{mods3}{xslt}) {
55 $log->debug("Loading MODS v3 XSLT", DEBUG);
56 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
57 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask open-ils.cstore for config.metabib_field rows; the filter is
# { id => { '!=' => undef } }, presumably matching every row -- TODO confirm.
61 my $req = OpenSRF::AppSession
62 ->create('open-ils.cstore')
63 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Index each returned field definition ($f) by field_class and name, keeping
# its xpath, id and format for later extraction.
66 if (ref $req and @$req) {
68 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
69 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
70 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
71 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with a hexadecimal
# numeric character reference of the form &#xHHHH; (upper-case hex digits).
# The enclosing sub is not visible in this excerpt.
87 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
# Ingest one bib object and return the extracted data without storing it in
# the visible code: parses $xml, runs the flat-MARC, field-entry, fingerprint
# and descriptor extractors, stamps $bib->id onto the results, and returns a
# hashref with keys full_rec, field_entries, fingerprint and descriptor.
# (Argument unpacking is not visible in this excerpt.)
91 sub ro_biblio_ingest_single_object {
97 my $document = $parser->parse_string($xml);
# flat_marc and field_entry receive the parsed document; fingerprint and
# descriptor receive the raw XML string.
99 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
100 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
101 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
102 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Attach the owning record id to every extracted row.
104 $_->source($bib->id) for (@mXfe);
105 $_->record($bib->id) for (@mfr);
106 $rd->record($bib->id);
108 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the handler under its API name.
110 __PACKAGE__->register_method(
111 api_name => "open-ils.ingest.full.biblio.object.readonly",
112 method => "ro_biblio_ingest_single_object",
# Ingest a MARC XML string: parses $xml, runs the flat-MARC, field-entry,
# fingerprint and descriptor extractors, and returns a hashref with keys
# full_rec, field_entries, fingerprint and descriptor. No record id is
# stamped onto the rows here; callers in this file do that themselves.
117 sub ro_biblio_ingest_single_xml {
122 my $document = $parser->parse_string($xml);
# The first two extractors get the parsed document, the last two the raw string.
124 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
125 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
126 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
127 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
129 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the handler under its API name.
131 __PACKAGE__->register_method(
132 api_name => "open-ils.ingest.full.biblio.xml.readonly",
133 method => "ro_biblio_ingest_single_xml",
# Ingest a stored bib record identified by $rec: retrieves the record_entry
# through open-ils.cstore, runs the XML ingest on its marc field, and stamps
# $rec onto the field entries, full_rec rows and descriptor.
# Returns undef when the retrieval yields nothing.
138 sub ro_biblio_ingest_single_record {
143 OpenILS::Application::Ingest->post_init();
144 my $r = OpenSRF::AppSession
145 ->create('open-ils.cstore')
146 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
149 return undef unless ($r and @$r);
151 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
# Attach the record id to every extracted row.
153 $_->source($rec) for (@{$res->{field_entries}});
154 $_->record($rec) for (@{$res->{full_rec}});
155 $res->{descriptor}->record($rec);
# Register the handler under its API name.
159 __PACKAGE__->register_method(
160 api_name => "open-ils.ingest.full.biblio.record.readonly",
161 method => "ro_biblio_ingest_single_record",
# Streaming variant of the single-record ingest: repeatedly receives one
# message from $client (count => 1, timeout => 5), treats its content as a
# record id, runs the single-record ingest and sends each result back with
# respond(). The loop ends when a message has undefined content.
166 sub ro_biblio_ingest_stream_record {
170 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created here but not used in the visible lines.
172 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
174 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
176 my $rec = $resp->content;
177 last unless (defined $rec);
179 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
180 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id on the rows before replying.
182 $_->source($rec) for (@{$res->{field_entries}});
183 $_->record($rec) for (@{$res->{full_rec}});
185 $client->respond( $res );
# Register the handler under its API name.
190 __PACKAGE__->register_method(
191 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
192 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the XML ingest: repeatedly receives one message from
# $client (count => 1, timeout => 5), treats its content as MARC XML, runs the
# XML ingest and responds with each result. The loop ends when a message has
# undefined content.
197 sub ro_biblio_ingest_stream_xml {
201 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created here but not used in the visible lines.
203 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
205 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
207 my $xml = $resp->content;
208 last unless (defined $xml);
210 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
211 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
213 $client->respond( $res );
# Register the handler under its API name.
218 __PACKAGE__->register_method(
219 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
220 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib objects: repeatedly receives one message from
# $client (count => 1, timeout => 5), treats its content as a bib object,
# runs the XML ingest on $bib->marc, stamps $bib->id on the rows and responds
# with the result. The loop ends when a message has undefined content.
# NOTE(review): despite the "rw"/"import" naming, the visible lines only call
# the .readonly ingest method -- confirm whether any storage happens here.
225 sub rw_biblio_ingest_stream_import {
229 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created here but not used in the visible lines.
231 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
233 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
235 my $bib = $resp->content;
236 last unless (defined $bib);
238 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
239 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
# Stamp the bib id on the rows before replying.
241 $_->source($bib->id) for (@{$res->{field_entries}});
242 $_->record($bib->id) for (@{$res->{full_rec}});
244 $client->respond( $res );
# Register the handler under its API name.
249 __PACKAGE__->register_method(
250 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
251 method => "rw_biblio_ingest_stream_import",
257 # --------------------------------------------------------------------------------
258 # MARC index extraction
260 package OpenILS::Application::Ingest::XPATH;
261 use base qw/OpenILS::Application::Ingest/;
262 use Unicode::Normalize;
# Given an XML document element and an XPath expression, build one
# space-separated string from the text of the matching nodes.
# When both a namespace URI and prefix are supplied, the prefix is bound on
# the element before the XPath is evaluated. When $unique is true, a child
# whose text already occurs in the buffer is skipped.
# (Most of the argument unpacking is not visible in this excerpt.)
265 sub xpath_to_string {
269 my $ns_prefix = shift;
272 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
276 # grab the set of matching nodes
277 my @nodes = $xml->findnodes( $xpath );
278 for my $value (@nodes) {
280 # grab all children of the node
281 my @children = $value->childNodes();
282 for my $child (@children) {
284 # add the child's content to the growing buffer
# quotemeta makes the text match literally instead of being read as a regex.
285 my $content = quotemeta($child->textContent);
286 next if ($unique && $string =~ /$content/); # uniquify the values
287 $string .= $child->textContent . " ";
# Appends the node's own text; presumably the fallback for nodes without
# children (the surrounding condition is not visible) -- TODO confirm.
290 $string .= $value->textContent . " ";
# For each requested class and each field type configured for it in
# $xpathset, transform the document into the field's format, extract the
# field's XPath as a string, normalise it, and respond with a
# Fieldmapper::metabib::<class>_field_entry object carrying the value and
# the field id. Accepts either an XML string or an already-parsed document.
296 sub class_index_string_xml {
302 OpenILS::Application::Ingest->post_init();
303 $xml = $parser->parse_string($xml) unless (ref $xml);
307 for my $class (@classes) {
308 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
309 for my $type ( keys %{ $xpathset->{$class} } ) {
311 my $def = $xpathset->{$class}->{$type};
312 my $sf = $supported_formats{$def->{format}};
# Reuse a transformed document per format; the XSLT runs only on a cache miss.
317 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
318 $transform_cache{$def->{format}} = $document;
# The format's namespace URI and the format name are passed after the XPath,
# apparently as the namespace URI and prefix of xpath_to_string -- TODO confirm.
321 my $value = xpath_to_string(
322 $document->documentElement => $def->{xpath},
323 $sf->{ns} => $def->{format},
# Normalisation: drop characters matching \pM (marks) and \pC (other/control),
# then remove a period that directly follows a word character.
329 $value =~ s/\pM+//sgo;
330 $value =~ s/\pC+//sgo;
331 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
333 $value =~ s/(\w)\./$1/sgo;
336 my $fm = $class_constructor->new;
337 $fm->value( $value );
338 $fm->field( $xpathset->{$class}->{$type}->{id} );
339 $client->respond($fm);
# Register the handler under its API name.
344 __PACKAGE__->register_method(
345 api_name => "open-ils.ingest.field_entry.class.xml",
346 method => "class_index_string_xml",
# Record-id front end for the class field-entry extractor: retrieves the bib
# record_entry for $rec through open-ils.cstore, runs
# open-ils.ingest.field_entry.class.xml on its marc with @classes, and relays
# every resulting field entry to the client.
# Returns undef when the retrieval yields nothing.
352 sub class_index_string_record {
358 OpenILS::Application::Ingest->post_init();
359 my $r = OpenSRF::AppSession
360 ->create('open-ils.cstore')
361 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
364 return undef unless ($r and @$r);
366 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
368 $client->respond($fm);
# Register the handler under its API name.
372 __PACKAGE__->register_method(
373 api_name => "open-ils.ingest.field_entry.class.record",
374 method => "class_index_string_record",
# Extract field entries for every configured class: calls
# open-ils.ingest.field_entry.class.xml with all keys of $xpathset as the
# class list and relays each resulting field entry to the client.
380 sub all_index_string_xml {
385 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
386 $client->respond($fm);
# Register the handler under its API name.
390 __PACKAGE__->register_method(
391 api_name => "open-ils.ingest.extract.field_entry.all.xml",
392 method => "all_index_string_xml",
# Record-id front end for the all-classes extractor: retrieves the bib
# record_entry for $rec through open-ils.cstore, runs
# open-ils.ingest.field_entry.class.xml on its marc for every class in
# $xpathset, and relays each field entry to the client.
# Returns undef when the retrieval yields nothing.
398 sub all_index_string_record {
403 OpenILS::Application::Ingest->post_init();
404 my $r = OpenSRF::AppSession
405 ->create('open-ils.cstore')
406 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
409 return undef unless ($r and @$r);
411 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
413 $client->respond($fm);
# Register the handler under its API name.
417 __PACKAGE__->register_method(
418 api_name => "open-ils.ingest.extract.field_entry.all.record",
419 method => "all_index_string_record",
425 # --------------------------------------------------------------------------------
428 package OpenILS::Application::Ingest::FlatMARC;
429 use base qw/OpenILS::Application::Ingest/;
430 use Unicode::Normalize;
# Flatten a MARCXML document into a list of full_rec Fieldmapper rows.
# $xmltype (default 'metabib') selects the class Fieldmapper::<xmltype>::full_rec.
# Walks the leader, controlfield and datafield/subfield elements of the first
# element whose local name is "record". Creation of the row objects ($ns) and
# of @ns_list is not visible in this excerpt.
433 sub _marcxml_to_full_rows {
436 my $xmltype = shift || 'metabib';
438 my $type = "Fieldmapper::${xmltype}::full_rec";
# local-name() makes the lookup independent of the namespace prefix.
442 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader element(s).
444 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
445 next unless $tagline;
450 my $val = $tagline->textContent;
# Strip characters matching \pM (marks) from the value.
452 $val =~ s/(\pM+)//gso;
# Control fields: the row tag comes from the element's "tag" attribute.
458 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
459 next unless $tagline;
463 $ns->tag( $tagline->getAttribute( "tag" ) );
464 my $val = $tagline->textContent;
466 $val =~ s/(\pM+)//gso;
# Data fields: tag and both indicators are read once per field, then each
# subfield is visited.
472 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
473 next unless $tagline;
475 my $tag = $tagline->getAttribute( "tag" );
476 my $ind1 = $tagline->getAttribute( "ind1" );
477 my $ind2 = $tagline->getAttribute( "ind2" );
479 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
487 $ns->subfield( $data->getAttribute( "code" ) );
488 my $val = $data->textContent;
490 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
491 $ns->value( lc($val) );
497 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the flat-MARC handler (its `sub` header is not visible in this
# excerpt; the registrations below name it flat_marc_xml). Accepts an XML
# string or parsed document, picks 'authority' or 'metabib' rows from the API
# name it was invoked under, and responds with one full_rec row at a time.
506 $log->debug("processing [$xml]");
508 $xml = $parser->parse_string($xml) unless (ref $xml);
# Default to metabib rows; switch when the API name contains "authority".
510 my $type = 'metabib';
511 $type = 'authority' if ($self->api_name =~ /authority/o);
513 OpenILS::Application::Ingest->post_init();
515 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same handler is registered under both the authority and biblio API names.
518 __PACKAGE__->register_method(
519 api_name => "open-ils.ingest.flat_marc.authority.xml",
520 method => "flat_marc_xml",
525 __PACKAGE__->register_method(
526 api_name => "open-ils.ingest.flat_marc.biblio.xml",
527 method => "flat_marc_xml",
# Record-id front end for the flat-MARC extractor: retrieves the
# <type>.record_entry for $rec through open-ils.cstore, runs
# open-ils.ingest.flat_marc.<type>.xml on its marc, and relays every row to
# the client. $type becomes 'authority' when the API name contains
# "authority"; its initial value is not visible in this excerpt.
# Returns undef when there is no record or it has no marc.
533 sub flat_marc_record {
539 $type = 'authority' if ($self->api_name =~ /authority/o);
541 OpenILS::Application::Ingest->post_init();
542 my $r = OpenSRF::AppSession
543 ->create('open-ils.cstore')
544 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
548 return undef unless ($r and $r->marc);
550 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
551 for my $row (@rows) {
552 $client->respond($row);
# Each row is also logged as JSON.
553 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same handler is registered under both the biblio and authority API names.
557 __PACKAGE__->register_method(
558 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
559 method => "flat_marc_record",
564 __PACKAGE__->register_method(
565 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
566 method => "flat_marc_record",
572 # --------------------------------------------------------------------------------
575 package OpenILS::Application::Ingest::Biblio::Fingerprint;
576 use base qw/OpenILS::Application::Ingest/;
577 use Unicode::Normalize;
578 use OpenSRF::EX qw/:try/;
# Fingerprint a stored bib record: retrieves the record_entry for $rec through
# open-ils.cstore and runs open-ils.ingest.fingerprint.xml on its marc.
# Returns undef when there is no record or it has no marc; the normal return
# statement is not visible in this excerpt.
580 sub biblio_fingerprint_record {
585 OpenILS::Application::Ingest->post_init();
587 my $r = OpenSRF::AppSession
588 ->create('open-ils.cstore')
589 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
592 return undef unless ($r and $r->marc);
594 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
595 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Register the handler under its API name.
598 __PACKAGE__->register_method(
599 api_name => "open-ils.ingest.fingerprint.record",
600 method => "biblio_fingerprint_record",
# Compute a fingerprint for MARC XML by running an external script.
# The script file and library paths come from the
# apps/open-ils.ingest/app_settings configuration; the MARC is handed to the
# script as environment.marc. Returns undef (after logging an error) when the
# script run yields a false value; the normal return is not visible here.
606 sub biblio_fingerprint {
611 $log->internal("Got MARC [$xml]");
614 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
615 my $conf = OpenSRF::Utils::SettingsClient->new;
617 my $libs = $conf->config_value(@pfx, 'script_path');
618 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref.
619 my $script_libs = (ref($libs)) ? $libs : [$libs];
621 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines; presumably it
# is a wider-scoped variable that lets the runner be reused -- TODO confirm.
623 $fp_script = new OpenILS::Utils::ScriptRunner
624 ( file => $script_file,
625 paths => $script_libs,
626 reset_count => 1000 );
629 $fp_script->insert('environment' => {marc => $xml} => 1);
# A false result from run() is logged as a script failure and aborts with undef.
631 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
632 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the handler under its API name.
636 __PACKAGE__->register_method(
637 api_name => "open-ils.ingest.fingerprint.xml",
638 method => "biblio_fingerprint",
# Extract a record descriptor from MARC XML by running an external script.
# The script file and library paths come from the
# apps/open-ils.ingest/app_settings configuration; the MARC is handed to the
# script as environment.marc. Returns undef (after logging an error) when the
# script run yields a false value; the normal return is not visible here.
644 sub biblio_descriptor {
649 $log->internal("Got MARC [$xml]");
652 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
653 my $conf = OpenSRF::Utils::SettingsClient->new;
655 my $libs = $conf->config_value(@pfx, 'script_path');
656 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalise to an array ref.
657 my $script_libs = (ref($libs)) ? $libs : [$libs];
659 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is not declared in the visible lines; presumably it
# is a wider-scoped variable that lets the runner be reused -- TODO confirm.
661 $rd_script = new OpenILS::Utils::ScriptRunner
662 ( file => $script_file,
663 paths => $script_libs,
664 reset_count => 1000 );
667 $rd_script->insert('environment' => {marc => $xml} => 1);
# A false result from run() is logged as a script failure and aborts with undef.
669 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
670 $log->debug("Script for biblio descriptor extraction completed successfully...");
# Register the handler under its API name.
674 __PACKAGE__->register_method(
675 api_name => "open-ils.ingest.descriptor.xml",
676 method => "biblio_descriptor",
# Fragment of a helper whose `sub` header is not visible in this excerpt
# (presumably in_transaction, which other subs in this file call).
# Returns whatever open-ils.storage.transaction.current reports.
687 OpenILS::Application::Ingest->post_init();
688 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Start a storage transaction and return the value of
# open-ils.storage.transaction.current afterwards. Throws
# OpenSRF::EX::PANIC when the begin request returns a false or undefined
# result. The branching between the paths below is only partly visible here.
691 sub begin_transaction {
695 OpenILS::Application::Ingest->post_init();
# Current transaction state before attempting to begin a new one.
696 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
700 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
701 #__PACKAGE__->st_sess->connect;
702 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# On failure, roll back before raising the panic.
703 unless (defined $r and $r) {
704 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
705 #__PACKAGE__->st_sess->disconnect;
706 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
# NOTE(review): the condition guarding this error log is not visible here.
710 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
713 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction. The visible lines read the
# current transaction state, issue the rollback request, log when there is no
# transaction, and can throw OpenSRF::EX::PANIC; the conditions selecting
# between these paths are not visible in this excerpt.
716 sub rollback_transaction {
720 OpenILS::Application::Ingest->post_init();
721 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
725 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
727 $log->debug("Ingest isn't inside a transaction.", INFO);
730 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction. When the commit request returns a
# false or undefined result the transaction is rolled back and
# OpenSRF::EX::PANIC is thrown. The conditions selecting the remaining paths
# (the "not inside a transaction" log and the final panic) are not visible in
# this excerpt.
736 sub commit_transaction {
740 OpenILS::Application::Ingest->post_init();
741 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
744 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
746 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A failed commit is followed by an explicit rollback before panicking.
747 unless (defined $r and $r) {
748 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
749 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
751 #__PACKAGE__->st_sess->disconnect;
753 $log->debug("Ingest isn't inside a transaction.", INFO);
756 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Fragment of a helper whose `sub` header is not visible in this excerpt
# (presumably storage_req, which is called throughout this file).
# Looks up $method, runs it with the remaining arguments, and returns only the
# first element of the result list.
765 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
766 return shift( @res );
# Remove derived data for authority record $rec. Inside a savepoint named
# 'scrub_authority_record' it mass-deletes the authority.full_rec and
# authority.record_descriptor rows whose record is $rec. A transaction is
# started when none is active; the final commit or rollback depends on
# $commit and $success, whose assignments are not visible in this excerpt.
769 sub scrub_authority_record {
775 if (!OpenILS::Application::Ingest->in_transaction) {
776 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
782 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
784 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
785 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
787 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path (presumably an exception handler; shift() would then be the
# caught error -- TODO confirm): log and roll back to the savepoint.
789 $log->debug('Scrubbing failed : '.shift(), ERROR);
790 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# Finish the transaction only when $commit is set.
794 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
795 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the handler under its API name.
798 __PACKAGE__->register_method(
799 api_name => "open-ils.worm.scrub.authority",
800 method => "scrub_authority_record",
# Remove derived data for bib record(s) $rec. $rec may be a hash of search
# criteria, in which case it is first resolved to a record id list. Inside a
# savepoint named 'scrub_metabib_record' it mass-deletes the full_rec,
# metarecord_source_map, record_descriptor and per-class field_entry rows,
# then repairs or removes metarecords whose master was $rec. A transaction is
# started when none is active; the final commit or rollback depends on
# $commit and $success, whose assignments are not visible in this excerpt.
806 sub scrub_metabib_record {
# A hash ref is treated as a search; replace it with the matching ids.
811 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
812 $rec = OpenILS::Application::Ingest->storage_req(
813 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
818 if (!OpenILS::Application::Ingest->in_transaction) {
819 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
825 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Delete every derived row that points at the record.
827 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
828 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
829 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
830 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
831 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
832 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
833 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
834 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Metarecords that used this record as their master need attention.
836 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
837 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
839 for my $mr (@$masters) {
840 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source-map rows for this metarecord.
841 my $others = OpenILS::Application::Ingest->storage_req(
842 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the cached mods
# (the condition choosing this path over deletion is not visible here).
845 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
846 $mr->master_record($others->[0]->source);
847 OpenILS::Application::Ingest->storage_req(
848 'open-ils.storage.direct.metabib.metarecord.remote_update',
850 { master_record => $others->[0]->source, mods => undef }
# Otherwise the metarecord itself is deleted.
853 warn "Removing metarecord whose master is $rec";
854 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
855 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
856 warn "Metarecord removed";
857 $log->debug( "Metarecord removed", DEBUG);
861 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path (presumably an exception handler; shift() would then be the
# caught error -- TODO confirm): log and roll back to the savepoint.
864 $log->debug('Scrubbing failed : '.shift(), ERROR);
865 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# Finish the transaction only when $commit is set.
869 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
870 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the handler under its API name.
873 __PACKAGE__->register_method(
874 api_name => "open-ils.worm.scrub.biblio",
875 method => "scrub_metabib_record",
# Re-ingest every bib record mapped to metarecord $mrec: looks up the
# metarecord_source_map rows for $mrec and calls wormize_biblio_record for each
# row's source. Most of the loop and its result reporting is not visible in
# this excerpt.
# NOTE(review): the hashes below read $rec->metarecord while the neighbouring
# lines use $r for the map row; $rec is not declared in the visible lines --
# confirm which variable is intended.
880 sub wormize_biblio_metarecord {
885 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
# Direct sub call, passing $self and $client through.
891 $success = wormize_biblio_record($self => $client => $r->source);
893 { record => $r->source,
894 metarecord => $rec->metarecord,
901 { record => $r->source,
902 metarecord => $rec->metarecord,
# The same handler is registered under four API names (with and without the
# .nomap and .noscrub suffixes).
911 __PACKAGE__->register_method(
912 api_name => "open-ils.worm.wormize.metarecord",
913 method => "wormize_biblio_metarecord",
918 __PACKAGE__->register_method(
919 api_name => "open-ils.worm.wormize.metarecord.nomap",
920 method => "wormize_biblio_metarecord",
925 __PACKAGE__->register_method(
926 api_name => "open-ils.worm.wormize.metarecord.noscrub",
927 method => "wormize_biblio_metarecord",
932 __PACKAGE__->register_method(
933 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
934 method => "wormize_biblio_metarecord",
# Full read/write ingest of bib record(s) $rec. $rec may be a hash of search
# criteria, which is first resolved to a record id list. Phases visible here:
#   1. scrub existing derived data, unless the API name contains "noscrub";
#   2. per record, inside an 'extract_data<id>' savepoint: update the
#      fingerprint/quality, collect full_rec rows, the record descriptor and
#      the field entries, and (unless the API name contains "nomap") find or
#      create the metarecord and queue a source-map row;
#   3. inside a 'wormize_record' savepoint: batch-create everything collected
#      and re-pick each metarecord's master by highest quality.
# A transaction is started when none is active; the final commit or rollback
# depends on $commit and $success, whose assignments are not visible here.
# NOTE(review): the open-ils.worm.fingerprint.marc, .flat_marc.biblio.xml and
# .biblio_leader.xml methods looked up below are not registered in the visible
# part of this file -- confirm they exist.
941 sub wormize_biblio_record {
# A hash ref is treated as a search; replace it with the matching ids.
946 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
947 $rec = OpenILS::Application::Ingest->storage_req(
948 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
954 if (!OpenILS::Application::Ingest->in_transaction) {
955 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
962 unless ($self->api_name =~ /noscrub/o) {
963 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
967 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
970 my @rec_descriptor = ();
# One savepoint per record, named after the record id.
982 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
984 my $xml = $parser->parse_string($r->marc);
986 #update the fingerprint
987 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# The update is only sent when the fingerprint or the integer quality changed.
# NOTE(review): quality is compared with the string operator `ne`.
988 OpenILS::Application::Ingest->storage_req(
989 'open-ils.storage.direct.biblio.record_entry.remote_update',
991 { fingerprint => $fp->{fingerprint},
992 quality => int($fp->{quality}) }
993 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
# The full_rec rows.
996 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
997 $fr->record( $r->id );
1001 # the rec_descriptor stuff
1002 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1003 $rd->record( $r->id );
1004 push @rec_descriptor, $rd;
1006 # the indexing field entry stuff
1007 for my $class ( qw/title author subject keyword series/ ) {
1008 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1009 $fe->source( $r->id );
1010 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: look up the metarecord by fingerprint.
1014 unless ($self->api_name =~ /nomap/o) {
1015 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# Create a new metarecord with this record as master (presumably only when the
# lookup above found none; the guard is not visible here).
1018 $mr = Fieldmapper::metabib::metarecord->new;
1019 $mr->fingerprint( $fp->{fingerprint} );
1020 $mr->master_record( $r->id );
1021 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Queue the record -> metarecord map row for the batch create below.
1024 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1025 $mr_map->metarecord( $mr->id );
1026 $mr_map->source( $r->id );
1027 push @source_map, $mr_map;
# Remember each touched metarecord once, keyed by id.
1029 $metarecord{$mr->id} = $mr;
1031 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record (presumably an exception handler; shift()
# would then be the caught error -- TODO confirm).
1033 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1034 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Storage phase: only entered when at least one descriptor was collected.
1039 if (@rec_descriptor) {
1040 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1042 OpenILS::Application::Ingest->storage_req(
1043 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For every touched metarecord, fetch its sources and their bib records.
1047 for my $mr ( values %metarecord ) {
1048 my $sources = OpenILS::Application::Ingest->storage_req(
1049 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1053 my $bibs = OpenILS::Application::Ingest->storage_req(
1054 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1055 [ map { $_->source } @$sources ]
# The bib with the highest quality becomes the master record.
1058 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1060 OpenILS::Application::Ingest->storage_req(
1061 'open-ils.storage.direct.metabib.metarecord.remote_update',
1063 { master_record => $master->id, mods => undef }
# Batch-create the collected rows; each call is skipped when its list is empty.
1067 OpenILS::Application::Ingest->storage_req(
1068 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1070 ) if (@rec_descriptor);
1072 OpenILS::Application::Ingest->storage_req(
1073 'open-ils.storage.direct.metabib.full_rec.batch.create',
1077 OpenILS::Application::Ingest->storage_req(
1078 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1079 @{ $field_entry{title} }
1080 ) if (@{ $field_entry{title} });
1082 OpenILS::Application::Ingest->storage_req(
1083 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1084 @{ $field_entry{author} }
1085 ) if (@{ $field_entry{author} });
1087 OpenILS::Application::Ingest->storage_req(
1088 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1089 @{ $field_entry{subject} }
1090 ) if (@{ $field_entry{subject} });
1092 OpenILS::Application::Ingest->storage_req(
1093 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1094 @{ $field_entry{keyword} }
1095 ) if (@{ $field_entry{keyword} });
1097 OpenILS::Application::Ingest->storage_req(
1098 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1099 @{ $field_entry{series} }
1100 ) if (@{ $field_entry{series} });
1102 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the storage phase: log and roll back to the savepoint.
1108 $log->debug('Wormization failed : '.shift(), ERROR);
1109 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# Finish the transaction only when $commit is set.
1113 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1114 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The same handler is registered under four API names; the .nomap and
# .noscrub suffixes are what the api_name checks above react to.
1117 __PACKAGE__->register_method(
1118 api_name => "open-ils.worm.wormize.biblio",
1119 method => "wormize_biblio_record",
1123 __PACKAGE__->register_method(
1124 api_name => "open-ils.worm.wormize.biblio.nomap",
1125 method => "wormize_biblio_record",
1129 __PACKAGE__->register_method(
1130 api_name => "open-ils.worm.wormize.biblio.noscrub",
1131 method => "wormize_biblio_record",
1135 __PACKAGE__->register_method(
1136 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1137 method => "wormize_biblio_record",
# Full read/write ingest of authority record(s) $rec: optionally scrubs
# existing derived data (skipped when the API name contains "noscrub"),
# flattens each record's MARC into full_rec rows, and batch-creates them
# inside a 'wormize_authority_record' savepoint. Descriptor handling is
# commented out. A transaction is started when none is active; the final
# commit or rollback depends on $commit and $success, whose assignments are
# not visible in this excerpt.
# NOTE(review): the open-ils.worm.flat_marc.authority.xml method looked up
# below is not registered in the visible part of this file -- confirm it exists.
1142 sub wormize_authority_record {
1148 if (!OpenILS::Application::Ingest->in_transaction) {
1149 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1155 # clean up the cruft
1156 unless ($self->api_name =~ /noscrub/o) {
1157 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1161 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1164 my @rec_descriptor = ();
1165 for my $r (@$bibs) {
1166 my $xml = $parser->parse_string($r->marc);
1168 # the full_rec stuff
1169 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1170 $fr->record( $r->id );
1171 push @full_rec, $fr;
1174 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1175 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1176 #$rd->record( $r->id );
1177 #push @rec_descriptor, $rd;
1181 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1183 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
# Nothing is sent to storage when no rows were collected.
1184 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1186 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path (presumably an exception handler; shift() would then be the
# caught error -- TODO confirm): log and roll back to the savepoint.
1189 $log->debug('Wormization failed : '.shift(), ERROR);
1190 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Finish the transaction only when $commit is set.
1194 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1195 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The same handler is registered with and without the .noscrub suffix.
1198 __PACKAGE__->register_method(
1199 api_name => "open-ils.worm.wormize.authority",
1200 method => "wormize_authority_record",
1204 __PACKAGE__->register_method(
1205 api_name => "open-ils.worm.wormize.authority.noscrub",
1206 method => "wormize_authority_record",
1212 # --------------------------------------------------------------------------------
1213 # MARC index extraction
1215 package OpenILS::Application::Ingest::XPATH;
1216 use base qw/OpenILS::Application::Ingest/;
1217 use Unicode::Normalize;
# Given a MODS document element and an XPath expression, build one
# space-separated string from the text of the matching nodes and return it in
# Unicode NFD form. When both a namespace URI and prefix are supplied, the
# prefix is bound on the element before the XPath is evaluated. When $unique
# is true, a child whose text already occurs in the buffer is skipped.
# (Most of the argument unpacking is not visible in this excerpt.)
1220 sub _xpath_to_string {
1224 my $ns_prefix = shift;
1227 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1231 # grab the set of matching nodes
1232 my @nodes = $xml->findnodes( $xpath );
1233 for my $value (@nodes) {
1235 # grab all children of the node
1236 my @children = $value->childNodes();
1237 for my $child (@children) {
1239 # add the child's content to the growing buffer
# quotemeta makes the text match literally instead of being read as a regex.
1240 my $content = quotemeta($child->textContent);
1241 next if ($unique && $string =~ /$content/); # uniquify the values
1242 $string .= $child->textContent . " ";
# Appends the node's own text; presumably the fallback for nodes without
# children (the surrounding condition is not visible) -- TODO confirm.
1245 $string .= $value->textContent . " ";
1248 return NFD($string);
# For one class, extract every configured field type from the MODS rendering
# of the document: each value is normalised and lower-cased, and a
# Fieldmapper::metabib::<class>_field_entry carrying the value and field id is
# sent to the client. Accepts an XML string or an already-parsed document.
# NOTE(review): $mods_sheet is not defined in the visible lines, and the
# transform is re-run for every field type.
1251 sub class_all_index_string_xml {
1257 OpenILS::Application::Ingest->post_init();
1258 $xml = $parser->parse_string($xml) unless (ref $xml);
1260 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1261 for my $type ( keys %{ $xpathset->{$class} } ) {
1262 my $value = _xpath_to_string(
1263 $mods_sheet->transform($xml)->documentElement,
1264 $xpathset->{$class}->{$type}->{xpath},
1265 "http://www.loc.gov/mods/",
# Normalisation: drop characters matching \pM (marks) and \pC (other/control),
# remove a period that directly follows a word character, then lower-case.
1272 $value =~ s/\pM+//sgo;
1273 $value =~ s/\pC+//sgo;
1274 #$value =~ s/[\x{0080}-\x{fffd}]//sgoe;
1276 $value =~ s/(\w)\./$1/sgo;
1277 $value = lc($value);
1279 my $fm = $class_constructor->new;
1280 $fm->value( $value );
1281 $fm->field( $xpathset->{$class}->{$type}->{id} );
1282 $client->respond($fm);
# Register the handler under its API name.
1286 __PACKAGE__->register_method(
1287 api_name => "open-ils.worm.field_entry.class.xml",
1288 method => "class_all_index_string_xml",
# Record-id front end for the per-class field-entry extractor: retrieves the
# bib record_entry for $rec through the storage service, runs
# open-ils.worm.field_entry.class.xml on its marc for $class, and relays each
# field entry to the client.
# NOTE(review): unlike the cstore-based variants in this file, no visible
# check guards against a failed retrieval before $r->marc is used.
1294 sub class_all_index_string_record {
1300 OpenILS::Application::Ingest->post_init();
1301 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1303 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1305 $client->respond($fm);
# Register the handler under its API name.
1309 __PACKAGE__->register_method(
1310 api_name => "open-ils.worm.field_entry.class.record",
1311 method => "class_all_index_string_record",
# Returns the index string for a single $class/$type pair: the document is
# transformed with $mods_sheet and the XPath stored in
# $xpathset->{$class}->{$type} is evaluated under the "mods" prefix with the
# unique flag (last argument) set.
1318 sub class_index_string_xml {
1325 OpenILS::Application::Ingest->post_init();
# accept either an XML string or an already-parsed document
1326 $xml = $parser->parse_string($xml) unless (ref $xml);
1327 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1329 __PACKAGE__->register_method(
1330 api_name => "open-ils.worm.class.type.xml",
1331 method => "class_index_string_xml",
# Record-id flavour of class_index_string_xml: retrieves bib record $rec from
# storage and runs open-ils.worm.class.type.xml on its MARC for the given
# $class and $type, logging the first value that comes back.
1336 sub class_index_string_record {
1343 OpenILS::Application::Ingest->post_init();
1344 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# list assignment keeps only the first value returned by run()
1346 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1347 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1350 __PACKAGE__->register_method(
1351 api_name => "open-ils.worm.class.type.record",
1352 method => "class_index_string_record",
# Body of the method registered below as "xml_xpath" (its sub line is not
# part of this listing).  Evaluates a caller-supplied XPath, with optional
# namespace URI/prefix and unique flag, directly against the document element
# of the given XML (no MODS transform) via _xpath_to_string.
1366 OpenILS::Application::Ingest->post_init();
# accept either an XML string or an already-parsed document
1367 $xml = $parser->parse_string($xml) unless (ref $xml);
1368 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1370 __PACKAGE__->register_method(
1371 api_name => "open-ils.worm.xpath.xml",
1372 method => "xml_xpath",
# Body of the method registered below as "record_xpath" (its sub line is not
# part of this listing).  Retrieves bib record $rec from storage and runs
# open-ils.worm.xpath.xml on its MARC with the caller's XPath arguments,
# logging the first value that comes back.
1386 OpenILS::Application::Ingest->post_init();
1387 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# list assignment keeps only the first value returned by run()
1389 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1390 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1393 __PACKAGE__->register_method(
1394 api_name => "open-ils.worm.xpath.record",
1395 method => "record_xpath",
1401 # --------------------------------------------------------------------------------
1404 package OpenILS::Application::Ingest::Biblio::Leader;
1405 use base qw/OpenILS::Application::Ingest/;
1406 use Unicode::Normalize;
# %marc_type_groups maps a record-type group name (e.g. VIS) to a regex
# fragment; the descriptor extractors in this package match the leader
# character at offset 6 against patterns built from these fragments.
1408 our %marc_type_groups = (
1411 VIS => q/[gkro]{1}/,
# Build the pattern for the requested group names (passed in @_).
# A hash slice (@marc_type_groups{@_}) is required here:
# $marc_type_groups{@_} evaluates @_ in scalar context and looks up the
# argument *count* as a key, which yields undef and an always-empty pattern.
# The alternation is wrapped in (?:...) so that ^ and $ anchor every
# alternative rather than only the first and the last.
1420 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
# Dispatch table: record_descriptor field name => code ref that extracts the
# value.  The code refs read the package variables $ldr and $oo8, which
# _extract_biblio_descriptors sets with local() before invoking them.
# Offsets are zero-based substr() positions into those strings.
1424 our %biblio_descriptor_code = (
1425 item_type => sub { substr($ldr,6,1); },
# NOTE(review): the key for this entry is elided from the listing.  The
# visible logic reads 008 offset 29 for MAP/VIS record types and offset 23
# for BKS/SER/MIX/SCO/REC types.
1428 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1429 return substr($oo8,29,1);
1430 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1431 return substr($oo8,23,1);
1435 bib_level => sub { substr($ldr,7,1); },
1436 control_type => sub { substr($ldr,8,1); },
1437 char_encoding => sub { substr($ldr,9,1); },
1438 enc_level => sub { substr($ldr,17,1); },
1439 cat_form => sub { substr($ldr,18,1); },
1440 pub_status => sub { substr($ldr,5,1); },
1441 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 offset 33 and are undef unless the
# record type belongs to the BKS / VIS group respectively
1442 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1443 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1444 audience => sub { substr($oo8,22,1); },
# Builds a Fieldmapper::metabib::record_descriptor from a MARC XML document
# by calling every extractor in %biblio_descriptor_code and storing the
# result through the accessor of the same name.
1447 sub _extract_biblio_descriptors {
# local() makes the leader, 008 and 007 values visible to the extractor code
# refs for the duration of this call; local-name() matching ignores the XML
# namespace of the elements.
1450 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1451 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1452 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1454 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1455 for my $rd_field ( keys %biblio_descriptor_code ) {
# $rd_field doubles as the accessor name on the descriptor object
1456 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# API wrapper around _extract_biblio_descriptors: accepts MARC XML as a
# string or an already-parsed document and returns the extracted descriptor.
1462 sub extract_biblio_desc_xml {
1467 $xml = $parser->parse_string($xml) unless (ref $xml);
1469 return _extract_biblio_descriptors( $xml );
1471 __PACKAGE__->register_method(
1472 api_name => "open-ils.worm.biblio_leader.xml",
1473 method => "extract_biblio_desc_xml",
# Record-id flavour of extract_biblio_desc_xml: retrieves bib record $rec
# from storage, runs open-ils.worm.biblio_leader.xml on its MARC and logs the
# resulting descriptor as JSON.
1478 sub extract_biblio_desc_record {
1483 OpenILS::Application::Ingest->post_init();
1484 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# list assignment keeps only the first value returned by run()
1486 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1487 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1490 __PACKAGE__->register_method(
1491 api_name => "open-ils.worm.biblio_leader.record",
1492 method => "extract_biblio_desc_record",
1497 # --------------------------------------------------------------------------------
1500 package OpenILS::Application::Ingest::FlatMARC;
1501 use base qw/OpenILS::Application::Ingest/;
1502 use Unicode::Normalize;
# Flattens a MARC XML document into Fieldmapper full_rec rows.
#   $marcxml - parsed document
#   $xmltype - Fieldmapper namespace for the row class, 'metabib' by default;
#              rows are created as Fieldmapper::<xmltype>::full_rec
# The visible code walks the leader, every controlfield, and every subfield
# of every datafield under the first element whose local name is "record".
# (This listing is elided; setters that are not visible are not described.)
1505 sub _marcxml_to_full_rows {
1507 my $marcxml = shift;
1508 my $xmltype = shift || 'metabib';
1510 my $type = "Fieldmapper::${xmltype}::full_rec";
# list assignment keeps the first matching <record> element
1514 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# --- leader ---
1516 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1517 next unless $tagline;
1519 my $ns = $type->new;
1522 my $val = $tagline->textContent;
# strip mark characters (\pM) from the value
1524 $val =~ s/(\pM+)//gso;
# --- control fields ---
1530 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1531 next unless $tagline;
1533 my $ns = $type->new;
1535 $ns->tag( $tagline->getAttribute( "tag" ) );
1536 my $val = $tagline->textContent;
1538 $val =~ s/(\pM+)//gso;
# --- data fields: one row per subfield ---
1544 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1545 next unless $tagline;
1547 my $tag = $tagline->getAttribute( "tag" );
1548 my $ind1 = $tagline->getAttribute( "ind1" );
1549 my $ind2 = $tagline->getAttribute( "ind2" );
1551 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1554 my $ns = $type->new;
1559 $ns->subfield( $data->getAttribute( "code" ) );
1560 my $val = $data->textContent;
1562 $val =~ s/(\pM+)//gso;
# subfield values are stored lowercased
1563 $ns->value( lc($val) );
1569 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of the method registered below as "flat_marc_xml" (its sub line is not
# part of this listing).  Flattens the given MARC XML with
# _marcxml_to_full_rows and streams every row to the client.  One
# implementation serves both registrations; the row type is chosen from the
# API name it was invoked under.
1578 $xml = $parser->parse_string($xml) unless (ref $xml);
# 'metabib' rows unless invoked through an API name containing "authority"
1580 my $type = 'metabib';
1581 $type = 'authority' if ($self->api_name =~ /authority/o);
1583 OpenILS::Application::Ingest->post_init();
1585 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1588 __PACKAGE__->register_method(
1589 api_name => "open-ils.worm.flat_marc.authority.xml",
1590 method => "flat_marc_xml",
1595 __PACKAGE__->register_method(
1596 api_name => "open-ils.worm.flat_marc.biblio.xml",
1597 method => "flat_marc_xml",
# Record-id flavour of flat_marc_xml: retrieves record $rec from storage and
# relays the rows produced by the matching open-ils.worm.flat_marc.<type>.xml
# method.  <type> is 'biblio' unless the API name contains "authority"; it
# selects both the storage class and the delegate method.
1603 sub flat_marc_record {
1608 my $type = 'biblio';
1609 $type = 'authority' if ($self->api_name =~ /authority/o);
1611 OpenILS::Application::Ingest->post_init();
1612 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1614 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1617 __PACKAGE__->register_method(
1618 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1619 method => "flat_marc_record",
1624 __PACKAGE__->register_method(
1625 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1626 method => "flat_marc_record",
1633 # --------------------------------------------------------------------------------
1636 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1637 use base qw/OpenILS::Application::Ingest/;
1638 use Unicode::Normalize;
1639 use OpenSRF::EX qw/:try/;
# Fingerprint extraction rules, stored as a flat list of pairs:
#     match XPath => [ block ]
# The consumer further down in this package takes even indexes as the match
# expression and the following odd index as the block; inside a block it
# reads name/value pairs where the value carries an {xpath} list of candidate
# expressions.  The substitutions visible below are the per-part text
# fix-ups (this listing is elided, so steps without a visible line are not
# described): drop mark characters (\pM), collapse whitespace runs, trim,
# and then either
#   - for titles: remove the words "the", "a", "an", remove bracketed text
#     and trailing ';', '/', '.' characters, or
#   - for names: cut everything from the first whitespace (with an optional
#     preceding comma) onwards.
# NOTE(review): in s/^\s*(.+)\s*$/$1/ the greedy (.+) also captures trailing
# whitespace, so only leading whitespace is actually removed.  Left as is,
# because changing it would change generated fingerprints.
1641 my @fp_mods_xpath = (
1642 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1645 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1646 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1647 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1648 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1651 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1653 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1654 $text =~ s/\pM+//gso;
1655 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1657 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1658 $text =~ s/\s+/ /sgo;
1659 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1660 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1661 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1662 $text =~ s/\b(?:the|an?)\b//sgo;
1663 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1664 $text =~ s/\[.[^\]]+\]//sgo;
1665 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1666 $text =~ s/\s*[;\/\.]*$//sgo;
1667 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1672 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1673 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1676 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1678 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1679 $text =~ s/\pM+//gso;
1680 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1682 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1683 $text =~ s/\s+/ /sgo;
1684 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1685 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1686 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1687 $text =~ s/,?\s+.*$//sgo;
1688 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# second rule: records with a relatedItem that is neither host nor series;
# relatedItem titles/names are tried before the record's own
1693 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1696 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1697 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1698 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1699 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1700 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1701 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1702 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1703 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1706 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1708 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1709 $text =~ s/\pM+//gso;
1710 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1712 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1713 $text =~ s/\s+/ /sgo;
1714 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1715 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1716 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1717 $text =~ s/\b(?:the|an?)\b//sgo;
1718 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1719 $text =~ s/\[.[^\]]+\]//sgo;
1720 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1721 $text =~ s/\s*[;\/\.]*$//sgo;
1722 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1727 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1728 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1729 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1730 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1733 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1735 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1736 $text =~ s/\pM+//gso;
1737 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1739 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1740 $text =~ s/\s+/ /sgo;
1741 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1742 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1743 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1744 $text =~ s/,?\s+.*$//sgo;
1745 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# catch-all rule: any record with a titleInfo reuses the first rule's block
# (index 1 of the list)
1752 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Body of the fingerprint builder that fingerprint_mods and fingerprint_marc
# call as _fp_mods (its sub line is not part of this listing).  Walks
# @fp_mods_xpath: when a match expression selects nodes in the MODS element,
# the block stored right after it is processed part by part and each part's
# text is appended to $fp_string, which finally has every non-word character
# removed.
1756 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# @fp_mods_xpath is a flat pair list: match expression at $match_index, its
# block at $block_index
1760 my $match_index = 0;
1761 my $block_index = 1;
1762 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1763 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# a block is itself a flat pair list: part name, part definition
1765 my $block_name_index = 0;
1766 my $block_value_index = 1;
1767 my $block = $fp_mods_xpath[$block_index];
1768 while ( my $part = $$block[$block_value_index] ) {
# evaluate the part's candidate expressions in their listed order
1770 for my $xpath ( @{ $part->{xpath} } ) {
1771 $text = $mods->findvalue( $xpath );
1775 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1779 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1780 $fp_string .= $text;
# advance to the next name/definition pair of the block
1783 $block_name_index += 2;
1784 $block_value_index += 2;
# the fingerprint keeps word characters only
1788 $fp_string =~ s/\W+//gso;
1789 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recomputes fingerprint and quality for the bib record(s) found for $rec
# and, unless invoked through the ".nomap" API name, rebuilds the
# metarecord source map for each of them.  Each processed record id is sent
# to the client.  (This listing is elided; only visible steps are described.)
1799 sub refingerprint_bibrec {
# open a storage transaction only when none is active yet
1805 if (!OpenILS::Application::Ingest->in_transaction) {
1806 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1812 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1813 for my $b (@$bibs) {
1814 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# write back only when fingerprint (string compare) or quality (numeric
# compare) actually changed
1816 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
1818 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
1820 OpenILS::Application::Ingest->storage_req(
1821 'open-ils.storage.direct.biblio.record_entry.remote_update',
1823 { fingerprint => $fp->{fingerprint},
1824 quality => $fp->{quality} }
# metarecord mapping is skipped for the ".nomap" API variant
1827 if ($self->api_name !~ /nomap/o) {
1828 my $old_source_map = OpenILS::Application::Ingest->storage_req(
1829 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# remove the existing source map rows, remembering the metarecord they
# pointed at
1834 if (ref($old_source_map) and @$old_source_map) {
1835 for my $m (@$old_source_map) {
1836 $old_mrid = $m->metarecord;
1837 OpenILS::Application::Ingest->storage_req(
1838 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# if the old metarecord has no source maps left, delete it as well
1844 my $old_sm = OpenILS::Application::Ingest->storage_req(
1845 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
1846 { metarecord => $old_mrid }
1849 if (ref($old_sm) and @$old_sm == 0) {
1850 OpenILS::Application::Ingest->storage_req(
1851 'open-ils.storage.direct.metabib.metarecord.delete',
# look up a metarecord carrying the new fingerprint
1856 my $mr = OpenILS::Application::Ingest->storage_req(
1857 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
1858 { fingerprint => $fp->{fingerprint} }
# create a metarecord with this bib as master record (the condition guarding
# this step is not visible in the listing)
1862 $mr = Fieldmapper::metabib::metarecord->new;
1863 $mr->fingerprint( $fp->{fingerprint} );
1864 $mr->master_record( $b->id );
1865 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# map the bib record to the metarecord
1868 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1869 $mr_map->metarecord( $mr->id );
1870 $mr_map->source( $b->id );
1871 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
1875 $client->respond($b->id);
1879 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# with $commit set, commit on success and roll back otherwise
1883 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1884 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1887 __PACKAGE__->register_method(
1888 api_name => "open-ils.worm.fingerprint.record.update",
1889 method => "refingerprint_bibrec",
1895 __PACKAGE__->register_method(
1896 api_name => "open-ils.worm.fingerprint.record.update.nomap",
1897 method => "refingerprint_bibrec",
# Retrieves bib record $rec from storage and fingerprints its MARC through
# the open-ils.worm.fingerprint.marc method, logging the result.
1904 sub fingerprint_bibrec {
1909 OpenILS::Application::Ingest->post_init();
1910 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# list assignment keeps only the first value returned by run()
1912 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
1913 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1917 __PACKAGE__->register_method(
1918 api_name => "open-ils.worm.fingerprint.record",
1919 method => "fingerprint_bibrec",
# Fingerprints a MODS document supplied as an XML string: the string is
# parsed and its document element is handed to _fp_mods.
1925 sub fingerprint_mods {
1930 OpenILS::Application::Ingest->post_init();
1931 my $mods = $parser->parse_string($xml)->documentElement;
1933 return _fp_mods( $mods );
1935 __PACKAGE__->register_method(
1936 api_name => "open-ils.worm.fingerprint.mods",
1937 method => "fingerprint_mods",
# Fingerprints a MARC XML document (string or already-parsed): the document
# is transformed with $mods_sheet and the resulting document element is
# handed to _fp_mods; the fingerprint is logged.
1942 sub fingerprint_marc {
1947 $xml = $parser->parse_string($xml) unless (ref $xml);
1949 OpenILS::Application::Ingest->post_init();
1950 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
1951 $log->debug("Returning [$fp] as fingerprint", INFO);
1954 __PACKAGE__->register_method(
1955 api_name => "open-ils.worm.fingerprint.marc",
1956 method => "fingerprint_marc",
# Retrieves bib record $rec from storage and fingerprints it through the
# open-ils.worm.fingerprint.marc method, logging the result.
# NOTE(review): the api_name registered below, open-ils.worm.fingerprint.record,
# is also registered for fingerprint_bibrec earlier in this package - confirm
# which registration is meant to be live.
1964 sub biblio_fingerprint_record {
1969 OpenILS::Application::Ingest->post_init();
# the continuation of this call chain is elided from the listing
1971 my $marc = OpenILS::Application::Ingest
1972 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# list assignment keeps only the first value returned by run()
1975 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
1976 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1979 __PACKAGE__->register_method(
1980 api_name => "open-ils.worm.fingerprint.record",
1981 method => "biblio_fingerprint_record",
# Script-driven fingerprinting: builds entity-encoded MARC and MODS strings
# for the record, exposes them to an external script as its 'environment'
# and runs that script through OpenILS::Utils::ScriptRunner.  If the run
# yields a false value an error is logged and 0 is returned.
# NOTE(review): the api_name registered below, open-ils.worm.fingerprint.marc,
# is also registered for fingerprint_marc earlier in this package - confirm
# which registration is meant to be live.
1987 sub biblio_fingerprint {
1992 OpenILS::Application::Ingest->post_init();
# accept either an XML string or an already-parsed document
1994 $marc = $parser->parse_string($marc) unless (ref $marc);
# MODS is produced by transforming the MARC document (parts of this
# expression are elided from the listing)
1996 my $mods = OpenILS::Application::Ingest::entityize(
1998 ->transform( $marc )
# from here on $marc holds the serialised, entity-encoded XML string
2004 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2007 $log->internal("Got MARC [$marc]");
2008 $log->internal("Created MODS [$mods]");
# script location and library paths come from the open-ils.storage
# app_settings section of the configuration
2011 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2012 my $conf = OpenSRF::Utils::SettingsClient->new;
2014 my $libs = $conf->config_value(@pfx, 'script_path');
2015 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref
2016 my $script_libs = (ref($libs)) ? $libs : [$libs];
2018 $log->debug("Loading script $script_file for biblio fingerprinting...");
2020 $fp_script = new OpenILS::Utils::ScriptRunner
2021 ( file => $script_file,
2022 paths => $script_libs,
2023 reset_count => 1000 );
2026 $log->debug("Applying environment for biblio fingerprinting...");
2028 my $env = {marc => $marc, mods => $mods};
2029 #my $res = {fingerprint => '', quality => '0'};
2031 $fp_script->insert('environment' => $env);
2032 #$fp_script->insert('result' => $res);
2034 $log->debug("Running script for biblio fingerprinting...");
# a false result from run() is treated as script failure
2036 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2038 $log->debug("Script for biblio fingerprinting completed successfully...");
2042 __PACKAGE__->register_method(
2043 api_name => "open-ils.worm.fingerprint.marc",
2044 method => "biblio_fingerprint",
2049 # --------------------------------------------------------------------------------
2063 my $create_source_map;
# Record descriptor extraction table: field name => Perl source text.
# Unlike %biblio_descriptor_code these are strings, which the wormize code
# below evaluates with string eval in a scope where $ldr (leader) and $oo8
# (008 control field) are defined.  Offsets are zero-based substr()
# positions.
2078 my %descriptor_code = (
2079 item_type => 'substr($ldr,6,1)',
# 008 offset 29 for leader type codes f, g, i, m, o, p, r; offset 23 otherwise
2080 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2081 bib_level => 'substr($ldr,7,1)',
2082 control_type => 'substr($ldr,8,1)',
2083 char_encoding => 'substr($ldr,9,1)',
2084 enc_level => 'substr($ldr,17,1)',
2085 cat_form => 'substr($ldr,18,1)',
2086 pub_status => 'substr($ldr,5,1)',
2087 item_lang => 'substr($oo8,35,3)',
2088 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2089 audience => 'substr($oo8,22,1)',
# Body of the method registered below as "wormize" (its sub line is not part
# of this listing).  Visible outline:
#   1. resolve and cache the storage method handles it needs;
#   2. begin a storage transaction unless one is already active;
#   3. for each retrieved bib entry: transform MARC to MODS, fingerprint it,
#      find or create the metarecord, and queue a source map, a record
#      descriptor, MODS index values and flattened MARC rows;
#   4. delete the old derived rows for the requested ids and create the new
#      ones in batches;
#   5. commit if the transaction was started here.
# Storage failures are raised as OpenSRF::EX::PANIC.
2099 if ($self->api_name =~ /no_map/o) {
# method handles are looked up once and kept in variables declared outside
# this body; the 'unless' guards skip the lookup on later calls
2103 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2105 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2107 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2109 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2111 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2112 unless ($sm_lookup);
2113 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2114 unless ($mr_lookup);
2115 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2116 unless ($mr_update);
2117 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2119 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2120 unless ($update_entry);
2121 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2122 unless ($rm_old_sm);
2123 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2124 unless ($rm_old_rd);
2125 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2126 unless ($rm_old_fr);
2127 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2128 unless ($rm_old_tr);
2129 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2130 unless ($rm_old_ar);
2131 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2132 unless ($rm_old_sr);
2133 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2134 unless ($rm_old_kr);
2135 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2136 unless ($rm_old_ser);
2137 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2138 unless ($mr_create);
2139 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2140 unless ($create_source_map);
2141 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2142 unless ($rd_create);
2143 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2144 unless ($fr_create);
2145 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2146 unless ($$create{title});
2147 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2148 unless ($$create{author});
2149 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2150 unless ($$create{subject});
2151 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2152 unless ($$create{keyword});
2153 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2154 unless ($$create{series});
# true when the caller already has a storage transaction open
2157 my ($outer_xact) = $in_xact->run;
2159 unless ($outer_xact) {
2160 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2161 my ($r) = $begin->run($client);
2162 unless (defined $r and $r) {
2164 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2167 } catch Error with {
2168 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2178 for my $entry ( $lookup->run(@docids) ) {
2179 # step -1: grab the doc from storage
2180 next unless ($entry);
# load the MARC-to-MODS stylesheet (the guard around this load is not
# visible in the listing)
2183 my $xslt_doc = $parser->parse_file(
2184 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2185 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2188 my $xml = $entry->marc;
2189 my $docid = $entry->id;
2190 my $marcdoc = $parser->parse_string($xml);
2191 my $modsdoc = $mods_sheet->transform($marcdoc);
2193 my $mods = $modsdoc->documentElement;
2194 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# store the fingerprint on the entry; entries are written back in one batch
# update later
2196 $entry->fingerprint( fingerprint_mods( $mods ) );
2197 push @entry_list, $entry;
2199 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# find the metarecord for this fingerprint, creating one when none exists
2202 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2203 if (!$mr || !@$mr) {
2204 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2205 $mr = new Fieldmapper::metabib::metarecord;
2206 $mr->fingerprint( $entry->fingerprint );
2207 $mr->master_record( $entry->id );
2208 my ($new_mr) = $mr_create->run($mr);
# NOTE(review): $mr was assigned a freshly constructed object above, so
# unless an elided line reassigns it this test cannot fail; the result of
# the create call is in $new_mr - confirm which variable was intended.
2210 unless (defined $mr) {
2211 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2214 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# queue the metarecord -> source mapping
2219 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2220 $sm->metarecord( $mr->id );
2221 $sm->source( $entry->id );
2222 push @source_maps, $sm;
# leader and 008 feed the string-eval'd extractors in %descriptor_code
2225 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2226 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2228 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2229 for my $rd_field ( keys %descriptor_code ) {
2230 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2232 $rd_obj->record( $docid );
2233 push @rd_list, $rd_obj;
# MODS-derived index values, keyed by record id
2235 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2237 # step 2: build the KOHA rows
2238 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2239 $_->record( $docid ) for (@tmp_list);
2240 push @ns_list, @tmp_list;
# only API names ending in "batch" continue past the first entry
2244 last unless ($self->api_name =~ /batch$/o);
# remove previously derived rows for the requested record ids
2247 $rm_old_rd->run( { record => \@docids } );
2248 $rm_old_fr->run( { record => \@docids } );
2249 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2250 $rm_old_tr->run( { source => \@docids } );
2251 $rm_old_ar->run( { source => \@docids } );
2252 $rm_old_sr->run( { source => \@docids } );
2253 $rm_old_kr->run( { source => \@docids } );
2254 $rm_old_ser->run( { source => \@docids } );
# batch-create / batch-update the queued objects; an undefined result from
# any storage call is treated as fatal
2257 my ($sm) = $create_source_map->run(@source_maps);
2258 unless (defined $sm) {
2259 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2261 my ($mr) = $mr_update->run(@mr_list);
2262 unless (defined $mr) {
2263 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2267 my ($re) = $update_entry->run(@entry_list);
2268 unless (defined $re) {
2269 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2272 my ($rd) = $rd_create->run(@rd_list);
2273 unless (defined $rd) {
2274 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2277 my ($fr) = $fr_create->run(@ns_list);
2278 unless (defined $fr) {
2279 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2282 # step 5: insert the new metadata
2283 for my $class ( qw/title author subject keyword series/ ) {
2285 for my $doc ( @mods_data ) {
# each @mods_data element is a single-pair hash: record id => values
2286 my ($did) = keys %$doc;
2287 my ($data) = values %$doc;
2289 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2290 for my $row ( keys %{ $$data{$class} } ) {
2291 next unless (exists $$data{$class}{$row});
# rows with an empty (false) value are not stored
2292 next unless ($$data{$class}{$row}{value});
2293 my $fm_obj = $fm_constructor->new;
2294 $fm_obj->value( $$data{$class}{$row}{value} );
2295 $fm_obj->field( $$data{$class}{$row}{field_id} );
2296 $fm_obj->source( $did );
2297 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2299 push @md_list, $fm_obj;
2303 my ($cr) = $$create{$class}->run(@md_list);
2304 unless (defined $cr) {
2305 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# commit only a transaction that was started by this method
2309 unless ($outer_xact) {
2310 $log->debug("Commiting transaction started by the Ingest.", INFO);
2311 my ($c) = $commit->run;
2312 unless (defined $c and $c) {
2314 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# one implementation, four API names: the body inspects the name for
# "no_map" and a trailing "batch"
2320 __PACKAGE__->register_method(
2321 api_name => "open-ils.worm.wormize",
2322 method => "wormize",
2326 __PACKAGE__->register_method(
2327 api_name => "open-ils.worm.wormize.no_map",
2328 method => "wormize",
2332 __PACKAGE__->register_method(
2333 api_name => "open-ils.worm.wormize.batch",
2334 method => "wormize",
2338 __PACKAGE__->register_method(
2339 api_name => "open-ils.worm.wormize.no_map.batch",
2340 method => "wormize",
2355 my $acreate_source_map;
# Ingest for authority records.  Visible outline (this listing is elided):
#   1. resolve and cache the authority storage method handles;
#   2. begin a storage transaction unless one is already active;
#   3. for each retrieved authority entry: parse the MARC, build a record
#      descriptor from the leader and 008, and flatten the MARC into
#      authority full_rec rows;
#   4. delete the old descriptor / full_rec rows for the requested ids and
#      batch-create the new ones;
#   5. commit if the transaction was started here.
# Storage failures are raised as OpenSRF::EX::PANIC.
2370 sub authority_wormize {
2377 if ($self->api_name =~ /no_map/o) {
2381 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2383 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2385 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2387 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
# authority handles carry an "a" prefix to keep them apart from the biblio
# handles used by the biblio ingest
2389 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2391 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2392 unless ($aupdate_entry);
2393 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2394 unless ($arm_old_rd);
2395 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2396 unless ($arm_old_fr);
2397 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2398 unless ($ard_create);
2399 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2400 unless ($afr_create);
# true when the caller already has a storage transaction open
2403 my ($outer_xact) = $in_xact->run;
2405 unless ($outer_xact) {
2406 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2407 my ($r) = $begin->run($client);
2408 unless (defined $r and $r) {
2410 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2413 } catch Error with {
2414 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# FIX: retrieve through the authority handle ($alookup) resolved above; the
# previous code used $lookup, the biblio record_entry retrieve handle.
2424 for my $entry ( $alookup->run(@docids) ) {
2425 # step -1: grab the doc from storage
2426 next unless ($entry);
2429 # my $xslt_doc = $parser->parse_file(
2430 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2431 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2434 my $xml = $entry->marc;
2435 my $docid = $entry->id;
2436 my $marcdoc = $parser->parse_string($xml);
2437 #my $madsdoc = $mads_sheet->transform($marcdoc);
2439 #my $mads = $madsdoc->documentElement;
2440 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2442 push @entry_list, $entry;
# leader and 008 feed the string-eval'd extractors in %descriptor_code
2444 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2445 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2447 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2448 for my $rd_field ( keys %descriptor_code ) {
2449 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2451 $rd_obj->record( $docid );
2452 push @rd_list, $rd_obj;
2454 # step 2: build the KOHA rows
2455 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2456 $_->record( $docid ) for (@tmp_list);
2457 push @ns_list, @tmp_list;
# only API names ending in "batch" continue past the first entry
2461 last unless ($self->api_name =~ /batch$/o);
# remove previously derived rows for the requested record ids
2464 $arm_old_rd->run( { record => \@docids } );
2465 $arm_old_fr->run( { record => \@docids } );
2467 my ($rd) = $ard_create->run(@rd_list);
2468 unless (defined $rd) {
2469 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# FIX: create the rows through the authority handle ($afr_create); the
# previous code used $fr_create, the metabib full_rec create handle, which
# contradicts the error message below.
2472 my ($fr) = $afr_create->run(@ns_list);
2473 unless (defined $fr) {
2474 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# commit only a transaction that was started by this method
2477 unless ($outer_xact) {
2478 $log->debug("Commiting transaction started by Ingest.", INFO);
2479 my ($c) = $commit->run;
2480 unless (defined $c and $c) {
2482 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the authority ingest.  FIX: both names now dispatch to
# authority_wormize; they previously pointed at "wormize", the biblio ingest
# implementation, so authority requests were handled by the biblio code.
# NOTE(review): "authortiy" in the first api_name looks like a misspelling of
# "authority".  It is left unchanged because the name is the external
# interface - confirm with callers before renaming.
2488 __PACKAGE__->register_method(
2489 api_name => "open-ils.worm.authortiy.wormize",
2490 method => "authority_wormize",
2494 __PACKAGE__->register_method(
2495 api_name => "open-ils.worm.authority.wormize.batch",
2496 method => "authority_wormize",
2502 # --------------------------------------------------------------------------------
# Walk a parsed MARCXML document and build one row object per leader,
# controlfield and datafield child node.
#
# Arguments:
#   $marcxml - parsed XML document; its documentElement is the record root
#   $type    - class name used to construct each row object; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Every row object is built with $type->new, so a caller that passes another
# class (for example 'Fieldmapper::authority::full_rec') gets that class for
# leader and controlfield rows as well as for datafield rows. Previously the
# leader and controlfield loops always built Fieldmapper::metabib::full_rec
# objects regardless of $type.
2505 sub _marcxml_to_full_rows {
2507 my $marcxml = shift;
2508 my $type = shift || 'Fieldmapper::metabib::full_rec';
2512 my $root = $marcxml->documentElement;
# Leader element(s).
2514 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2515 next unless $tagline;
2517 my $ns = $type->new;
# Decompose to NFD, then delete every run of characters with the Unicode
# Mark property (\pM), i.e. the combining marks produced by decomposition.
2520 my $val = NFD($tagline->textContent);
2521 $val =~ s/(\pM+)//gso;
# Control fields: the row's tag comes from the element's "tag" attribute.
2527 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2528 next unless $tagline;
2530 my $ns = $type->new;
2532 $ns->tag( $tagline->getAttribute( "tag" ) );
# Same mark-stripping normalization as for the leader.
2533 my $val = NFD($tagline->textContent);
2534 $val =~ s/(\pM+)//gso;
# Data fields: tag and both indicators are read once per field, then each
# child node of the field is visited.
2540 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2541 next unless $tagline;
2543 my $tag = $tagline->getAttribute( "tag" );
2544 my $ind1 = $tagline->getAttribute( "ind1" );
2545 my $ind2 = $tagline->getAttribute( "ind2" );
2547 for my $data ( $tagline->childNodes ) {
2550 my $ns = $type->new;
# The subfield code comes from the child's "code" attribute.
2555 $ns->subfield( $data->getAttribute( "code" ) );
# The stored value is mark-stripped and lowercased.
2556 my $val = NFD($data->textContent);
2557 $val =~ s/(\pM+)//gso;
2558 $ns->value( lc($val) );
# Gather the text content found at $xpath beneath $root into a single
# space-separated buffer ($string), then strip combining marks from it.
#
# Arguments:
#   $root  - node on which findnodes() is called
#   $xpath - XPath expression selecting the nodes to harvest
2566 sub _get_field_value {
2568 my( $root, $xpath ) = @_;
2572 # grab the set of matching nodes
2573 my @nodes = $root->findnodes( $xpath );
2574 for my $value (@nodes) {
2576 # grab all children of the node
2577 my @children = $value->childNodes();
2578 for my $child (@children) {
2580 # add the child's content to the growing buffer
# quotemeta() escapes regex metacharacters so the child's text is matched
# literally in the test below.
2581 my $content = quotemeta($child->textContent);
# This is a substring test against everything accumulated so far: a child
# whose text already occurs anywhere in $string (including inside a longer,
# different value) is skipped.
# NOTE(review): when the child's text is empty, $content is empty and Perl
# treats an empty pattern as "reuse the last successful pattern" — confirm
# that skipping/keeping empty children this way is intended.
2582 next if ($string =~ /$content/); # uniquify the values
2583 $string .= $child->textContent . " ";
# Append the matched node's own text content, followed by a space.
2586 $string .= $value->textContent . " ";
# Decompose to NFD, then delete each character with the Unicode Mark
# property (\pM), i.e. the combining marks produced by decomposition.
2589 $string = NFD($string);
2590 $string =~ s/(\pM)//gso;
2595 sub modsdoc_to_values {
2596 my( $self, $mods ) = @_;
2598 for my $class (keys %$xpathset) {
2599 $data->{$class} = {};
2600 for my $type (keys %{$xpathset->{$class}}) {
2601 $data->{$class}->{$type} = {};
2602 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};