1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Map of supported format name => settings hash. Every entry carries the XML
# namespace URI under 'ns'; the initialisation code in this file additionally
# caches a compiled stylesheet under 'xslt' for the mods and mods3 entries.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
25 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
26 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
27 atom => {ns => 'http://www.w3.org/2005/Atom'},
28 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
32 rss10 => {ns => 'http://purl.org/rss/1.0/'},
33 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; all logging in this file is done through class-method
# calls on it ($log->debug, $log->error, ...).
38 my $log = 'OpenSRF::Utils::Logger';
# File-scoped XML parser and XSLT processor shared by the code below.
# NOTE(review): no `use XML::LibXML` / `use XML::LibXSLT` is visible here --
# confirm both modules are loaded before these constructors run.
40 my $parser = XML::LibXML->new();
41 my $xslt = XML::LibXSLT->new();
# One-time initialisation body (it logs itself as "post_init"). All of the
# work below is skipped once $xpathset holds at least one key.
51 unless (keys %$xpathset) {
52 $log->debug("Running post_init", DEBUG);
# Directory containing the XSL stylesheets, looked up through SettingsClient.
54 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile the MARC21slim-to-MODS stylesheet unless already cached.
56 unless ($supported_formats{mods}{xslt}) {
57 $log->debug("Loading MODS XSLT", DEBUG);
58 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
59 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same caching step for the MODS v3 stylesheet.
62 unless ($supported_formats{mods3}{xslt}) {
63 $log->debug("Loading MODS v3 XSLT", DEBUG);
64 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
65 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for every config.metabib_field row whose id is not undef.
69 my $req = OpenSRF::AppSession
70 ->create('open-ils.cstore')
71 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Record each field definition's xpath, id and format under
# $xpathset->{<field_class>}{<name>}.
# NOTE(review): $f is presumably the loop variable over @$req -- the loop
# header is not visible here; confirm.
74 if (ref $req and @$req) {
76 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
77 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
78 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
79 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD found in $stuff with a
# hexadecimal numeric character reference of the form &#xHHHH;.
# NOTE(review): presumably the core of entityize(), which other subs in this
# file call on MARC XML before parsing -- confirm against the full sub.
95 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
99 # --------------------------------------------------------------------------------
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
# Extract ingest data from a single bibliographic record object ($bib).
# Returns a hashref with keys full_rec (flat MARC rows), field_entries,
# fingerprint and descriptor. The visible code only reads from $bib.
106 sub ro_biblio_ingest_single_object {
# Entity-encode the record's MARC XML, then parse it into a document.
110 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
112 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry extractors are handed the parsed document;
# the fingerprint and descriptor methods are handed the XML string.
114 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
115 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
116 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
117 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the record id onto every produced object; the descriptor may be
# undefined, hence the guard.
119 $_->source($bib->id) for (@mXfe);
120 $_->record($bib->id) for (@mfr);
121 $rd->record($bib->id) if ($rd);
123 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
125 __PACKAGE__->register_method(
126 api_name => "open-ils.ingest.full.biblio.object.readonly",
127 method => "ro_biblio_ingest_single_object",
# Extract ingest data from a MARC XML string. Returns the same hashref shape
# as the object variant (full_rec, field_entries, fingerprint, descriptor),
# but no record id is stamped onto the results here.
132 sub ro_biblio_ingest_single_xml {
# The next positional argument is the XML; entity-encode it before parsing.
135 my $xml = OpenILS::Application::Ingest::entityize(shift);
137 my $document = $parser->parse_string($xml);
# Parsed document for the row/field extractors, raw string for the scripts.
139 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
140 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
141 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
142 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
144 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
146 __PACKAGE__->register_method(
147 api_name => "open-ils.ingest.full.biblio.xml.readonly",
148 method => "ro_biblio_ingest_single_xml",
# Extract ingest data for one bibliographic record looked up by id ($rec).
# Returns undef when the record cannot be retrieved; otherwise the result of
# the XML ingest with the record id stamped onto every produced object.
153 sub ro_biblio_ingest_single_record {
158 OpenILS::Application::Ingest->post_init();
# Fetch the biblio.record_entry row for $rec from cstore.
159 my $r = OpenSRF::AppSession
160 ->create('open-ils.cstore')
161 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
164 return undef unless ($r and @$r);
# Delegate the actual extraction to the XML-based ingest method.
166 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
168 $_->source($rec) for (@{$res->{field_entries}});
169 $_->record($rec) for (@{$res->{full_rec}});
# The descriptor is undef when the descriptor script fails, so guard the
# call exactly as the object-based variant does.
170 $res->{descriptor}->record($rec) if ($res->{descriptor});
# Publish the sub under its API name.
174 __PACKAGE__->register_method(
175 api_name => "open-ils.ingest.full.biblio.record.readonly",
176 method => "ro_biblio_ingest_single_record",
# Streaming variant: repeatedly receive a record id from the client, run the
# single-record ingest on it and send the result back.
181 sub ro_biblio_ingest_stream_record {
185 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub -- confirm
# whether it is still needed.
187 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Take one message at a time (5 second timeout); the list assignment is
# false, ending the loop, when recv returns nothing.
189 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
191 my $rec = $resp->content;
192 last unless (defined $rec);
194 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
195 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id onto the results before responding.
197 $_->source($rec) for (@{$res->{field_entries}});
198 $_->record($rec) for (@{$res->{full_rec}});
200 $client->respond( $res );
# Publish the sub under its API name.
205 __PACKAGE__->register_method(
206 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
207 method => "ro_biblio_ingest_stream_record",
# Streaming variant: repeatedly receive a MARC XML string from the client,
# run the XML ingest on it and send the result back.
212 sub ro_biblio_ingest_stream_xml {
216 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub -- confirm
# whether it is still needed.
218 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per iteration with a 5 second timeout.
220 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
222 my $xml = $resp->content;
223 last unless (defined $xml);
225 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
226 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
228 $client->respond( $res );
# Publish the sub under its API name.
233 __PACKAGE__->register_method(
234 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
235 method => "ro_biblio_ingest_stream_xml",
# Streaming import entry point: repeatedly receive a bib record object from
# the client, ingest its MARC and send the result back.
# NOTE(review): despite the rw_/import naming, the visible lines only call the
# read-only XML ingest and respond -- confirm whether a write step is intended.
240 sub rw_biblio_ingest_stream_import {
244 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub.
246 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per iteration with a 5 second timeout.
248 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
250 my $bib = $resp->content;
251 last unless (defined $bib);
253 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
254 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
# Stamp the record's id onto the results before responding.
256 $_->source($bib->id) for (@{$res->{field_entries}});
257 $_->record($bib->id) for (@{$res->{full_rec}});
259 $client->respond( $res );
# Publish the sub under its API name.
264 __PACKAGE__->register_method(
265 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
266 method => "rw_biblio_ingest_stream_import",
272 # --------------------------------------------------------------------------------
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
# Extract flat MARC rows from a single authority record object ($bib) and
# stamp its id onto each row.
279 sub ro_authority_ingest_single_object {
# Entity-encode the record's MARC XML, then parse it into a document.
283 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
285 my $document = $parser->parse_string($xml);
287 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
289 $_->record($bib->id) for (@mfr);
# NOTE(review): @mXfe, $fp and $rd are not assigned by any visible line of
# this sub (only @mfr is); the XML variant of this method returns just
# { full_rec => ... }. This looks like a leftover from the biblio version --
# confirm and trim the return value if so.
291 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
293 __PACKAGE__->register_method(
294 api_name => "open-ils.ingest.full.authority.object.readonly",
295 method => "ro_authority_ingest_single_object",
# Extract flat MARC rows from an authority MARC XML string.
# Returns a hashref whose only key is full_rec.
300 sub ro_authority_ingest_single_xml {
# The next positional argument is the XML; entity-encode it before parsing.
303 my $xml = OpenILS::Application::Ingest::entityize(shift);
305 my $document = $parser->parse_string($xml);
307 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
309 return { full_rec => \@mfr };
# Publish the sub under its API name.
311 __PACKAGE__->register_method(
312 api_name => "open-ils.ingest.full.authority.xml.readonly",
313 method => "ro_authority_ingest_single_xml",
# Extract ingest data for one authority record looked up by id ($rec).
# Returns undef when the record cannot be retrieved; otherwise the result of
# the authority XML ingest with the record id stamped onto each row.
318 sub ro_authority_ingest_single_record {
323 OpenILS::Application::Ingest->post_init();
# Fetch the authority.record_entry row for $rec from cstore.
324 my $r = OpenSRF::AppSession
325 ->create('open-ils.cstore')
326 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
329 return undef unless ($r and @$r);
# Delegate the actual extraction to the XML-based authority ingest.
331 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
333 $_->record($rec) for (@{$res->{full_rec}});
# The authority XML ingest returns only full_rec, so there is normally no
# descriptor; calling a method on the missing value would die, hence the guard.
334 $res->{descriptor}->record($rec) if ($res->{descriptor});
# Publish the sub under its API name.
338 __PACKAGE__->register_method(
339 api_name => "open-ils.ingest.full.authority.record.readonly",
340 method => "ro_authority_ingest_single_record",
# Streaming variant: repeatedly receive an authority record id from the
# client, run the single-record authority ingest and send the result back.
345 sub ro_authority_ingest_stream_record {
349 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub.
351 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per iteration with a 5 second timeout.
353 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
355 my $rec = $resp->content;
356 last unless (defined $rec);
358 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
359 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
# NOTE(review): the authority XML ingest returns only full_rec, so the
# field_entries loop appears to iterate over nothing -- confirm it is needed.
361 $_->source($rec) for (@{$res->{field_entries}});
362 $_->record($rec) for (@{$res->{full_rec}});
364 $client->respond( $res );
# Publish the sub under its API name.
369 __PACKAGE__->register_method(
370 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
371 method => "ro_authority_ingest_stream_record",
# Streaming variant: repeatedly receive authority MARC XML from the client,
# run the authority XML ingest on it and send the result back.
376 sub ro_authority_ingest_stream_xml {
380 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub.
382 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per iteration with a 5 second timeout.
384 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
386 my $xml = $resp->content;
387 last unless (defined $xml);
389 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
390 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
392 $client->respond( $res );
# Publish the sub under its API name.
397 __PACKAGE__->register_method(
398 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
399 method => "ro_authority_ingest_stream_xml",
# Streaming import entry point for authority records: repeatedly receive a
# record object from the client, ingest its MARC and send the result back.
# NOTE(review): despite the rw_/import naming, the visible lines only call the
# read-only authority XML ingest -- confirm whether a write step is intended.
404 sub rw_authority_ingest_stream_import {
408 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used by any visible line of this sub.
410 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per iteration with a 5 second timeout.
412 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the stream.
414 my $bib = $resp->content;
415 last unless (defined $bib);
417 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
418 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
# Stamp the record's id onto the results; the authority XML ingest returns
# only full_rec, so the field_entries loop appears to have nothing to visit.
420 $_->source($bib->id) for (@{$res->{field_entries}});
421 $_->record($bib->id) for (@{$res->{full_rec}});
423 $client->respond( $res );
# Publish the sub under its API name.
428 __PACKAGE__->register_method(
429 api_name => "open-ils.ingest.full.authority.bib_stream.import",
430 method => "rw_authority_ingest_stream_import",
436 # --------------------------------------------------------------------------------
437 # MARC index extraction
439 package OpenILS::Application::Ingest::XPATH;
440 use base qw/OpenILS::Application::Ingest/;
441 use Unicode::Normalize;
443 # give this an XML documentElement and an XPATH expression
# Concatenates the text content of the nodes matched by $xpath into $string,
# each piece followed by a single space.
444 sub xpath_to_string {
448 my $ns_prefix = shift;
# Register the namespace on the element only when both the URI and the
# prefix were supplied.
451 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
455 # grab the set of matching nodes
456 my @nodes = $xml->findnodes( $xpath );
457 for my $value (@nodes) {
459 # grab all children of the node
460 my @children = $value->childNodes();
461 for my $child (@children) {
463 # add the childs content to the growing buffer
# quotemeta makes the text safe to use as a literal regex; with $unique set,
# a child whose text already occurs anywhere in the buffer is skipped.
464 my $content = quotemeta($child->textContent);
465 next if ($unique && $string =~ /$content/); # uniquify the values
466 $string .= $child->textContent . " ";
# NOTE(review): this appends the whole node's text; the condition that
# selects between it and the per-child loop is not visible here -- confirm.
469 $string .= $value->textContent . " ";
# For each requested field class, evaluate every configured XPath definition
# against the record and send one normalised field-entry object per
# definition to the client.
475 sub class_index_string_xml {
481 OpenILS::Application::Ingest->post_init();
# Accept either an already parsed document or an XML string.
482 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
486 for my $class (@classes) {
# Fieldmapper class used to build the entries of this field class.
487 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
488 for my $type ( keys %{ $xpathset->{$class} } ) {
# $def holds the xpath/id/format of this definition; $sf the settings of the
# format it is written against.
490 my $def = $xpathset->{$class}->{$type};
491 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format.
496 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
497 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix for the XPath expression.
500 my $value = xpath_to_string(
501 $document->documentElement => $def->{xpath},
502 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop combining marks and control/other
# characters, trim trailing non-word characters, and remove dots that sit
# between two word characters.
508 $value = NFD($value);
509 $value =~ s/\pM+//sgo;
510 $value =~ s/\pC+//sgo;
511 $value =~ s/\W+$//sgo;
513 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
# Build the field entry, tag it with the definition's id and stream it back.
516 my $fm = $class_constructor->new;
517 $fm->value( $value );
518 $fm->field( $xpathset->{$class}->{$type}->{id} );
519 $client->respond($fm);
# Publish the sub under its API name.
524 __PACKAGE__->register_method(
525 api_name => "open-ils.ingest.field_entry.class.xml",
526 method => "class_index_string_xml",
# Record-id front end for the class-based field-entry extraction: fetch the
# record's MARC, run the XML extractor for @classes and relay each entry.
532 sub class_index_string_record {
538 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, while the
# otherwise parallel all-classes variant in this file retrieves from
# biblio.record_entry -- confirm which table is intended.
539 my $r = OpenSRF::AppSession
540 ->create('open-ils.cstore')
541 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
# Nothing to do when the record could not be fetched.
544 return undef unless ($r and @$r);
546 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
548 $client->respond($fm);
# Publish the sub under its API name.
552 __PACKAGE__->register_method(
553 api_name => "open-ils.ingest.field_entry.class.record",
554 method => "class_index_string_record",
# Run the class-based field-entry extraction for every class known to
# $xpathset and relay each resulting entry to the client.
560 sub all_index_string_xml {
565 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
566 $client->respond($fm);
# Publish the sub under its API name.
570 __PACKAGE__->register_method(
571 api_name => "open-ils.ingest.extract.field_entry.all.xml",
572 method => "all_index_string_xml",
# Record-id front end for the all-classes field-entry extraction: fetch the
# bib record's MARC, run the XML extractor for every class in $xpathset and
# relay each entry to the client.
578 sub all_index_string_record {
583 OpenILS::Application::Ingest->post_init();
# Fetch the biblio.record_entry row for $rec from cstore.
584 my $r = OpenSRF::AppSession
585 ->create('open-ils.cstore')
586 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Nothing to do when the record could not be fetched.
589 return undef unless ($r and @$r);
591 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
593 $client->respond($fm);
# Publish the sub under its API name.
597 __PACKAGE__->register_method(
598 api_name => "open-ils.ingest.extract.field_entry.all.record",
599 method => "all_index_string_record",
605 # --------------------------------------------------------------------------------
608 package OpenILS::Application::Ingest::FlatMARC;
609 use base qw/OpenILS::Application::Ingest/;
610 use Unicode::Normalize;
# Flatten a MARCXML document into a list of full_rec row objects, visiting
# the leader, the control fields and every subfield of every data field.
613 sub _marcxml_to_full_rows {
# Fieldmapper namespace of the row class; defaults to 'metabib'.
616 my $xmltype = shift || 'metabib';
618 my $type = "Fieldmapper::${xmltype}::full_rec";
# Locate the first <record> element regardless of its namespace.
622 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
624 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
625 next unless $tagline;
630 my $val = $tagline->textContent;
# Control fields: the row's tag comes from the element's "tag" attribute.
# NOTE(review): $ns is presumably a fresh $type object created per row -- its
# construction is not visible here.
640 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
641 next unless $tagline;
645 $ns->tag( $tagline->getAttribute( "tag" ) );
646 my $val = $tagline->textContent;
# Data fields: read tag and both indicators, then visit each subfield.
656 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
657 next unless $tagline;
659 my $tag = $tagline->getAttribute( "tag" );
660 my $ind1 = $tagline->getAttribute( "ind1" );
661 my $ind2 = $tagline->getAttribute( "ind2" );
663 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
# Subfield code from the "code" attribute; the stored value is lower-cased.
671 $ns->subfield( $data->getAttribute( "code" ) );
672 my $val = $data->textContent;
677 $ns->value( lc($val) );
683 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the method registered below as "flat_marc_xml": flatten the given
# MARC XML into full_rec rows and stream every row to the client.
692 $log->debug("processing [$xml]");
# Accept either an already parsed document or an XML string.
694 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name this sub was invoked under.
696 my $type = 'metabib';
697 $type = 'authority' if ($self->api_name =~ /authority/o);
699 OpenILS::Application::Ingest->post_init();
701 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is published for both authority and biblio XML.
704 __PACKAGE__->register_method(
705 api_name => "open-ils.ingest.flat_marc.authority.xml",
706 method => "flat_marc_xml",
711 __PACKAGE__->register_method(
712 api_name => "open-ils.ingest.flat_marc.biblio.xml",
713 method => "flat_marc_xml",
# Record-id front end for the flat-MARC extraction: fetch the record, run
# the matching XML flattener and stream every row to the client.
719 sub flat_marc_record {
# Switch to authority handling when invoked under an authority API name.
# NOTE(review): the default value of $type is not visible here; it is
# interpolated into both method names below -- confirm it matches them.
725 $type = 'authority' if ($self->api_name =~ /authority/o);
727 OpenILS::Application::Ingest->post_init();
728 my $r = OpenSRF::AppSession
729 ->create('open-ils.cstore')
730 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
# Nothing to do without a record carrying MARC.
734 return undef unless ($r and $r->marc);
736 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Stream each row and log its JSON form.
# NOTE(review): no `use JSON` is visible in this listing -- confirm the JSON
# class is loaded.
737 for my $row (@rows) {
738 $client->respond($row);
739 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same sub is published for both biblio and authority records.
743 __PACKAGE__->register_method(
744 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
745 method => "flat_marc_record",
750 __PACKAGE__->register_method(
751 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
752 method => "flat_marc_record",
758 # --------------------------------------------------------------------------------
761 package OpenILS::Application::Ingest::Biblio::Fingerprint;
762 use base qw/OpenILS::Application::Ingest/;
763 use Unicode::Normalize;
764 use OpenSRF::EX qw/:try/;
# Record-id front end for fingerprinting: fetch the bib record and run the
# XML fingerprint method on its MARC.
766 sub biblio_fingerprint_record {
771 OpenILS::Application::Ingest->post_init();
773 my $r = OpenSRF::AppSession
774 ->create('open-ils.cstore')
775 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Nothing to do without a record carrying MARC.
778 return undef unless ($r and $r->marc);
780 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so this
# message interpolates the reference itself rather than its contents.
781 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Force the quality score to an integer.
782 $fp->{quality} = int($fp->{quality});
# Publish the sub under its API name.
785 __PACKAGE__->register_method(
786 api_name => "open-ils.ingest.fingerprint.record",
787 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARC XML string by running the configured
# fingerprint script through OpenILS::Utils::ScriptRunner.
793 sub biblio_fingerprint {
# The next positional argument is the XML; entity-encode it first.
796 my $xml = OpenILS::Application::Ingest::entityize(shift);
798 $log->internal("Got MARC [$xml]");
# Settings path under which this application's script configuration lives.
801 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
802 my $conf = OpenSRF::Utils::SettingsClient->new;
# script_path may be a single value or a list; normalise it to an array ref.
804 my $libs = $conf->config_value(@pfx, 'script_path');
805 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
806 my $script_libs = (ref($libs)) ? $libs : [$libs];
808 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines; presumably
# it is a cached runner that is only built when missing -- confirm.
810 $fp_script = new OpenILS::Utils::ScriptRunner
811 ( file => $script_file,
812 paths => $script_libs,
813 reset_count => 100 );
# Hand the MARC to the script under the 'environment' name.
816 $fp_script->insert('environment' => {marc => $xml} => 1);
# A false result from run() is logged as an error and ends the sub with undef.
818 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
819 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub under its API name.
823 __PACKAGE__->register_method(
824 api_name => "open-ils.ingest.fingerprint.xml",
825 method => "biblio_fingerprint",
# Extract a record descriptor from a MARC XML string by running the
# configured descriptor script through OpenILS::Utils::ScriptRunner.
831 sub biblio_descriptor {
# The next positional argument is the XML; entity-encode it first.
834 my $xml = OpenILS::Application::Ingest::entityize(shift);
836 $log->internal("Got MARC [$xml]");
# Settings path under which this application's script configuration lives.
839 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
840 my $conf = OpenSRF::Utils::SettingsClient->new;
# script_path may be a single value or a list; normalise it to an array ref.
842 my $libs = $conf->config_value(@pfx, 'script_path');
843 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
844 my $script_libs = (ref($libs)) ? $libs : [$libs];
846 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is not declared in the visible lines; presumably
# it is a cached runner that is only built when missing -- confirm.
848 $rd_script = new OpenILS::Utils::ScriptRunner
849 ( file => $script_file,
850 paths => $script_libs,
851 reset_count => 100 );
# Hand the MARC to the script as 'environment.marc'.
854 $log->debug("Setting up environment for descriptor extraction script...");
855 $rd_script->insert('environment.marc' => $xml => 1);
856 $log->debug("Environment building complete...");
# A false result from run() is logged as an error and ends the sub with undef.
858 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
859 $log->debug("Script for biblio descriptor extraction completed successfully");
# Publish the sub under its API name.
863 __PACKAGE__->register_method(
864 api_name => "open-ils.ingest.descriptor.xml",
865 method => "biblio_descriptor",
# Return whatever the storage service reports as the current transaction.
# NOTE(review): presumably the body of in_transaction(), which other subs in
# this file call as a boolean test -- the sub header is not visible here.
876 OpenILS::Application::Ingest->post_init();
877 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Start a storage transaction when none is active and return the storage
# service's current-transaction value.
# Throws OpenSRF::EX::PANIC when the BEGIN request fails.
880 sub begin_transaction {
884 OpenILS::Application::Ingest->post_init();
# Ask the storage service whether a transaction is already open.
885 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
889 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
890 #__PACKAGE__->st_sess->connect;
891 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A false or undefined reply means BEGIN failed: roll back and raise.
892 unless (defined $r and $r) {
893 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
894 #__PACKAGE__->st_sess->disconnect;
895 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
# NOTE(review): this reports a failure through debug() with an ERROR level
# argument; the branch it belongs to is not visible here.
899 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
902 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the active storage transaction.
# NOTE(review): the conditions selecting between the rollback request, the
# "isn't inside a transaction" message and the PANIC are not visible here.
905 sub rollback_transaction {
909 OpenILS::Application::Ingest->post_init();
# Ask the storage service whether a transaction is currently open.
910 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
914 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
916 $log->debug("Ingest isn't inside a transaction.", INFO);
919 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the active storage transaction.
# Throws OpenSRF::EX::PANIC when the COMMIT request fails.
925 sub commit_transaction {
929 OpenILS::Application::Ingest->post_init();
# Ask the storage service whether a transaction is currently open.
930 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
933 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
935 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A false or undefined reply means COMMIT failed: roll back and raise.
936 unless (defined $r and $r) {
937 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
938 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
940 #__PACKAGE__->st_sess->disconnect;
# NOTE(review): the branches guarding the message and the PANIC below are
# not visible here.
942 $log->debug("Ingest isn't inside a transaction.", INFO);
945 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Look up the method named by $method, run it with the remaining arguments
# and return only the first value it produced.
# NOTE(review): presumably the body of storage_req(), the helper called
# throughout this file -- the sub header is not visible here.
954 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
955 return shift( @res );
# Delete the derived rows (full_rec and record_descriptor) stored for the
# authority record $rec, working inside a savepoint.
958 sub scrub_authority_record {
# Open a transaction when the caller has not already done so.
964 if (!OpenILS::Application::Ingest->in_transaction) {
965 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# The savepoint lets a failed scrub be undone without touching the
# surrounding transaction.
971 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
973 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
974 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
976 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and rewind to the savepoint.
978 $log->debug('Scrubbing failed : '.shift(), ERROR);
979 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# With $commit set, finish the transaction according to the outcome.
# NOTE(review): presumably $commit is set only when this sub opened the
# transaction itself -- its assignment is not visible here.
983 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
984 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its API name.
987 __PACKAGE__->register_method(
988 api_name => "open-ils.worm.scrub.authority",
989 method => "scrub_authority_record",
# Delete every derived metabib row stored for the bib record(s) $rec and
# repair or remove metarecords that used $rec as their master record.
995 sub scrub_metabib_record {
# A hash argument is treated as a search clause and resolved to an id list.
1000 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1001 $rec = OpenILS::Application::Ingest->storage_req(
1002 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction when the caller has not already done so.
1007 if (!OpenILS::Application::Ingest->in_transaction) {
1008 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# The savepoint lets a failed scrub be undone on its own.
1014 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Remove flat MARC rows, source maps, descriptors and all five classes of
# field entries for the record.
1016 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1017 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1018 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1019 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1020 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1021 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1022 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1023 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Find the metarecords that name this record as their master.
1025 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1026 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1028 for my $mr (@$masters) {
1029 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source-map rows for this metarecord.
1030 my $others = OpenILS::Application::Ingest->storage_req(
1031 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the stored mods.
# NOTE(review): presumably this runs only when @$others is non-empty, with
# the delete further down as the alternative -- the condition is not visible.
1034 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1035 $mr->master_record($others->[0]->source);
1036 OpenILS::Application::Ingest->storage_req(
1037 'open-ils.storage.direct.metabib.metarecord.remote_update',
1039 { master_record => $others->[0]->source, mods => undef }
# Otherwise delete the metarecord; progress goes to both STDERR and the log.
1042 warn "Removing metarecord whose master is $rec";
1043 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1044 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1045 warn "Metarecord removed";
1046 $log->debug( "Metarecord removed", DEBUG);
1050 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and rewind to the savepoint.
1053 $log->debug('Scrubbing failed : '.shift(), ERROR);
1054 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# With $commit set, finish the transaction according to the outcome.
1058 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1059 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its API name.
1062 __PACKAGE__->register_method(
1063 api_name => "open-ils.worm.scrub.biblio",
1064 method => "scrub_metabib_record",
# Re-ingest every bibliographic record mapped to the metarecord $mrec and
# report one result hash (record, metarecord, success) per source record to
# the client.
1069 sub wormize_biblio_metarecord {
# All metarecord_source_map rows that belong to this metarecord.
1074 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1077 for my $r (@$recs) {
# wormize_biblio_record is called directly with this method's $self, so the
# nomap/noscrub checks it performs on the API name follow the name this sub
# was invoked under.
1080 $success = wormize_biblio_record($self => $client => $r->source);
1082 { record => $r->source,
# Take the metarecord id from the current source-map row ($r).
1083 metarecord => $r->metarecord,
1084 success => $success,
# On an Error, report the same fields for the failed record.
1087 } catch Error with {
1090 { record => $r->source,
1091 metarecord => $r->metarecord,
1092 success => $success,
# The sub is published under four API names, one per combination of the
# nomap and noscrub suffixes.
1100 __PACKAGE__->register_method(
1101 api_name => "open-ils.worm.wormize.metarecord",
1102 method => "wormize_biblio_metarecord",
1107 __PACKAGE__->register_method(
1108 api_name => "open-ils.worm.wormize.metarecord.nomap",
1109 method => "wormize_biblio_metarecord",
1114 __PACKAGE__->register_method(
1115 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1116 method => "wormize_biblio_metarecord",
1121 __PACKAGE__->register_method(
1122 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1123 method => "wormize_biblio_metarecord",
# Full re-ingest of one or more bib records: optionally scrub the old
# derived rows, extract fingerprint, flat MARC rows, descriptor and field
# entries for each record, optionally map records to metarecords, then store
# everything in batches.
1130 sub wormize_biblio_record {
# A hash argument is treated as a search clause and resolved to an id list.
1135 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1136 $rec = OpenILS::Application::Ingest->storage_req(
1137 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction when the caller has not already done so.
1143 if (!OpenILS::Application::Ingest->in_transaction) {
1144 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1150 # clean up the cruft
# Skipped when the sub was invoked under an API name containing "noscrub".
1151 unless ($self->api_name =~ /noscrub/o) {
1152 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Load the record entries to process.
1156 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# Accumulators filled during extraction and written out afterwards.
1159 my @rec_descriptor = ();
1167 my %metarecord = ();
1168 my @source_map = ();
1169 for my $r (@$bibs) {
# One savepoint per record, so a failed extraction can be undone by itself.
1171 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1173 my $xml = $parser->parse_string($r->marc);
1175 #update the fingerprint
# NOTE(review): this and the lookups below use 'open-ils.worm.*' names for
# the fingerprint, flat_marc, biblio_leader and field_entry methods, while
# the registrations visible in this file use 'open-ils.ingest.*' for those
# services -- confirm the worm names are registered somewhere.
1176 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# Only write back when the fingerprint or quality actually changed.
# NOTE(review): the quality comparison uses the string operator `ne` on
# numeric values -- confirm `!=` is not intended.
1177 OpenILS::Application::Ingest->storage_req(
1178 'open-ils.storage.direct.biblio.record_entry.remote_update',
1180 { fingerprint => $fp->{fingerprint},
1181 quality => int($fp->{quality}) }
1182 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1184 # the full_rec stuff
1185 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1186 $fr->record( $r->id );
1187 push @full_rec, $fr;
1190 # the rec_descriptor stuff
1191 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1192 $rd->record( $r->id );
1193 push @rec_descriptor, $rd;
1195 # the indexing field entry stuff
1196 for my $class ( qw/title author subject keyword series/ ) {
1197 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1198 $fe->source( $r->id );
1199 push @{$field_entry{$class}}, $fe;
# Metarecord mapping, skipped under API names containing "nomap": reuse the
# metarecord that shares this fingerprint.
1203 unless ($self->api_name =~ /nomap/o) {
1204 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# Create a new metarecord with this record as master.
# NOTE(review): presumably guarded by "no existing metarecord found" -- the
# condition is not visible here.
1207 $mr = Fieldmapper::metabib::metarecord->new;
1208 $mr->fingerprint( $fp->{fingerprint} );
1209 $mr->master_record( $r->id );
1210 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Queue a source-map row linking this record to the metarecord.
1213 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1214 $mr_map->metarecord( $mr->id );
1215 $mr_map->source( $r->id );
1216 push @source_map, $mr_map;
# Remember each touched metarecord once, keyed by id.
1218 $metarecord{$mr->id} = $mr;
1220 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for one record: log and rewind to that record's savepoint.
1222 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1223 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Storage phase: runs only when at least one descriptor was extracted.
1228 if (@rec_descriptor) {
1229 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1231 OpenILS::Application::Ingest->storage_req(
1232 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For every touched metarecord, make the highest-quality mapped bib record
# the master and clear the stored mods.
1236 for my $mr ( values %metarecord ) {
1237 my $sources = OpenILS::Application::Ingest->storage_req(
1238 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1242 my $bibs = OpenILS::Application::Ingest->storage_req(
1243 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1244 [ map { $_->source } @$sources ]
# Descending numeric sort on quality; the first element wins.
1247 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1249 OpenILS::Application::Ingest->storage_req(
1250 'open-ils.storage.direct.metabib.metarecord.remote_update',
1252 { master_record => $master->id, mods => undef }
# Batch-create the extracted rows; each field-entry call is skipped when its
# list is empty.
1256 OpenILS::Application::Ingest->storage_req(
1257 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1259 ) if (@rec_descriptor);
1261 OpenILS::Application::Ingest->storage_req(
1262 'open-ils.storage.direct.metabib.full_rec.batch.create',
1266 OpenILS::Application::Ingest->storage_req(
1267 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1268 @{ $field_entry{title} }
1269 ) if (@{ $field_entry{title} });
1271 OpenILS::Application::Ingest->storage_req(
1272 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1273 @{ $field_entry{author} }
1274 ) if (@{ $field_entry{author} });
1276 OpenILS::Application::Ingest->storage_req(
1277 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1278 @{ $field_entry{subject} }
1279 ) if (@{ $field_entry{subject} });
1281 OpenILS::Application::Ingest->storage_req(
1282 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1283 @{ $field_entry{keyword} }
1284 ) if (@{ $field_entry{keyword} });
1286 OpenILS::Application::Ingest->storage_req(
1287 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1288 @{ $field_entry{series} }
1289 ) if (@{ $field_entry{series} });
1291 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the storage phase: log and rewind to its savepoint.
1297 $log->debug('Wormization failed : '.shift(), ERROR);
1298 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# With $commit set, finish the transaction according to the outcome.
1302 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1303 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The sub is published under four API names; the nomap and noscrub suffixes
# are detected through $self->api_name above.
1306 __PACKAGE__->register_method(
1307 api_name => "open-ils.worm.wormize.biblio",
1308 method => "wormize_biblio_record",
1312 __PACKAGE__->register_method(
1313 api_name => "open-ils.worm.wormize.biblio.nomap",
1314 method => "wormize_biblio_record",
1318 __PACKAGE__->register_method(
1319 api_name => "open-ils.worm.wormize.biblio.noscrub",
1320 method => "wormize_biblio_record",
1324 __PACKAGE__->register_method(
1325 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1326 method => "wormize_biblio_record",
# Ingest authority records: each record returned by the storage search has its
# MARCXML flattened into full_rec rows, and those rows are stored through the
# storage service inside the 'wormize_authority_record' savepoint.
# NOTE(review): argument unpacking, the try/catch scaffolding and the return
# value are not visible in this excerpt -- confirm against the full source.
1331 sub wormize_authority_record {
# Only open a storage transaction when none is already in progress.
1337 if (!OpenILS::Application::Ingest->in_transaction) {
1338 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1344 # clean up the cruft
# API names containing "noscrub" skip the scrub step; a false scrub result is fatal.
1345 unless ($self->api_name =~ /noscrub/o) {
1346 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1350 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1353 my @rec_descriptor = ();
1354 for my $r (@$bibs) {
1355 my $xml = $parser->parse_string($r->marc);
1357 # the full_rec stuff
1358 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1359 $fr->record( $r->id ); # tie the flattened row to its authority record
1360 push @full_rec, $fr;
1363 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1364 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1365 #$rd->record( $r->id );
1366 #push @rec_descriptor, $rd;
# All row creation happens between savepoint.set and savepoint.release.
1370 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1372 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1373 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1375 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Presumably the error handler of an enclosing try block (not visible here):
# log the failure and roll back to the savepoint -- TODO confirm.
1378 $log->debug('Wormization failed : '.shift(), ERROR);
1379 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# The transaction is finished here only when $commit is set; $success picks
# commit versus rollback.
1383 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1384 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1387 __PACKAGE__->register_method(
1388 api_name => "open-ils.worm.wormize.authority",
1389 method => "wormize_authority_record",
1393 __PACKAGE__->register_method(
1394 api_name => "open-ils.worm.wormize.authority.noscrub",
1395 method => "wormize_authority_record",
1401 # --------------------------------------------------------------------------------
1402 # MARC index extraction
1404 package OpenILS::Application::Ingest::XPATH;
1405 use base qw/OpenILS::Application::Ingest/;
1406 use Unicode::Normalize;
1408 # give this a MODS documentElement and an XPATH expression
# Returns the text found by $xpath under $xml as a single space-separated
# string, normalized with NFD. When both $ns_uri and $ns_prefix are supplied
# the namespace is registered on $xml first; when $unique is true, child text
# already present in the accumulated string is skipped.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri and $unique and the
# initialisation of $string are not visible in this excerpt.
1409 sub _xpath_to_string {
1413 my $ns_prefix = shift;
1416 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1420 # grab the set of matching nodes
1421 my @nodes = $xml->findnodes( $xpath );
1422 for my $value (@nodes) {
1424 # grab all children of the node
1425 my @children = $value->childNodes();
1426 for my $child (@children) {
1428 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern in the match below
1429 my $content = quotemeta($child->textContent);
1430 next if ($unique && $string =~ /$content/); # uniquify the values
1431 $string .= $child->textContent . " ";
# presumably the fallback for a matched node without children -- the guarding
# condition is not visible here, TODO confirm
1434 $string .= $value->textContent . " ";
1437 return NFD($string);
# For every index type configured in $xpathset for $class, extract the matching
# text from the MODS rendering of $xml, normalize it, and respond to the client
# with a Fieldmapper::metabib::<class>_field_entry object carrying the value
# and the field id.
# NOTE(review): argument unpacking and the tail of the _xpath_to_string call
# are not visible in this excerpt.
1440 sub class_all_index_string_xml {
1446 OpenILS::Application::Ingest->post_init();
1447 $xml = $parser->parse_string($xml) unless (ref $xml); # accept a raw XML string or a parsed document
1449 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1450 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MODS transform of $xml is recomputed on every iteration.
1451 my $value = _xpath_to_string(
1452 $mods_sheet->transform($xml)->documentElement,
1453 $xpathset->{$class}->{$type}->{xpath},
1454 "http://www.loc.gov/mods/",
1461 $value = NFD($value);
1462 $value =~ s/\pM+//sgo; # drop mark (\pM) characters exposed by the NFD decomposition
1463 $value =~ s/\pC+//sgo; # drop characters in the \pC ("other") categories
1464 $value =~ s/\W+$//sgo; # trim trailing non-word characters
1466 $value =~ s/(\w)\./$1/sgo; # remove a period that directly follows a word character
1467 $value = lc($value);
1469 my $fm = $class_constructor->new;
1470 $fm->value( $value );
1471 $fm->field( $xpathset->{$class}->{$type}->{id} );
1472 $client->respond($fm);
1476 __PACKAGE__->register_method(
1477 api_name => "open-ils.worm.field_entry.class.xml",
1478 method => "class_all_index_string_xml",
# Record-id front end for open-ils.worm.field_entry.class.xml: retrieves bib
# record $rec from storage, runs the XML method on its MARC for $class, and
# passes each resulting object on to the client.
# NOTE(review): argument unpacking is not visible in this excerpt.
1484 sub class_all_index_string_record {
1490 OpenILS::Application::Ingest->post_init();
1491 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1493 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1495 $client->respond($fm);
1499 __PACKAGE__->register_method(
1500 api_name => "open-ils.worm.field_entry.class.record",
1501 method => "class_all_index_string_record",
# Returns the index string for one $class/$type pair: $xml is transformed with
# $mods_sheet and the xpath configured in $xpathset is evaluated against the
# result with the "mods" prefix bound and value uniquifying enabled (final
# argument 1).
# NOTE(review): argument unpacking is not visible in this excerpt.
1508 sub class_index_string_xml {
1515 OpenILS::Application::Ingest->post_init();
1516 $xml = $parser->parse_string($xml) unless (ref $xml); # accept a raw XML string or a parsed document
1517 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1519 __PACKAGE__->register_method(
1520 api_name => "open-ils.worm.class.type.xml",
1521 method => "class_index_string_xml",
# Record-id front end for open-ils.worm.class.type.xml: retrieves bib record
# $rec from storage, runs the XML method on its MARC for $class/$type, and logs
# the first value it returns.
# NOTE(review): argument unpacking and the return statement are not visible in
# this excerpt.
1526 sub class_index_string_record {
1533 OpenILS::Application::Ingest->post_init();
1534 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1536 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1537 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1540 __PACKAGE__->register_method(
1541 api_name => "open-ils.worm.class.type.record",
1542 method => "class_index_string_record",
1556 OpenILS::Application::Ingest->post_init();
1557 $xml = $parser->parse_string($xml) unless (ref $xml);
1558 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1560 __PACKAGE__->register_method(
1561 api_name => "open-ils.worm.xpath.xml",
1562 method => "xml_xpath",
1576 OpenILS::Application::Ingest->post_init();
1577 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1579 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1580 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1583 __PACKAGE__->register_method(
1584 api_name => "open-ils.worm.xpath.record",
1585 method => "record_xpath",
1591 # --------------------------------------------------------------------------------
1594 package OpenILS::Application::Ingest::Biblio::Leader;
1595 use base qw/OpenILS::Application::Ingest/;
1596 use Unicode::Normalize;
1598 our %marc_type_groups = (
1601 VIS => q/[gkro]{1}/,
# Build one anchored alternation covering every requested type group. A hash
# slice (@marc_type_groups{...}) is needed to fetch one pattern per group name;
# the scalar element form $marc_type_groups{@_} performs a single lookup and
# does not expand the argument list into one key per name. The non-capturing
# group keeps ^ and $ bound to every alternative, not just the first and last.
1610 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
# Maps record-descriptor field names to closures that pull the value out of
# fixed character positions of $ldr or $oo8. The closures take no arguments;
# _extract_biblio_descriptors sets $ldr (leader) and $oo8 (008 control field)
# with local before calling them.
1614 our %biblio_descriptor_code = (
1615 item_type => sub { substr($ldr,6,1); },
# Fragment of an entry whose key line is not visible in this excerpt: the 008
# offset used depends on which type group leader position 6 falls into.
1618 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1619 return substr($oo8,29,1);
1620 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1621 return substr($oo8,23,1);
1625 bib_level => sub { substr($ldr,7,1); },
1626 control_type => sub { substr($ldr,8,1); },
1627 char_encoding => sub { substr($ldr,9,1); },
1628 enc_level => sub { substr($ldr,17,1); },
1629 cat_form => sub { substr($ldr,18,1); },
1630 pub_status => sub { substr($ldr,5,1); },
1631 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat read the same 008 position; each yields undef unless
# leader position 6 matches its own type group.
1632 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1633 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1634 audience => sub { substr($oo8,22,1); },
# Builds a Fieldmapper::metabib::record_descriptor from a MARCXML document by
# calling every extractor in %biblio_descriptor_code and storing the result
# through the accessor of the same name.
# NOTE(review): the unpacking of $xml and the return statement are not visible
# in this excerpt.
1637 sub _extract_biblio_descriptors {
# local gives the extractor closures this record's leader/008/007 values for
# the duration of the call only.
1640 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1641 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1642 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1644 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1645 for my $rd_field ( keys %biblio_descriptor_code ) {
1646 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# Returns the result of _extract_biblio_descriptors for $xml, parsing $xml
# first when it is given as a string rather than a reference.
# NOTE(review): argument unpacking is not visible in this excerpt.
1652 sub extract_biblio_desc_xml {
1657 $xml = $parser->parse_string($xml) unless (ref $xml);
1659 return _extract_biblio_descriptors( $xml );
1661 __PACKAGE__->register_method(
1662 api_name => "open-ils.worm.biblio_leader.xml",
1663 method => "extract_biblio_desc_xml",
# Record-id front end for open-ils.worm.biblio_leader.xml: retrieves bib record
# $rec from storage, runs the XML method on its MARC, and logs the first result
# as JSON.
# NOTE(review): argument unpacking and the return statement are not visible in
# this excerpt.
1668 sub extract_biblio_desc_record {
1673 OpenILS::Application::Ingest->post_init();
1674 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1676 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1677 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1680 __PACKAGE__->register_method(
1681 api_name => "open-ils.worm.biblio_leader.record",
1682 method => "extract_biblio_desc_record",
1687 # --------------------------------------------------------------------------------
1690 package OpenILS::Application::Ingest::FlatMARC;
1691 use base qw/OpenILS::Application::Ingest/;
1692 use Unicode::Normalize;
# Flattens a MARCXML document into Fieldmapper::<xmltype>::full_rec objects,
# visiting the leader, each controlfield, and each subfield of each datafield.
# $xmltype defaults to 'metabib'.
# NOTE(review): several setter calls, the pushes onto @ns_list and the return
# statement are not visible in this excerpt.
1695 sub _marcxml_to_full_rows {
1697 my $marcxml = shift;
1698 my $xmltype = shift || 'metabib';
1700 my $type = "Fieldmapper::${xmltype}::full_rec";
# local-name() matches the record element whatever its namespace prefix
1704 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1706 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1707 next unless $tagline;
1709 my $ns = $type->new;
1712 my $val = $tagline->textContent;
1714 $val =~ s/\pM+//sgo; # drop mark (\pM) characters
1715 $val =~ s/\pC+//sgo; # drop characters in the \pC ("other") categories
1716 $val =~ s/\W+$//sgo; # trim trailing non-word characters
1722 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1723 next unless $tagline;
1725 my $ns = $type->new;
1727 $ns->tag( $tagline->getAttribute( "tag" ) );
1728 my $val = $tagline->textContent;
1730 $val =~ s/\pM+//sgo;
1731 $val =~ s/\pC+//sgo;
1732 $val =~ s/\W+$//sgo;
1738 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1739 next unless $tagline;
# tag and indicators are read once per datafield, before the subfield loop
1741 my $tag = $tagline->getAttribute( "tag" );
1742 my $ind1 = $tagline->getAttribute( "ind1" );
1743 my $ind2 = $tagline->getAttribute( "ind2" );
1745 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1748 my $ns = $type->new;
1753 $ns->subfield( $data->getAttribute( "code" ) );
1754 my $val = $data->textContent;
1756 $val =~ s/\pM+//sgo;
1757 $val =~ s/\pC+//sgo;
1758 $val =~ s/\W+$//sgo;
1759 $ns->value( lc($val) ); # subfield values are stored lower-cased
1765 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1774 $xml = $parser->parse_string($xml) unless (ref $xml);
1776 my $type = 'metabib';
1777 $type = 'authority' if ($self->api_name =~ /authority/o);
1779 OpenILS::Application::Ingest->post_init();
1781 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1784 __PACKAGE__->register_method(
1785 api_name => "open-ils.worm.flat_marc.authority.xml",
1786 method => "flat_marc_xml",
1791 __PACKAGE__->register_method(
1792 api_name => "open-ils.worm.flat_marc.biblio.xml",
1793 method => "flat_marc_xml",
# Record-id front end for the flat_marc XML methods: retrieves the record from
# storage and relays every row produced by open-ils.worm.flat_marc.<type>.xml
# to the client. <type> is 'authority' when the API name contains "authority",
# otherwise 'biblio'.
# NOTE(review): argument unpacking is not visible in this excerpt.
1799 sub flat_marc_record {
1804 my $type = 'biblio';
1805 $type = 'authority' if ($self->api_name =~ /authority/o);
1807 OpenILS::Application::Ingest->post_init();
1808 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1810 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1813 __PACKAGE__->register_method(
1814 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1815 method => "flat_marc_record",
1820 __PACKAGE__->register_method(
1821 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1822 method => "flat_marc_record",
1829 # --------------------------------------------------------------------------------
1832 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1833 use base qw/OpenILS::Application::Ingest/;
1834 use Unicode::Normalize;
1835 use OpenSRF::EX qw/:try/;
1837 my @fp_mods_xpath = (
1838 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1841 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1842 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1843 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1844 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1847 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1849 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1850 $text =~ s/\pM+//gso;
1851 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1853 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1854 $text =~ s/\s+/ /sgo;
1855 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1856 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1857 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1858 $text =~ s/\b(?:the|an?)\b//sgo;
1859 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1860 $text =~ s/\[.[^\]]+\]//sgo;
1861 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1862 $text =~ s/\s*[;\/\.]*$//sgo;
1863 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1868 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1869 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1872 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1874 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1875 $text =~ s/\pM+//gso;
1876 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1878 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1879 $text =~ s/\s+/ /sgo;
1880 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1881 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1882 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1883 $text =~ s/,?\s+.*$//sgo;
1884 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1889 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1892 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1893 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1894 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1895 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1896 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1897 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1898 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1899 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1902 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1904 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1905 $text =~ s/\pM+//gso;
1906 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1908 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1909 $text =~ s/\s+/ /sgo;
1910 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1911 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1912 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1913 $text =~ s/\b(?:the|an?)\b//sgo;
1914 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1915 $text =~ s/\[.[^\]]+\]//sgo;
1916 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1917 $text =~ s/\s*[;\/\.]*$//sgo;
1918 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1923 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1924 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1925 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1926 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1929 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1931 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1932 $text =~ s/\pM+//gso;
1933 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1935 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1936 $text =~ s/\s+/ /sgo;
1937 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1938 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1939 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1940 $text =~ s/,?\s+.*$//sgo;
1941 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1948 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1952 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1956 my $match_index = 0;
1957 my $block_index = 1;
1958 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1959 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1961 my $block_name_index = 0;
1962 my $block_value_index = 1;
1963 my $block = $fp_mods_xpath[$block_index];
1964 while ( my $part = $$block[$block_value_index] ) {
1966 for my $xpath ( @{ $part->{xpath} } ) {
1967 $text = $mods->findvalue( $xpath );
1971 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1975 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1976 $fp_string .= $text;
1979 $block_name_index += 2;
1980 $block_value_index += 2;
1984 $fp_string =~ s/\W+//gso;
1985 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recomputes fingerprint and quality for the bib records matching $rec, updates
# records whose stored values differ, and (unless the API name contains
# "nomap") rebuilds the record's metarecord source mapping. Each processed
# record id is sent to the client.
# NOTE(review): argument unpacking, the try/catch scaffolding, several call
# arguments and closing braces are not visible in this excerpt, so the exact
# nesting of the steps below should be confirmed against the full source.
1995 sub refingerprint_bibrec {
# Only open a storage transaction when none is already in progress.
2001 if (!OpenILS::Application::Ingest->in_transaction) {
2002 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2008 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2009 for my $b (@$bibs) {
2010 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# fingerprint is compared as a string, quality numerically
2012 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2014 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2016 OpenILS::Application::Ingest->storage_req(
2017 'open-ils.storage.direct.biblio.record_entry.remote_update',
2019 { fingerprint => $fp->{fingerprint},
2020 quality => $fp->{quality} }
2023 if ($self->api_name !~ /nomap/o) {
2024 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2025 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2030 if (ref($old_source_map) and @$old_source_map) {
2031 for my $m (@$old_source_map) {
# remember the metarecord the removed mapping pointed at
2032 $old_mrid = $m->metarecord;
2033 OpenILS::Application::Ingest->storage_req(
2034 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
2040 my $old_sm = OpenILS::Application::Ingest->storage_req(
2041 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2042 { metarecord => $old_mrid }
# a metarecord delete is issued only when no source maps still reference it
2045 if (ref($old_sm) and @$old_sm == 0) {
2046 OpenILS::Application::Ingest->storage_req(
2047 'open-ils.storage.direct.metabib.metarecord.delete',
2052 my $mr = OpenILS::Application::Ingest->storage_req(
2053 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2054 { fingerprint => $fp->{fingerprint} }
# presumably reached only when the search above found no metarecord (the guard
# is not visible here): create one with this record as master -- TODO confirm
2058 $mr = Fieldmapper::metabib::metarecord->new;
2059 $mr->fingerprint( $fp->{fingerprint} );
2060 $mr->master_record( $b->id );
2061 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2064 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2065 $mr_map->metarecord( $mr->id );
2066 $mr_map->source( $b->id );
2067 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2071 $client->respond($b->id);
# presumably the error handler of an enclosing try block (not visible here)
2075 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# The transaction is finished here only when $commit is set; $success picks
# commit versus rollback.
2079 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2080 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
2083 __PACKAGE__->register_method(
2084 api_name => "open-ils.worm.fingerprint.record.update",
2085 method => "refingerprint_bibrec",
2091 __PACKAGE__->register_method(
2092 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2093 method => "refingerprint_bibrec",
# Retrieves bib record $rec from storage, runs open-ils.worm.fingerprint.marc
# on its MARC, and logs the first result.
# NOTE(review): argument unpacking and the return statement are not visible in
# this excerpt. The API name registered for this sub,
# "open-ils.worm.fingerprint.record", is also registered for
# biblio_fingerprint_record later in this package.
2100 sub fingerprint_bibrec {
2105 OpenILS::Application::Ingest->post_init();
2106 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2108 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2109 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2113 __PACKAGE__->register_method(
2114 api_name => "open-ils.worm.fingerprint.record",
2115 method => "fingerprint_bibrec",
# Parses the XML string $xml and returns the result of _fp_mods for its
# document element.
# NOTE(review): argument unpacking is not visible in this excerpt.
2121 sub fingerprint_mods {
2126 OpenILS::Application::Ingest->post_init();
2127 my $mods = $parser->parse_string($xml)->documentElement;
2129 return _fp_mods( $mods );
2131 __PACKAGE__->register_method(
2132 api_name => "open-ils.worm.fingerprint.mods",
2133 method => "fingerprint_mods",
# Transforms $xml with $mods_sheet, fingerprints the resulting document element
# with _fp_mods, and logs the fingerprint. $xml may be a string or an
# already-parsed document.
# NOTE(review): argument unpacking and the return statement are not visible in
# this excerpt. The API name registered for this sub,
# "open-ils.worm.fingerprint.marc", is also registered for biblio_fingerprint
# later in this package.
2138 sub fingerprint_marc {
2143 $xml = $parser->parse_string($xml) unless (ref $xml);
2145 OpenILS::Application::Ingest->post_init();
2146 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2147 $log->debug("Returning [$fp] as fingerprint", INFO);
2150 __PACKAGE__->register_method(
2151 api_name => "open-ils.worm.fingerprint.marc",
2152 method => "fingerprint_marc",
# Starts from a storage retrieve of bib record $rec, runs
# open-ils.worm.fingerprint.marc on the resulting $marc, and logs the first
# result.
# NOTE(review): argument unpacking, the end of the retrieve expression and the
# return statement are not visible in this excerpt.
2160 sub biblio_fingerprint_record {
2165 OpenILS::Application::Ingest->post_init();
2167 my $marc = OpenILS::Application::Ingest
2168 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2171 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2172 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2175 __PACKAGE__->register_method(
2176 api_name => "open-ils.worm.fingerprint.record",
2177 method => "biblio_fingerprint_record",
# Fingerprints a MARC record by running the configured 'biblio_fingerprint'
# script with an environment holding the record's MARC and MODS text.
# Returns 0 when the script run yields a false value.
# NOTE(review): argument unpacking, parts of the MODS construction, any guard
# around the ScriptRunner construction and the final return are not visible in
# this excerpt.
2183 sub biblio_fingerprint {
2188 OpenILS::Application::Ingest->post_init();
2190 $marc = $parser->parse_string($marc) unless (ref $marc); # accept a raw XML string or a parsed document
2192 my $mods = OpenILS::Application::Ingest::entityize(
2194 ->transform( $marc )
2200 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2203 $log->internal("Got MARC [$marc]");
2204 $log->internal("Created MODS [$mods]");
# Script location and library paths come from the open-ils.storage app settings.
2207 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2208 my $conf = OpenSRF::Utils::SettingsClient->new;
2210 my $libs = $conf->config_value(@pfx, 'script_path');
2211 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
2212 my $script_libs = (ref($libs)) ? $libs : [$libs]; # a single configured path is wrapped in an array ref
2214 $log->debug("Loading script $script_file for biblio fingerprinting...");
2216 $fp_script = new OpenILS::Utils::ScriptRunner
2217 ( file => $script_file,
2218 paths => $script_libs,
2219 reset_count => 1000 );
2222 $log->debug("Applying environment for biblio fingerprinting...");
2224 my $env = {marc => $marc, mods => $mods};
2225 #my $res = {fingerprint => '', quality => '0'};
2227 $fp_script->insert('environment' => $env);
2228 #$fp_script->insert('result' => $res);
2230 $log->debug("Running script for biblio fingerprinting...");
# Log and bail out whenever the run yields a false value. The early return must
# not depend on what the logger call itself returns, which the former
# "run || (log && return 0)" form did.
2232 my $res = $fp_script->run or do { $log->error( "Fingerprint script died! $@" ); return 0 };
2234 $log->debug("Script for biblio fingerprinting completed successfully...");
2238 __PACKAGE__->register_method(
2239 api_name => "open-ils.worm.fingerprint.marc",
2240 method => "biblio_fingerprint",
2245 # --------------------------------------------------------------------------------
2259 my $create_source_map;
2274 my %descriptor_code = (
2275 item_type => 'substr($ldr,6,1)',
2276 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2277 bib_level => 'substr($ldr,7,1)',
2278 control_type => 'substr($ldr,8,1)',
2279 char_encoding => 'substr($ldr,9,1)',
2280 enc_level => 'substr($ldr,17,1)',
2281 cat_form => 'substr($ldr,18,1)',
2282 pub_status => 'substr($ldr,5,1)',
2283 item_lang => 'substr($oo8,35,3)',
2284 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2285 audience => 'substr($oo8,22,1)',
2295 if ($self->api_name =~ /no_map/o) {
2299 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2301 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2303 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2305 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2307 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2308 unless ($sm_lookup);
2309 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2310 unless ($mr_lookup);
2311 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2312 unless ($mr_update);
2313 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2315 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2316 unless ($update_entry);
2317 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2318 unless ($rm_old_sm);
2319 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2320 unless ($rm_old_rd);
2321 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2322 unless ($rm_old_fr);
2323 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2324 unless ($rm_old_tr);
2325 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2326 unless ($rm_old_ar);
2327 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2328 unless ($rm_old_sr);
2329 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2330 unless ($rm_old_kr);
2331 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2332 unless ($rm_old_ser);
2333 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2334 unless ($mr_create);
2335 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2336 unless ($create_source_map);
2337 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2338 unless ($rd_create);
2339 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2340 unless ($fr_create);
2341 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2342 unless ($$create{title});
2343 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2344 unless ($$create{author});
2345 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2346 unless ($$create{subject});
2347 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2348 unless ($$create{keyword});
2349 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2350 unless ($$create{series});
2353 my ($outer_xact) = $in_xact->run;
2355 unless ($outer_xact) {
2356 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2357 my ($r) = $begin->run($client);
2358 unless (defined $r and $r) {
2360 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2363 } catch Error with {
2364 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2374 for my $entry ( $lookup->run(@docids) ) {
2375 # step -1: grab the doc from storage
2376 next unless ($entry);
2379 my $xslt_doc = $parser->parse_file(
2380 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2381 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2384 my $xml = $entry->marc;
2385 my $docid = $entry->id;
2386 my $marcdoc = $parser->parse_string($xml);
2387 my $modsdoc = $mods_sheet->transform($marcdoc);
2389 my $mods = $modsdoc->documentElement;
2390 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2392 $entry->fingerprint( fingerprint_mods( $mods ) );
2393 push @entry_list, $entry;
2395 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2398 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2399 if (!$mr || !@$mr) {
2400 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2401 $mr = new Fieldmapper::metabib::metarecord;
2402 $mr->fingerprint( $entry->fingerprint );
2403 $mr->master_record( $entry->id );
2404 my ($new_mr) = $mr_create->run($mr);
# Check the value returned by the create call. $mr was assigned a freshly
# constructed metarecord object a few lines earlier, so testing $mr cannot
# detect a failed create.
# NOTE(review): one statement between the create call and this check is not
# visible in this excerpt -- confirm it does not reassign $mr.
2406 unless (defined $new_mr) {
2407 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2410 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2415 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2416 $sm->metarecord( $mr->id );
2417 $sm->source( $entry->id );
2418 push @source_maps, $sm;
2421 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2422 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2424 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2425 for my $rd_field ( keys %descriptor_code ) {
2426 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2428 $rd_obj->record( $docid );
2429 push @rd_list, $rd_obj;
2431 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2433 # step 2: build the KOHA rows
2434 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2435 $_->record( $docid ) for (@tmp_list);
2436 push @ns_list, @tmp_list;
2440 last unless ($self->api_name =~ /batch$/o);
2443 $rm_old_rd->run( { record => \@docids } );
2444 $rm_old_fr->run( { record => \@docids } );
2445 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2446 $rm_old_tr->run( { source => \@docids } );
2447 $rm_old_ar->run( { source => \@docids } );
2448 $rm_old_sr->run( { source => \@docids } );
2449 $rm_old_kr->run( { source => \@docids } );
2450 $rm_old_ser->run( { source => \@docids } );
2453 my ($sm) = $create_source_map->run(@source_maps);
2454 unless (defined $sm) {
2455 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2457 my ($mr) = $mr_update->run(@mr_list);
2458 unless (defined $mr) {
2459 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2463 my ($re) = $update_entry->run(@entry_list);
2464 unless (defined $re) {
2465 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2468 my ($rd) = $rd_create->run(@rd_list);
2469 unless (defined $rd) {
2470 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2473 my ($fr) = $fr_create->run(@ns_list);
2474 unless (defined $fr) {
2475 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2478 # step 5: insert the new metadata
2479 for my $class ( qw/title author subject keyword series/ ) {
2481 for my $doc ( @mods_data ) {
2482 my ($did) = keys %$doc;
2483 my ($data) = values %$doc;
2485 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2486 for my $row ( keys %{ $$data{$class} } ) {
2487 next unless (exists $$data{$class}{$row});
2488 next unless ($$data{$class}{$row}{value});
2489 my $fm_obj = $fm_constructor->new;
2490 $fm_obj->value( $$data{$class}{$row}{value} );
2491 $fm_obj->field( $$data{$class}{$row}{field_id} );
2492 $fm_obj->source( $did );
2493 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2495 push @md_list, $fm_obj;
2499 my ($cr) = $$create{$class}->run(@md_list);
2500 unless (defined $cr) {
2501 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2505 unless ($outer_xact) {
2506 $log->debug("Commiting transaction started by the Ingest.", INFO);
2507 my ($c) = $commit->run;
2508 unless (defined $c and $c) {
2510 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the bibliographic ingest API entry points. All four API names
# below dispatch to the same "wormize" method, which inspects its own
# api_name at run time: it leaves its record loop after the first record
# unless the name ends in "batch", and it skips removal of old source maps
# when $no_map is set (presumably derived from "no_map" in the name, as
# authority_wormize does -- confirm against the top of wormize).
2516 __PACKAGE__->register_method(
2517 api_name => "open-ils.worm.wormize",
2518 method => "wormize",
2522 __PACKAGE__->register_method(
2523 api_name => "open-ils.worm.wormize.no_map",
2524 method => "wormize",
2528 __PACKAGE__->register_method(
2529 api_name => "open-ils.worm.wormize.batch",
2530 method => "wormize",
2534 __PACKAGE__->register_method(
2535 api_name => "open-ils.worm.wormize.no_map.batch",
2536 method => "wormize",
# File-scoped slot declared ahead of the authority ingest code.
# NOTE(review): not referenced by the authority ingest lines visible in this
# part of the file -- confirm whether it is still needed.
2551 my $acreate_source_map;
# authority_wormize: (re)ingest authority records.
#
# For every record returned for @docids this parses the record's MARCXML,
# builds an authority record_descriptor object and the authority full_rec
# rows, then mass-deletes the previously stored descriptor / full_rec rows for
# those ids and batch-creates the new ones.
#
# Transaction handling: if no storage transaction is current, one is begun
# here and committed at the end; an enclosing transaction is left alone.
#
# Throws OpenSRF::EX::PANIC when the transaction cannot be begun or committed,
# or when one of the batch-create calls returns undef.
2566 sub authority_wormize {
2573 if ($self->api_name =~ /no_map/o) {
# Storage method handles are looked up lazily: each lookup is skipped when the
# handle variable is already set.
2577 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2579 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2581 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2583 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2585 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2587 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2588 unless ($aupdate_entry);
2589 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2590 unless ($arm_old_rd);
2591 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2592 unless ($arm_old_fr);
2593 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2594 unless ($ard_create);
2595 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2596 unless ($afr_create);
# A true value here means the caller already opened a transaction.
2599 my ($outer_xact) = $in_xact->run;
2601 unless ($outer_xact) {
2602 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2603 my ($r) = $begin->run($client);
2604 unless (defined $r and $r) {
2606 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2609 } catch Error with {
2610 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Fetch through the authority retrieve handle ($alookup). The previous code
# ran $lookup, the handle used by the bibliographic ingest, so authority ids
# were resolved against the wrong method (or against undef if wormize had
# never run in this process).
2620 for my $entry ( $alookup->run(@docids) ) {
2621 # step -1: grab the doc from storage
2622 next unless ($entry);
2625 # my $xslt_doc = $parser->parse_file(
2626 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2627 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2630 my $xml = $entry->marc;
2631 my $docid = $entry->id;
2632 my $marcdoc = $parser->parse_string($xml);
2633 #my $madsdoc = $mads_sheet->transform($marcdoc);
2635 #my $mads = $madsdoc->documentElement;
2636 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2638 push @entry_list, $entry;
# $ldr and $oo8 are not read directly below; presumably the code strings in
# %descriptor_code refer to them when eval'd -- confirm before renaming.
2640 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2641 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2643 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2644 for my $rd_field ( keys %descriptor_code ) {
2645 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2647 $rd_obj->record( $docid );
2648 push @rd_list, $rd_obj;
2650 # step 2: build the KOHA rows
2651 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2652 $_->record( $docid ) for (@tmp_list);
2653 push @ns_list, @tmp_list;
# Non-batch API names process only the first record.
2657 last unless ($self->api_name =~ /batch$/o);
# Remove the rows stored by any earlier ingest of these records.
2660 $arm_old_rd->run( { record => \@docids } );
2661 $arm_old_fr->run( { record => \@docids } );
2663 my ($rd) = $ard_create->run(@rd_list);
2664 unless (defined $rd) {
2665 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Create through the authority full_rec handle ($afr_create), matching both
# the lookup above and the error message below; the previous code ran the
# bibliographic $fr_create handle.
2668 my ($fr) = $afr_create->run(@ns_list);
2669 unless (defined $fr) {
2670 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Only commit a transaction that this method itself began.
2673 unless ($outer_xact) {
2674 $log->debug("Commiting transaction started by Ingest.", INFO);
2675 my ($c) = $commit->run;
2676 unless (defined $c and $c) {
2678 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the authority ingest API entry points. Both names dispatch to
# authority_wormize; they previously pointed at "wormize", the bibliographic
# ingest method, which left authority_wormize unreachable through the API.
#
# NOTE(review): the first api_name is spelled "authortiy". It is kept as-is
# here so existing callers of the published name keep resolving; decide
# whether to rename it (or register a correctly spelled alias) separately.
2684 __PACKAGE__->register_method(
2685 api_name => "open-ils.worm.authortiy.wormize",
2686 method => "authority_wormize",
2690 __PACKAGE__->register_method(
2691 api_name => "open-ils.worm.authority.wormize.batch",
2692 method => "authority_wormize",
2698 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Builds one row object for the leader, one for each controlfield, and one
# for each child node of each datafield of a parsed MARCXML document.
#
#   $marcxml - parsed document; only ->documentElement is used.
#   $type    - class name used to construct every row; defaults to
#              'Fieldmapper::metabib::full_rec'.
#
# Text is normalized to NFD and stripped of combining marks (\pM); datafield
# child values are additionally lower-cased. Callers assign the result to a
# list of rows and then set ->record on each.
2701 sub _marcxml_to_full_rows {
2703 my $marcxml = shift;
2704 my $type = shift || 'Fieldmapper::metabib::full_rec';
2708 my $root = $marcxml->documentElement;
2710 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2711 next unless $tagline;
# Construct from $type (not a hard-coded metabib class) so that callers
# passing another row class get that class for the leader row as well.
2713 my $ns = $type->new;
2716 my $val = NFD($tagline->textContent);
2717 $val =~ s/(\pM+)//gso;
2723 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2724 next unless $tagline;
# Same as above: honour $type for controlfield rows.
2726 my $ns = $type->new;
2728 $ns->tag( $tagline->getAttribute( "tag" ) );
2729 my $val = NFD($tagline->textContent);
2730 $val =~ s/(\pM+)//gso;
2736 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2737 next unless $tagline;
# Tag and indicators are read once per datafield and shared by its children.
2739 my $tag = $tagline->getAttribute( "tag" );
2740 my $ind1 = $tagline->getAttribute( "ind1" );
2741 my $ind2 = $tagline->getAttribute( "ind2" );
2743 for my $data ( $tagline->childNodes ) {
2746 my $ns = $type->new;
2751 $ns->subfield( $data->getAttribute( "code" ) );
2752 my $val = NFD($data->textContent);
2753 $val =~ s/(\pM+)//gso;
2754 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Accumulates, into a single space-separated string, the text of the nodes
# under $root that match $xpath. Child text that already occurs in the
# accumulated string is skipped, so repeated values appear once. The result
# is then normalized to NFD and stripped of combining marks (\pM).
#
#   $root  - node on which ->findnodes is called.
#   $xpath - XPath expression selecting the nodes of interest.
2762 sub _get_field_value {
2764 my( $root, $xpath ) = @_;
2768 # grab the set of matching nodes
2769 my @nodes = $root->findnodes( $xpath );
2770 for my $value (@nodes) {
2772 # grab all children of the node
2773 my @children = $value->childNodes();
2774 for my $child (@children) {
2776 # add the childs content to the growing buffer
# Use a literal substring test rather than interpolating the text into a
# regex: with empty text the pattern would be empty, and Perl then reuses the
# last successfully matched regex instead of matching the empty string.
2777 my $content = $child->textContent;
2778 next if (index($string, $content) >= 0); # uniquify the values
2779 $string .= $child->textContent . " ";
# The node's own text is appended here; the condition guarding this line is
# not among the lines shown.
2782 $string .= $value->textContent . " ";
2785 $string = NFD($string);
2786 $string =~ s/(\pM)//gso;
2791 sub modsdoc_to_values {
2792 my( $self, $mods ) = @_;
2794 for my $class (keys %$xpathset) {
2795 $data->{$class} = {};
2796 for my $type (keys %{$xpathset->{$class}}) {
2797 $data->{$class}->{$type} = {};
2798 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};