1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Output formats known to this module, keyed by short format name. Each entry
# carries the XML namespace URI (ns) for that format; elsewhere in this file
# the 'mods' and 'mods3' entries additionally get a compiled stylesheet
# stored under the {xslt} key.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
25 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
26 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
27 atom => {ns => 'http://www.w3.org/2005/Atom'},
28 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
32 rss10 => {ns => 'http://purl.org/rss/1.0/'},
33 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger is used by class name; log methods are invoked as $log->debug(...).
38 my $log = 'OpenSRF::Utils::Logger';
# Parser and XSLT processor instances reused by the code below.
40 my $parser = XML::LibXML->new();
41 my $xslt = XML::LibXSLT->new();
# NOTE(review): the enclosing sub header is not visible in this excerpt;
# presumably this is the body of post_init (see the log message) -- confirm.
# Lazy setup: the work below runs only while $xpathset has no keys.
51 unless (keys %$xpathset) {
52 $log->debug("Running post_init", DEBUG);
# Directory containing the XSL stylesheets, read from settings (dirs => 'xsl').
54 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and cache the MARC21slim-to-MODS stylesheet unless already cached.
56 unless ($supported_formats{mods}{xslt}) {
57 $log->debug("Loading MODS XSLT", DEBUG);
58 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
59 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet.
62 unless ($supported_formats{mods3}{xslt}) {
63 $log->debug("Loading MODS v3 XSLT", DEBUG);
64 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
65 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask open-ils.cstore for the config.metabib_field rows matching the filter
# id != undef.
69 my $req = OpenSRF::AppSession
70 ->create('open-ils.cstore')
71 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Store each definition's xpath, id and format in $xpathset, keyed first by
# field_class and then by name.
# NOTE(review): the loop header that binds $f is not visible in this excerpt.
74 if (ref $req and @$req) {
76 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
77 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
78 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
79 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# NOTE(review): the enclosing sub is not visible in this excerpt; presumably
# this is the body of entityize(), which other code in this file calls.
# Replace each character in the range U+0080..U+FFFD with an uppercase-hex
# numeric character reference built by sprintf.
95 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
99 # --------------------------------------------------------------------------------
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
# Read-only bibliographic ingest of a record object.
# Entityizes and parses $bib->marc, runs the flat_marc, field_entry,
# fingerprint and descriptor methods, and stamps the results with $bib->id.
# Returns a hashref with keys full_rec, field_entries, fingerprint, descriptor.
# NOTE(review): the lines that unpack $self and $bib are not visible here.
106 sub ro_biblio_ingest_single_object {
110 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
112 my $document = $parser->parse_string($xml);
# flat_marc and field_entry receive the parsed document; fingerprint and
# descriptor receive the entityized XML string.
114 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
115 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
116 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
117 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every generated row back to the originating record id.
119 $_->source($bib->id) for (@mXfe);
120 $_->record($bib->id) for (@mfr);
# The descriptor may be missing, so only stamp it when present.
121 $rd->record($bib->id) if ($rd);
123 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
125 __PACKAGE__->register_method(
126 api_name => "open-ils.ingest.full.biblio.object.readonly",
127 method => "ro_biblio_ingest_single_object",
# Read-only bibliographic ingest of a MARCXML string.
# Same pipeline as the object variant, but the results are not stamped with a
# record id here. Returns a hashref with keys full_rec, field_entries,
# fingerprint, descriptor.
# NOTE(review): the line that unpacks $self is not visible in this excerpt.
132 sub ro_biblio_ingest_single_xml {
# The next argument is the XML text; it is entityized before parsing.
135 my $xml = OpenILS::Application::Ingest::entityize(shift);
137 my $document = $parser->parse_string($xml);
139 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
140 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
141 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
142 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
144 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
146 __PACKAGE__->register_method(
147 api_name => "open-ils.ingest.full.biblio.xml.readonly",
148 method => "ro_biblio_ingest_single_xml",
# Read-only bibliographic ingest by record id.
# Retrieves the biblio.record_entry for $rec from open-ils.cstore, runs the
# XML ingest on its MARC, and stamps the results with $rec.
# Returns undef when the retrieval yields nothing.
# NOTE(review): argument unpacking, the tail of the request chain and the
# final return are not visible in this excerpt.
153 sub ro_biblio_ingest_single_record {
158 OpenILS::Application::Ingest->post_init();
159 my $r = OpenSRF::AppSession
160 ->create('open-ils.cstore')
161 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# $r is dereferenced as an array here, so it is expected to be an array-based
# reference.
164 return undef unless ($r and @$r);
166 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
168 $_->source($rec) for (@{$res->{field_entries}});
169 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike the object variant, this call is not guarded against a
# missing descriptor -- confirm the descriptor is always present here.
170 $res->{descriptor}->record($rec);
174 __PACKAGE__->register_method(
175 api_name => "open-ils.ingest.full.biblio.record.readonly",
176 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id: receives record ids from the
# client one at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking ($self, $client) is not visible here.
181 sub ro_biblio_ingest_stream_record {
185 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
187 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
189 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
191 my $rec = $resp->content;
192 last unless (defined $rec);
194 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
195 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id onto the field entries and full_rec rows.
197 $_->source($rec) for (@{$res->{field_entries}});
198 $_->record($rec) for (@{$res->{full_rec}});
200 $client->respond( $res );
205 __PACKAGE__->register_method(
206 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
207 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of MARCXML: receives XML payloads from the
# client one at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking ($self, $client) is not visible here.
212 sub ro_biblio_ingest_stream_xml {
216 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
218 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
220 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
222 my $xml = $resp->content;
223 last unless (defined $xml);
225 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
226 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
228 $client->respond( $res );
233 __PACKAGE__->register_method(
234 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
235 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of record objects: receives objects exposing marc and id
# from the client, runs the XML ingest on each, stamps the results with the
# object's id and responds with them.
# NOTE(review): despite the rw_/import naming, no write is performed in the
# lines visible in this excerpt -- confirm against the full source.
240 sub rw_biblio_ingest_stream_import {
244 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
246 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
248 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
250 my $bib = $resp->content;
251 last unless (defined $bib);
253 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
254 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
256 $_->source($bib->id) for (@{$res->{field_entries}});
257 $_->record($bib->id) for (@{$res->{full_rec}});
259 $client->respond( $res );
264 __PACKAGE__->register_method(
265 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
266 method => "rw_biblio_ingest_stream_import",
272 # --------------------------------------------------------------------------------
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
# Read-only authority ingest of a record object.
# Entityizes and parses $bib->marc, flattens it with the authority flat_marc
# method, and stamps each row with $bib->id.
# Returns a hashref with the single key full_rec.
# NOTE(review): the lines that unpack $self and $bib are not visible here.
279 sub ro_authority_ingest_single_object {
283 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
285 my $document = $parser->parse_string($xml);
287 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
289 $_->record($bib->id) for (@mfr);
291 return { full_rec => \@mfr };
293 __PACKAGE__->register_method(
294 api_name => "open-ils.ingest.full.authority.object.readonly",
295 method => "ro_authority_ingest_single_object",
# Read-only authority ingest of a MARCXML string.
# Entityizes and parses the XML, then flattens it with the authority
# flat_marc method. Returns a hashref with the single key full_rec.
# NOTE(review): the line that unpacks $self is not visible in this excerpt.
300 sub ro_authority_ingest_single_xml {
303 my $xml = OpenILS::Application::Ingest::entityize(shift);
305 my $document = $parser->parse_string($xml);
307 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
309 return { full_rec => \@mfr };
311 __PACKAGE__->register_method(
312 api_name => "open-ils.ingest.full.authority.xml.readonly",
313 method => "ro_authority_ingest_single_xml",
# Read-only authority ingest by record id.
# Retrieves the authority.record_entry for $rec from open-ils.cstore, runs
# the authority XML ingest on its MARC, and stamps the rows with $rec.
# Returns undef when the retrieval yields nothing.
# NOTE(review): argument unpacking, the tail of the request chain and the
# final return are not visible in this excerpt.
318 sub ro_authority_ingest_single_record {
323 OpenILS::Application::Ingest->post_init();
324 my $r = OpenSRF::AppSession
325 ->create('open-ils.cstore')
326 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
329 return undef unless ($r and @$r);
331 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
333 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority xml.readonly implementation visible in this
# file returns only the full_rec key, so $res->{descriptor} looks like it
# would be undef here and this method call would die -- confirm.
334 $res->{descriptor}->record($rec);
338 __PACKAGE__->register_method(
339 api_name => "open-ils.ingest.full.authority.record.readonly",
340 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id: receives record ids from
# the client one at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking ($self, $client) is not visible here.
345 sub ro_authority_ingest_stream_record {
349 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
351 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
353 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
355 my $rec = $resp->content;
356 last unless (defined $rec);
358 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
359 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
361 $_->record($rec) for (@{$res->{full_rec}});
363 $client->respond( $res );
368 __PACKAGE__->register_method(
369 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
370 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of MARCXML: receives XML payloads from
# the client one at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking ($self, $client) is not visible here.
375 sub ro_authority_ingest_stream_xml {
379 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
381 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
383 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
385 my $xml = $resp->content;
386 last unless (defined $xml);
388 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
389 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
391 $client->respond( $res );
396 __PACKAGE__->register_method(
397 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
398 method => "ro_authority_ingest_stream_xml",
# Streaming authority ingest of record objects: receives objects exposing
# marc and id from the client, runs the authority XML ingest on each, stamps
# the rows with the object's id and responds with the result.
# NOTE(review): despite the rw_/import naming, no write is performed in the
# lines visible in this excerpt -- confirm against the full source.
403 sub rw_authority_ingest_stream_import {
407 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this excerpt.
409 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv call (timeout => 5); an undefined payload ends the loop.
411 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
413 my $bib = $resp->content;
414 last unless (defined $bib);
416 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
417 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
419 $_->record($bib->id) for (@{$res->{full_rec}});
421 $client->respond( $res );
426 __PACKAGE__->register_method(
427 api_name => "open-ils.ingest.full.authority.bib_stream.import",
428 method => "rw_authority_ingest_stream_import",
434 # --------------------------------------------------------------------------------
435 # MARC index extraction
437 package OpenILS::Application::Ingest::XPATH;
438 use base qw/OpenILS::Application::Ingest/;
439 use Unicode::Normalize;
441 # Given an XML document element and an XPath expression, collect the text
# content of the matching nodes into one space-separated string.
# A namespace is registered on the element when both a URI and a prefix are
# supplied. When $unique is true, a child's text is skipped if it already
# occurs anywhere in the accumulated string (substring match).
# NOTE(review): most of the argument unpacking, the initialisation of
# $string and the return statement are not visible in this excerpt.
442 sub xpath_to_string {
446 my $ns_prefix = shift;
449 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
453 # grab the set of matching nodes
454 my @nodes = $xml->findnodes( $xpath );
455 for my $value (@nodes) {
457 # grab all children of the node
458 my @children = $value->childNodes();
459 for my $child (@children) {
461 # add the child's content to the growing buffer
# quotemeta makes the text safe to interpolate into the regex below.
462 my $content = quotemeta($child->textContent);
463 next if ($unique && $string =~ /$content/); # uniquify the values
464 $string .= $child->textContent . " ";
# NOTE(review): whatever condition guards this append of the node's own text
# is not visible in this excerpt.
467 $string .= $value->textContent . " ";
# Build field-entry objects for the requested index classes from a MARCXML
# document (or string). For every field definition in $xpathset for a class,
# the record is transformed to the definition's format, the definition's
# XPath is evaluated, the text is normalised, and one field_entry object is
# sent to the client.
# NOTE(review): argument unpacking and the declarations of $document and
# %transform_cache are not visible in this excerpt.
473 sub class_index_string_xml {
479 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or raw XML text.
480 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
484 for my $class (@classes) {
485 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
486 for my $type ( keys %{ $xpathset->{$class} } ) {
488 my $def = $xpathset->{$class}->{$type};
489 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse the transformed document for a format when one is already cached.
494 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
495 $transform_cache{$def->{format}} = $document;
# The format name is passed as the namespace prefix for the XPath lookup.
498 my $value = xpath_to_string(
499 $document->documentElement => $def->{xpath},
500 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop mark characters (\pM) and "Other"
# category characters (\pC), trim trailing non-word characters, and remove
# runs of periods that sit between two word characters.
506 $value = NFD($value);
507 $value =~ s/\pM+//sgo;
508 $value =~ s/\pC+//sgo;
509 $value =~ s/\W+$//sgo;
511 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
# Emit one field entry carrying the value and the definition's id.
514 my $fm = $class_constructor->new;
515 $fm->value( $value );
516 $fm->field( $xpathset->{$class}->{$type}->{id} );
517 $client->respond($fm);
522 __PACKAGE__->register_method(
523 api_name => "open-ils.ingest.field_entry.class.xml",
524 method => "class_index_string_xml",
# Record-id variant of the class field-entry extraction: fetches the record,
# runs the XML-based extraction for the given classes on its MARC, and
# forwards each resulting field entry to the client.
# Returns undef when the retrieval yields nothing.
# NOTE(review): argument unpacking is not visible in this excerpt.
530 sub class_index_string_record {
536 OpenILS::Application::Ingest->post_init();
# NOTE(review): this fetches from authority.record_entry, whereas the
# "all classes" record variant in this file fetches from biblio.record_entry
# -- confirm which table is intended.
537 my $r = OpenSRF::AppSession
538 ->create('open-ils.cstore')
539 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
542 return undef unless ($r and @$r);
544 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
546 $client->respond($fm);
550 __PACKAGE__->register_method(
551 api_name => "open-ils.ingest.field_entry.class.record",
552 method => "class_index_string_record",
# Run the class field-entry extraction over $xml for every class present in
# $xpathset and forward each resulting field entry to the client.
# NOTE(review): argument unpacking is not visible in this excerpt.
558 sub all_index_string_xml {
563 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
564 $client->respond($fm);
568 __PACKAGE__->register_method(
569 api_name => "open-ils.ingest.extract.field_entry.all.xml",
570 method => "all_index_string_xml",
# Record-id variant of the "all classes" extraction: fetches the
# biblio.record_entry for $rec, runs the class extraction over its MARC for
# every class in $xpathset, and forwards each field entry to the client.
# Returns undef when the retrieval yields nothing.
# NOTE(review): argument unpacking is not visible in this excerpt.
576 sub all_index_string_record {
581 OpenILS::Application::Ingest->post_init();
582 my $r = OpenSRF::AppSession
583 ->create('open-ils.cstore')
584 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
587 return undef unless ($r and @$r);
589 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
591 $client->respond($fm);
595 __PACKAGE__->register_method(
596 api_name => "open-ils.ingest.extract.field_entry.all.record",
597 method => "all_index_string_record",
603 # --------------------------------------------------------------------------------
606 package OpenILS::Application::Ingest::FlatMARC;
607 use base qw/OpenILS::Application::Ingest/;
608 use Unicode::Normalize;
# Flatten a parsed MARCXML document into full_rec row objects.
# The row class is Fieldmapper::<xmltype>::full_rec, where xmltype defaults
# to 'metabib'. The leader, each controlfield and each datafield subfield are
# visited in turn.
# NOTE(review): the first argument ($marcxml), the creation of each row
# object ($ns), the collection into @ns_list and the return statement are
# not visible in this excerpt.
611 sub _marcxml_to_full_rows {
614 my $xmltype = shift || 'metabib';
616 my $type = "Fieldmapper::${xmltype}::full_rec";
# Locate the first element named "record", regardless of namespace.
620 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
622 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
623 next unless $tagline;
628 my $val = $tagline->textContent;
# Control fields: the row's tag comes from the element's "tag" attribute.
638 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
639 next unless $tagline;
643 $ns->tag( $tagline->getAttribute( "tag" ) );
644 my $val = $tagline->textContent;
# Data fields: read tag and both indicators, then walk the subfields.
654 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
655 next unless $tagline;
657 my $tag = $tagline->getAttribute( "tag" );
658 my $ind1 = $tagline->getAttribute( "ind1" );
659 my $ind2 = $tagline->getAttribute( "ind2" );
661 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
669 $ns->subfield( $data->getAttribute( "code" ) );
670 my $val = $data->textContent;
# Subfield values are stored lowercased.
675 $ns->value( lc($val) );
681 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# NOTE(review): the enclosing sub header and argument unpacking are not
# visible in this excerpt; the registrations below name the method
# "flat_marc_xml".
# Flattens MARCXML into full_rec rows and sends each row to the client.
690 $log->debug("processing [$xml]");
# Accept either an already-parsed document (a reference) or raw XML text.
692 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name this method was invoked under.
694 my $type = 'metabib';
695 $type = 'authority' if ($self->api_name =~ /authority/o);
697 OpenILS::Application::Ingest->post_init();
699 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same implementation is registered under both API names.
702 __PACKAGE__->register_method(
703 api_name => "open-ils.ingest.flat_marc.authority.xml",
704 method => "flat_marc_xml",
709 __PACKAGE__->register_method(
710 api_name => "open-ils.ingest.flat_marc.biblio.xml",
711 method => "flat_marc_xml",
# Record-id variant of the MARC flattening: retrieves the record entry of the
# selected type from open-ils.cstore, flattens its MARC, and sends each row
# to the client. Returns undef when no record (or no MARC) is found.
# NOTE(review): argument unpacking and the initial value of $type are not
# visible in this excerpt.
717 sub flat_marc_record {
# Switch to authority when invoked under an authority API name.
723 $type = 'authority' if ($self->api_name =~ /authority/o);
725 OpenILS::Application::Ingest->post_init();
726 my $r = OpenSRF::AppSession
727 ->create('open-ils.cstore')
728 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
732 return undef unless ($r and $r->marc);
734 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
735 for my $row (@rows) {
736 $client->respond($row);
# NOTE(review): no "use JSON" is visible in this excerpt -- confirm the JSON
# class is loaded elsewhere.
737 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same implementation is registered under both API names.
741 __PACKAGE__->register_method(
742 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
743 method => "flat_marc_record",
748 __PACKAGE__->register_method(
749 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
750 method => "flat_marc_record",
756 # --------------------------------------------------------------------------------
759 package OpenILS::Application::Ingest::Biblio::Fingerprint;
760 use base qw/OpenILS::Application::Ingest/;
761 use Unicode::Normalize;
762 use OpenSRF::EX qw/:try/;
# Compute the fingerprint for a bibliographic record id: retrieves the
# biblio.record_entry, runs the XML fingerprint method on its MARC, and
# truncates the quality value to an integer.
# Returns undef when no record (or no MARC) is found.
# NOTE(review): argument unpacking and the final return are not visible in
# this excerpt.
764 sub biblio_fingerprint_record {
769 OpenILS::Application::Ingest->post_init();
771 my $r = OpenSRF::AppSession
772 ->create('open-ils.cstore')
773 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
776 return undef unless ($r and $r->marc);
778 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so
# interpolating it here presumably logs a reference string rather than the
# fingerprint text -- confirm.
779 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
780 $fp->{quality} = int($fp->{quality});
783 __PACKAGE__->register_method(
784 api_name => "open-ils.ingest.fingerprint.record",
785 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARCXML string by running the configured
# fingerprint script through OpenILS::Utils::ScriptRunner.
# Logs an error and returns undef when the script run yields a false value.
# NOTE(review): argument unpacking, the declaration of $fp_script, any guard
# around constructing the runner, and the final return are not visible in
# this excerpt.
791 sub biblio_fingerprint {
794 my $xml = OpenILS::Application::Ingest::entityize(shift);
796 $log->internal("Got MARC [$xml]");
# Script location comes from settings under apps/open-ils.ingest/app_settings.
799 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
800 my $conf = OpenSRF::Utils::SettingsClient->new;
802 my $libs = $conf->config_value(@pfx, 'script_path');
803 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref.
804 my $script_libs = (ref($libs)) ? $libs : [$libs];
806 $log->debug("Loading script $script_file for biblio fingerprinting...");
808 $fp_script = new OpenILS::Utils::ScriptRunner
809 ( file => $script_file,
810 paths => $script_libs,
811 reset_count => 100 );
# Pass the record to the script as environment.marc.
814 $fp_script->insert('environment' => {marc => $xml} => 1);
816 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
817 $log->debug("Script for biblio fingerprinting completed successfully...");
821 __PACKAGE__->register_method(
822 api_name => "open-ils.ingest.fingerprint.xml",
823 method => "biblio_fingerprint",
# Extract a record descriptor from a MARCXML string by running the configured
# descriptor script through OpenILS::Utils::ScriptRunner.
# Logs an error and returns undef when the script run yields a false value.
# NOTE(review): argument unpacking, the declaration of $rd_script, any guard
# around constructing the runner, and the final return are not visible in
# this excerpt.
829 sub biblio_descriptor {
832 my $xml = OpenILS::Application::Ingest::entityize(shift);
834 $log->internal("Got MARC [$xml]");
# Script location comes from settings under apps/open-ils.ingest/app_settings.
837 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
838 my $conf = OpenSRF::Utils::SettingsClient->new;
840 my $libs = $conf->config_value(@pfx, 'script_path');
841 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalise to an array ref.
842 my $script_libs = (ref($libs)) ? $libs : [$libs];
844 $log->debug("Loading script $script_file for biblio descriptor extraction...");
846 $rd_script = new OpenILS::Utils::ScriptRunner
847 ( file => $script_file,
848 paths => $script_libs,
849 reset_count => 100 );
852 $log->debug("Setting up environment for descriptor extraction script...");
# Unlike the fingerprint sub, the record is inserted under the dotted key
# 'environment.marc' rather than as a nested hash.
853 $rd_script->insert('environment.marc' => $xml => 1);
854 $log->debug("Environment building complete...");
856 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
857 $log->debug("Script for biblio descriptor extraction completed successfully");
861 __PACKAGE__->register_method(
862 api_name => "open-ils.ingest.descriptor.xml",
863 method => "biblio_descriptor",
# NOTE(review): the enclosing sub header is not visible in this excerpt;
# presumably this is in_transaction, which other code in this file calls.
# Returns whatever the storage layer reports as the current transaction.
874 OpenILS::Application::Ingest->post_init();
875 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Begin a storage transaction and return the storage layer's current
# transaction value. Throws OpenSRF::EX::PANIC if the BEGIN request fails.
# NOTE(review): argument unpacking and the conditionals/try structure that
# use $outer_xact are not visible in this excerpt.
878 sub begin_transaction {
882 OpenILS::Application::Ingest->post_init();
883 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
887 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
888 #__PACKAGE__->st_sess->connect;
889 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A false or undefined reply means BEGIN failed: roll back and raise.
890 unless (defined $r and $r) {
891 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
892 #__PACKAGE__->st_sess->disconnect;
893 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
897 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
900 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction.
# Throws OpenSRF::EX::PANIC with "Ingest Couldn't ROLLBACK transaction!" on
# the failure path.
# NOTE(review): the conditionals/try structure selecting between the
# rollback, the "isn't inside a transaction" log and the throw are not
# visible in this excerpt.
903 sub rollback_transaction {
907 OpenILS::Application::Ingest->post_init();
908 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
912 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
914 $log->debug("Ingest isn't inside a transaction.", INFO);
917 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction. If the COMMIT request returns a
# false or undefined value, a rollback is issued and OpenSRF::EX::PANIC is
# thrown.
# NOTE(review): the conditionals/try structure around these statements are
# not visible in this excerpt.
923 sub commit_transaction {
927 OpenILS::Application::Ingest->post_init();
928 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
931 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
933 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
934 unless (defined $r and $r) {
935 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
936 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
938 #__PACKAGE__->st_sess->disconnect;
940 $log->debug("Ingest isn't inside a transaction.", INFO);
943 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# NOTE(review): the enclosing sub header and the unpacking of $method are not
# visible in this excerpt; presumably this is storage_req, which other code
# in this file calls.
# Runs the named method with the remaining arguments and returns only the
# first result.
952 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
953 return shift( @res );
# Delete the derived rows for an authority record (full_rec and
# record_descriptor) inside a savepoint named 'scrub_authority_record'.
# A transaction is started if none is active; the visible code then commits
# or rolls back depending on $commit and $success.
# NOTE(review): argument unpacking, the assignments to $commit and $success,
# the try/catch structure and the return value are not visible in this
# excerpt.
956 sub scrub_authority_record {
962 if (!OpenILS::Application::Ingest->in_transaction) {
963 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
969 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
971 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
972 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
974 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and undo back to the savepoint.
976 $log->debug('Scrubbing failed : '.shift(), ERROR);
977 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
981 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
982 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
985 __PACKAGE__->register_method(
986 api_name => "open-ils.worm.scrub.authority",
987 method => "scrub_authority_record",
# Delete the derived metabib rows for a bibliographic record inside a
# savepoint named 'scrub_metabib_record', then repair or remove any
# metarecord whose master is that record.
# $rec may be an id or a hash of search criteria; a hash is first resolved
# through the biblio.record_entry search_where id-list method.
# NOTE(review): argument unpacking, the assignments to $commit and $success,
# the try/catch structure, the branch conditions inside the metarecord loop
# and the return value are not visible in this excerpt.
993 sub scrub_metabib_record {
998 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
999 $rec = OpenILS::Application::Ingest->storage_req(
1000 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1005 if (!OpenILS::Application::Ingest->in_transaction) {
1006 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1012 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Remove every derived row keyed on this record.
1014 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1015 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1016 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1017 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1018 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1019 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1020 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1021 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1023 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1024 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1026 for my $mr (@$masters) {
1027 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Look up the source-map rows still attached to this metarecord.
1028 my $others = OpenILS::Application::Ingest->storage_req(
1029 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Re-point the metarecord at the first remaining source and clear its mods.
1032 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1033 $mr->master_record($others->[0]->source);
1034 OpenILS::Application::Ingest->storage_req(
1035 'open-ils.storage.direct.metabib.metarecord.remote_update',
1037 { master_record => $others->[0]->source, mods => undef }
# Otherwise delete the metarecord. The warn calls write to STDERR in addition
# to the debug log.
1040 warn "Removing metarecord whose master is $rec";
1041 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1042 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1043 warn "Metarecord removed";
1044 $log->debug( "Metarecord removed", DEBUG);
1048 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo back to the savepoint.
1051 $log->debug('Scrubbing failed : '.shift(), ERROR);
1052 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1056 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1057 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1060 __PACKAGE__->register_method(
1061 api_name => "open-ils.worm.scrub.biblio",
1062 method => "scrub_metabib_record",
# Re-ingest every bibliographic record mapped to a metarecord: looks up the
# source-map rows for $mrec and calls wormize_biblio_record on each source,
# building a per-record status hash (record, metarecord, success).
# NOTE(review): argument unpacking, the try header, and what is done with the
# status hashes are not visible in this excerpt.
1067 sub wormize_biblio_metarecord {
1072 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1075 for my $r (@$recs) {
1078 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is not declared in any visible line of this sub; the
# loop variable is $r -- confirm whether $r->metarecord was intended.
1080 { record => $r->source,
1081 metarecord => $rec->metarecord,
1082 success => $success,
1085 } catch Error with {
1088 { record => $r->source,
1089 metarecord => $rec->metarecord,
1090 success => $success,
# The same implementation is registered under four API names.
1098 __PACKAGE__->register_method(
1099 api_name => "open-ils.worm.wormize.metarecord",
1100 method => "wormize_biblio_metarecord",
1105 __PACKAGE__->register_method(
1106 api_name => "open-ils.worm.wormize.metarecord.nomap",
1107 method => "wormize_biblio_metarecord",
1112 __PACKAGE__->register_method(
1113 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1114 method => "wormize_biblio_metarecord",
1119 __PACKAGE__->register_method(
1120 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1121 method => "wormize_biblio_metarecord",
1128 sub wormize_biblio_record {
1133 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1134 $rec = OpenILS::Application::Ingest->storage_req(
1135 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1141 if (!OpenILS::Application::Ingest->in_transaction) {
1142 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1148 # clean up the cruft
1149 unless ($self->api_name =~ /noscrub/o) {
1150 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1154 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1157 my @rec_descriptor = ();
1165 my %metarecord = ();
1166 my @source_map = ();
1167 for my $r (@$bibs) {
1169 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1171 my $xml = $parser->parse_string($r->marc);
1173 #update the fingerprint
1174 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1175 OpenILS::Application::Ingest->storage_req(
1176 'open-ils.storage.direct.biblio.record_entry.remote_update',
1178 { fingerprint => $fp->{fingerprint},
1179 quality => int($fp->{quality}) }
1180 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1182 # the full_rec stuff
1183 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1184 $fr->record( $r->id );
1185 push @full_rec, $fr;
1188 # the rec_descriptor stuff
1189 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1190 $rd->record( $r->id );
1191 push @rec_descriptor, $rd;
1193 # the indexing field entry stuff
1194 for my $class ( qw/title author subject keyword series/ ) {
1195 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1196 $fe->source( $r->id );
1197 push @{$field_entry{$class}}, $fe;
1201 unless ($self->api_name =~ /nomap/o) {
1202 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1205 $mr = Fieldmapper::metabib::metarecord->new;
1206 $mr->fingerprint( $fp->{fingerprint} );
1207 $mr->master_record( $r->id );
1208 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1211 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1212 $mr_map->metarecord( $mr->id );
1213 $mr_map->source( $r->id );
1214 push @source_map, $mr_map;
1216 $metarecord{$mr->id} = $mr;
1218 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1220 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1221 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1226 if (@rec_descriptor) {
1227 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1229 OpenILS::Application::Ingest->storage_req(
1230 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1234 for my $mr ( values %metarecord ) {
1235 my $sources = OpenILS::Application::Ingest->storage_req(
1236 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1240 my $bibs = OpenILS::Application::Ingest->storage_req(
1241 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1242 [ map { $_->source } @$sources ]
1245 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1247 OpenILS::Application::Ingest->storage_req(
1248 'open-ils.storage.direct.metabib.metarecord.remote_update',
1250 { master_record => $master->id, mods => undef }
1254 OpenILS::Application::Ingest->storage_req(
1255 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1257 ) if (@rec_descriptor);
1259 OpenILS::Application::Ingest->storage_req(
1260 'open-ils.storage.direct.metabib.full_rec.batch.create',
1264 OpenILS::Application::Ingest->storage_req(
1265 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1266 @{ $field_entry{title} }
1267 ) if (@{ $field_entry{title} });
1269 OpenILS::Application::Ingest->storage_req(
1270 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1271 @{ $field_entry{author} }
1272 ) if (@{ $field_entry{author} });
1274 OpenILS::Application::Ingest->storage_req(
1275 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1276 @{ $field_entry{subject} }
1277 ) if (@{ $field_entry{subject} });
1279 OpenILS::Application::Ingest->storage_req(
1280 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1281 @{ $field_entry{keyword} }
1282 ) if (@{ $field_entry{keyword} });
1284 OpenILS::Application::Ingest->storage_req(
1285 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1286 @{ $field_entry{series} }
1287 ) if (@{ $field_entry{series} });
1289 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1295 $log->debug('Wormization failed : '.shift(), ERROR);
1296 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1300 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1301 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish wormize_biblio_record under four API names that differ only by the
# ".nomap" and ".noscrub" suffixes.
# NOTE(review): sibling handlers in this file switch behaviour by matching
# their own api_name against /noscrub/ and /nomap/; presumably
# wormize_biblio_record does the same -- confirm against its body.
1304 __PACKAGE__->register_method(
1305 api_name => "open-ils.worm.wormize.biblio",
1306 method => "wormize_biblio_record",
1310 __PACKAGE__->register_method(
1311 api_name => "open-ils.worm.wormize.biblio.nomap",
1312 method => "wormize_biblio_record",
1316 __PACKAGE__->register_method(
1317 api_name => "open-ils.worm.wormize.biblio.noscrub",
1318 method => "wormize_biblio_record",
1322 __PACKAGE__->register_method(
1323 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1324 method => "wormize_biblio_record",
# wormize_authority_record
#
# Re-ingests authority records: fetches the authority.record_entry rows matching
# $rec, flattens each record's MARC XML into full_rec rows, and batch-creates
# those rows inside a storage savepoint named 'wormize_authority_record'.
#
# Argument unpacking and the declarations of $client, $rec, $commit, $success
# and @full_rec are on lines not present in this extract.
1329 sub wormize_authority_record {
# Open a transaction only when one is not already active; failure is fatal.
1335 if (!OpenILS::Application::Ingest->in_transaction) {
1336 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1342 # Run the authority scrub method first, unless this call came in through a "noscrub" API name
1343 unless ($self->api_name =~ /noscrub/o) {
1344 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the name, $bibs holds authority record entries here.
1348 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# @rec_descriptor stays empty: every statement that would fill it is commented out below.
1351 my @rec_descriptor = ();
1352 for my $r (@$bibs) {
1353 my $xml = $parser->parse_string($r->marc);
1355 # the full_rec stuff: one row per flattened MARC element, each tagged with the owning record id
1356 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1357 $fr->record( $r->id );
1358 push @full_rec, $fr;
1361 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1362 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1363 #$rd->record( $r->id );
1364 #push @rec_descriptor, $rd;
# Savepoint brackets the inserts so a failure can be rolled back without
# discarding the surrounding transaction.
1368 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1370 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1371 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1373 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error argument and roll back to the savepoint.
# NOTE(review): the enclosing try/catch lines are not visible in this extract.
1376 $log->debug('Wormization failed : '.shift(), ERROR);
1377 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Only finish the transaction when $commit is set; $success picks commit vs rollback.
1381 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1382 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names share this handler; the ".noscrub" variant skips the scrub step above.
1385 __PACKAGE__->register_method(
1386 api_name => "open-ils.worm.wormize.authority",
1387 method => "wormize_authority_record",
1391 __PACKAGE__->register_method(
1392 api_name => "open-ils.worm.wormize.authority.noscrub",
1393 method => "wormize_authority_record",
1399 # --------------------------------------------------------------------------------
1400 # MARC index extraction
1402 package OpenILS::Application::Ingest::XPATH;
1403 use base qw/OpenILS::Application::Ingest/;
1404 use Unicode::Normalize;
1406 # _xpath_to_string: give this a MODS documentElement and an XPATH expression.
# Concatenates the text content of the nodes matched by $xpath, space-separated,
# and returns the result in Unicode NFD form.
# Optional namespace URI + prefix are bound on the element before evaluating the
# XPath; a true $unique skips child text already present in the buffer.
# The unpacking of $xml, $xpath, $ns_uri and $unique, and the initialisation of
# $string, are on lines not present in this extract.
1407 sub _xpath_to_string {
1411 my $ns_prefix = shift;
# Bind the prefix only when both halves of the namespace were supplied.
1414 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1418 # grab the set of matching nodes
1419 my @nodes = $xml->findnodes( $xpath );
1420 for my $value (@nodes) {
1422 # grab all children of the node
1423 my @children = $value->childNodes();
1424 for my $child (@children) {
1426 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern in the match below.
1427 my $content = quotemeta($child->textContent);
1428 next if ($unique && $string =~ /$content/); # uniquify the values (substring match against everything collected so far)
1429 $string .= $child->textContent . " ";
# NOTE(review): presumably the fallback branch for a node with no children --
# the controlling condition is not visible in this extract.
1432 $string .= $value->textContent . " ";
1435 return NFD($string);
# class_all_index_string_xml
#
# For every field type configured under $xpathset->{$class}, extracts that
# field's text from the MODS rendering of the given MARC XML, normalises it,
# and streams one Fieldmapper::metabib::<class>_field_entry object per type
# back to the client via respond().
# Argument unpacking ($self, $client, $xml, $class) is not visible in this extract.
1438 sub class_all_index_string_xml {
1444 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
1445 $xml = $parser->parse_string($xml) unless (ref $xml);
1447 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1448 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MARC->MODS transform below does not depend on $type but is
# re-run on every iteration.
1449 my $value = _xpath_to_string(
1450 $mods_sheet->transform($xml)->documentElement,
1451 $xpathset->{$class}->{$type}->{xpath},
1452 "http://www.loc.gov/mods/",
# Normalisation: decompose, strip combining marks (\pM) and control/other
# characters (\pC), trim trailing non-word characters, drop a period that
# directly follows a word character, then lowercase.
1459 $value = NFD($value);
1460 $value =~ s/\pM+//sgo;
1461 $value =~ s/\pC+//sgo;
1462 $value =~ s/\W+$//sgo;
1464 $value =~ s/(\w)\./$1/sgo;
1465 $value = lc($value);
# Build the field-entry object; 'field' is the configured metabib field id for this class/type.
1467 my $fm = $class_constructor->new;
1468 $fm->value( $value );
1469 $fm->field( $xpathset->{$class}->{$type}->{id} );
1470 $client->respond($fm);
1474 __PACKAGE__->register_method(
1475 api_name => "open-ils.worm.field_entry.class.xml",
1476 method => "class_all_index_string_xml",
# class_all_index_string_record
#
# Record-id front end for open-ils.worm.field_entry.class.xml: retrieves the
# biblio record entry for $rec, passes its MARC and $class to that method, and
# relays each resulting field-entry object to the client.
# Argument unpacking ($self, $client, $rec, $class) is not visible in this extract.
1482 sub class_all_index_string_record {
1488 OpenILS::Application::Ingest->post_init();
1489 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1491 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
# NOTE(review): the line(s) between the loop header and this respond() are not
# visible here; anything done to $fm first (e.g. setting its source) is unknown.
1493 $client->respond($fm);
1497 __PACKAGE__->register_method(
1498 api_name => "open-ils.worm.field_entry.class.record",
1499 method => "class_all_index_string_record",
# class_index_string_xml
#
# Returns the index string for a single $class/$type pair: the MARC XML is
# transformed to MODS and the XPath configured in $xpathset for that pair is
# evaluated with the "mods" prefix bound and uniquifying enabled (last arg 1).
# Unlike the class-wide variant, no further normalisation is applied here
# beyond what _xpath_to_string itself returns.
# Argument unpacking ($xml, $class, $type) is not visible in this extract.
1506 sub class_index_string_xml {
1513 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
1514 $xml = $parser->parse_string($xml) unless (ref $xml);
1515 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1517 __PACKAGE__->register_method(
1518 api_name => "open-ils.worm.class.type.xml",
1519 method => "class_index_string_xml",
# class_index_string_record
#
# Record-id front end for open-ils.worm.class.type.xml: retrieves the biblio
# record entry for $rec, runs the XML variant on its MARC for the given
# $class/$type, and logs the extracted string at DEBUG level.
# Argument unpacking and the return statement are not visible in this extract.
1524 sub class_index_string_record {
1531 OpenILS::Application::Ingest->post_init();
1532 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# List assignment keeps only the first value produced by the delegated method.
1534 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1535 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1538 __PACKAGE__->register_method(
1539 api_name => "open-ils.worm.class.type.record",
1540 method => "class_index_string_record",
1554 OpenILS::Application::Ingest->post_init();
1555 $xml = $parser->parse_string($xml) unless (ref $xml);
1556 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1558 __PACKAGE__->register_method(
1559 api_name => "open-ils.worm.xpath.xml",
1560 method => "xml_xpath",
1574 OpenILS::Application::Ingest->post_init();
1575 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1577 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1578 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1581 __PACKAGE__->register_method(
1582 api_name => "open-ils.worm.xpath.record",
1583 method => "record_xpath",
1589 # --------------------------------------------------------------------------------
1592 package OpenILS::Application::Ingest::Biblio::Leader;
1593 use base qw/OpenILS::Application::Ingest/;
1594 use Unicode::Normalize;
1596 our %marc_type_groups = (
1599 VIS => q/[gkro]{1}/,
# Build one anchored pattern matching any record-type code from the groups
# named in @_ (keys of %marc_type_groups, whose values are regex fragments).
#  - A hash slice (@marc_type_groups{@_}) pulls in every named group; the scalar
#    form $marc_type_groups{@_} looks up a single element keyed on @_ itself,
#    so the named groups never reached the pattern.
#  - Unknown group names are dropped rather than joined in as undef.
#  - The alternation is wrapped in (?:...) so ^ and $ anchor the whole set of
#    alternatives instead of only the first and last branch.
1608 my $re = '^(?:'. join('|', grep { defined } @marc_type_groups{@_}) .')$';
# %biblio_descriptor_code
#
# Maps record_descriptor field names to closures that compute the field's
# value from fixed positions of the MARC leader ($ldr) and the 008 control
# field ($oo8). The closures take no arguments; they read $ldr/$oo8 as set up
# by the caller (see the `local` assignments in _extract_biblio_descriptors).
# substr offsets are zero-based.
1612 our %biblio_descriptor_code = (
1613 item_type => sub { substr($ldr,6,1); },
# NOTE(review): the key and `sub {` opener for the following branchy entry are
# not visible in this extract (presumably item_form -- confirm). It picks 008
# offset 29 for MAP/VIS record types and offset 23 for BKS/SER/MIX/SCO/REC.
1616 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1617 return substr($oo8,29,1);
1618 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1619 return substr($oo8,23,1);
1623 bib_level => sub { substr($ldr,7,1); },
1624 control_type => sub { substr($ldr,8,1); },
1625 char_encoding => sub { substr($ldr,9,1); },
1626 enc_level => sub { substr($ldr,17,1); },
1627 cat_form => sub { substr($ldr,18,1); },
1628 pub_status => sub { substr($ldr,5,1); },
# Three characters starting at 008 offset 35.
1629 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 offset 33, but only for BKS and VIS
# record types respectively; otherwise they yield undef.
1630 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1631 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1632 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors
#
# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARC XML
# document by running every extractor in %biblio_descriptor_code and storing
# each result through the accessor of the same name.
# The unpacking of $xml and the return statement are not visible in this extract.
1635 sub _extract_biblio_descriptors {
# `local` gives these package variables a dynamically scoped value, which is
# how the argument-less extractor closures see the current record's data.
# XPath uses local-name() so it matches regardless of namespace prefix.
1638 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1639 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1640 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1642 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1643 for my $rd_field ( keys %biblio_descriptor_code ) {
# $rd_field doubles as the accessor method name on the descriptor object.
1644 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml
#
# API wrapper around _extract_biblio_descriptors: accepts MARC XML as a string
# or as an already-parsed document and returns the resulting record descriptor.
# Argument unpacking is not visible in this extract.
1650 sub extract_biblio_desc_xml {
# Parse only when given a plain string; references are taken to be parsed documents.
1655 $xml = $parser->parse_string($xml) unless (ref $xml);
1657 return _extract_biblio_descriptors( $xml );
1659 __PACKAGE__->register_method(
1660 api_name => "open-ils.worm.biblio_leader.xml",
1661 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record
#
# Record-id front end for open-ils.worm.biblio_leader.xml: retrieves the biblio
# record entry for $rec, computes its descriptor from the stored MARC, and
# logs the descriptor as JSON at DEBUG level.
# Argument unpacking and the return statement are not visible in this extract.
1666 sub extract_biblio_desc_record {
1671 OpenILS::Application::Ingest->post_init();
1672 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# List assignment keeps only the first value produced by the delegated method.
1674 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1675 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1678 __PACKAGE__->register_method(
1679 api_name => "open-ils.worm.biblio_leader.record",
1680 method => "extract_biblio_desc_record",
1685 # --------------------------------------------------------------------------------
1688 package OpenILS::Application::Ingest::FlatMARC;
1689 use base qw/OpenILS::Application::Ingest/;
1690 use Unicode::Normalize;
# _marcxml_to_full_rows
#
# Flattens a parsed MARC XML document into Fieldmapper::<xmltype>::full_rec
# objects: the leader, each controlfield, and each datafield subfield are
# visited in turn.
#   $marcxml - parsed MARC XML document
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
# The declaration of @ns_list, the pushes onto it, several setter calls and
# the return statement are on lines not present in this extract.
1693 sub _marcxml_to_full_rows {
1695 my $marcxml = shift;
1696 my $xmltype = shift || 'metabib';
1698 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first <record> element in the document is processed.
1702 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# --- leader ---
1704 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1705 next unless $tagline;
1707 my $ns = $type->new;
1710 my $val = $tagline->textContent;
# Strip combining marks (\pM), control/other characters (\pC) and trailing
# non-word characters. NOTE(review): lines between the textContent call and
# these substitutions are missing here (the other normalisers in this file run
# NFD first) -- confirm.
1712 $val =~ s/\pM+//sgo;
1713 $val =~ s/\pC+//sgo;
1714 $val =~ s/\W+$//sgo;
# --- control fields: the row's tag comes from the element's "tag" attribute ---
1720 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1721 next unless $tagline;
1723 my $ns = $type->new;
1725 $ns->tag( $tagline->getAttribute( "tag" ) );
1726 my $val = $tagline->textContent;
1728 $val =~ s/\pM+//sgo;
1729 $val =~ s/\pC+//sgo;
1730 $val =~ s/\W+$//sgo;
# --- data fields: one row per subfield; tag and indicators are read once per datafield ---
1736 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1737 next unless $tagline;
1739 my $tag = $tagline->getAttribute( "tag" );
1740 my $ind1 = $tagline->getAttribute( "ind1" );
1741 my $ind2 = $tagline->getAttribute( "ind2" );
1743 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1746 my $ns = $type->new;
1751 $ns->subfield( $data->getAttribute( "code" ) );
1752 my $val = $data->textContent;
1754 $val =~ s/\pM+//sgo;
1755 $val =~ s/\pC+//sgo;
1756 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1757 $ns->value( lc($val) );
1763 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
1772 $xml = $parser->parse_string($xml) unless (ref $xml);
1774 my $type = 'metabib';
1775 $type = 'authority' if ($self->api_name =~ /authority/o);
1777 OpenILS::Application::Ingest->post_init();
1779 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1782 __PACKAGE__->register_method(
1783 api_name => "open-ils.worm.flat_marc.authority.xml",
1784 method => "flat_marc_xml",
1789 __PACKAGE__->register_method(
1790 api_name => "open-ils.worm.flat_marc.biblio.xml",
1791 method => "flat_marc_xml",
# flat_marc_record
#
# Record-id front end for the flat_marc ".xml" methods: retrieves the biblio
# or authority record entry for $rec and streams each flattened row produced
# from its MARC back to the client.
# Argument unpacking ($self, $client, $rec) is not visible in this extract.
1797 sub flat_marc_record {
# $type selects both the storage class to read from and which ".xml" method to
# delegate to; an API name containing "authority" switches to authority.
1802 my $type = 'biblio';
1803 $type = 'authority' if ($self->api_name =~ /authority/o);
1805 OpenILS::Application::Ingest->post_init();
1806 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1808 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1811 __PACKAGE__->register_method(
1812 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1813 method => "flat_marc_record",
1818 __PACKAGE__->register_method(
1819 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1820 method => "flat_marc_record",
1827 # --------------------------------------------------------------------------------
1830 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1831 use base qw/OpenILS::Application::Ingest/;
1832 use Unicode::Normalize;
1833 use OpenSRF::EX qw/:try/;
1835 my @fp_mods_xpath = (
1836 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1839 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1840 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1841 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1842 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1845 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1847 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1848 $text =~ s/\pM+//gso;
1849 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1851 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1852 $text =~ s/\s+/ /sgo;
1853 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1854 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1855 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1856 $text =~ s/\b(?:the|an?)\b//sgo;
1857 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1858 $text =~ s/\[.[^\]]+\]//sgo;
1859 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1860 $text =~ s/\s*[;\/\.]*$//sgo;
1861 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1866 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1867 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1870 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1872 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1873 $text =~ s/\pM+//gso;
1874 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1876 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1877 $text =~ s/\s+/ /sgo;
1878 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1879 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1880 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1881 $text =~ s/,?\s+.*$//sgo;
1882 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1887 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
1890 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1891 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1892 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1893 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1894 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1895 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1896 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1897 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1900 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1902 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1903 $text =~ s/\pM+//gso;
1904 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1906 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1907 $text =~ s/\s+/ /sgo;
1908 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1909 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1910 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1911 $text =~ s/\b(?:the|an?)\b//sgo;
1912 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1913 $text =~ s/\[.[^\]]+\]//sgo;
1914 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1915 $text =~ s/\s*[;\/\.]*$//sgo;
1916 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1921 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1922 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1923 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1924 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1927 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1929 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1930 $text =~ s/\pM+//gso;
1931 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1933 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1934 $text =~ s/\s+/ /sgo;
1935 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1936 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1937 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1938 $text =~ s/,?\s+.*$//sgo;
1939 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1946 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
1950 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
1954 my $match_index = 0;
1955 my $block_index = 1;
1956 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
1957 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
1959 my $block_name_index = 0;
1960 my $block_value_index = 1;
1961 my $block = $fp_mods_xpath[$block_index];
1962 while ( my $part = $$block[$block_value_index] ) {
1964 for my $xpath ( @{ $part->{xpath} } ) {
1965 $text = $mods->findvalue( $xpath );
1969 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1973 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1974 $fp_string .= $text;
1977 $block_name_index += 2;
1978 $block_value_index += 2;
1982 $fp_string =~ s/\W+//gso;
1983 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec
#
# Recomputes the fingerprint and quality of the biblio records matching $rec.
# When either value differs from what is stored, the record entry is updated
# and, unless called through a "nomap" API name, the record's metarecord
# source mapping is rebuilt against the new fingerprint. Each processed record
# id is sent back to the client.
#
# Argument unpacking, the declarations of $client, $rec, $commit, $success and
# $old_mrid, and the try/catch scaffolding are on lines not present in this extract.
1993 sub refingerprint_bibrec {
# Open a transaction only when one is not already active; failure is fatal.
1999 if (!OpenILS::Application::Ingest->in_transaction) {
2000 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2006 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): the lexical $b shadows sort's special $b inside this loop.
2007 for my $b (@$bibs) {
# $fp is used below as a hash with 'fingerprint' and 'quality' keys.
2008 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Fingerprint is compared as a string, quality numerically.
2010 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2012 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2014 OpenILS::Application::Ingest->storage_req(
2015 'open-ils.storage.direct.biblio.record_entry.remote_update',
2017 { fingerprint => $fp->{fingerprint},
2018 quality => $fp->{quality} }
# Metarecord remapping is skipped for the ".nomap" API variant.
2021 if ($self->api_name !~ /nomap/o) {
2022 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2023 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Delete the existing source-map rows, remembering the metarecord they pointed
# at (if several, the last one seen is kept in $old_mrid).
2028 if (ref($old_source_map) and @$old_source_map) {
2029 for my $m (@$old_source_map) {
2030 $old_mrid = $m->metarecord;
2031 OpenILS::Application::Ingest->storage_req(
2032 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no remaining source mappings, remove it as well.
2038 my $old_sm = OpenILS::Application::Ingest->storage_req(
2039 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2040 { metarecord => $old_mrid }
2043 if (ref($old_sm) and @$old_sm == 0) {
2044 OpenILS::Application::Ingest->storage_req(
2045 'open-ils.storage.direct.metabib.metarecord.delete',
# Look for a metarecord that already carries the new fingerprint ...
2050 my $mr = OpenILS::Application::Ingest->storage_req(
2051 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2052 { fingerprint => $fp->{fingerprint} }
# ... otherwise create one with this record as its master.
# NOTE(review): the condition guarding this creation, and how an existing
# search result is unwrapped into $mr, are not visible in this extract.
2056 $mr = Fieldmapper::metabib::metarecord->new;
2057 $mr->fingerprint( $fp->{fingerprint} );
2058 $mr->master_record( $b->id );
2059 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map this record to the chosen metarecord.
2062 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2063 $mr_map->metarecord( $mr->id );
2064 $mr_map->source( $b->id );
2065 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2069 $client->respond($b->id);
# Failure path: log the error argument.
2073 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Only finish the transaction when $commit is set; $success picks commit vs rollback.
2077 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2078 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names share this handler; the ".nomap" variant skips metarecord remapping.
2081 __PACKAGE__->register_method(
2082 api_name => "open-ils.worm.fingerprint.record.update",
2083 method => "refingerprint_bibrec",
2089 __PACKAGE__->register_method(
2090 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2091 method => "refingerprint_bibrec",
# fingerprint_bibrec
#
# Retrieves the biblio record entry for $rec and computes its fingerprint by
# delegating to open-ils.worm.fingerprint.marc on the stored MARC; the result
# is logged at INFO level.
# Argument unpacking and the return statement are not visible in this extract.
# NOTE(review): the API name registered below, "open-ils.worm.fingerprint.record",
# is also registered for biblio_fingerprint_record later in this package --
# confirm which registration is meant to be live.
2098 sub fingerprint_bibrec {
2103 OpenILS::Application::Ingest->post_init();
2104 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# List assignment keeps only the first value produced by the delegated method.
2106 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2107 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2111 __PACKAGE__->register_method(
2112 api_name => "open-ils.worm.fingerprint.record",
2113 method => "fingerprint_bibrec",
# fingerprint_mods
#
# Computes a fingerprint from a MODS XML string: parses it and hands the
# document element to _fp_mods, returning whatever that produces.
# Argument unpacking is not visible in this extract. Unlike the MARC-based
# entry points, $xml is always parsed here, so it must be a string.
2119 sub fingerprint_mods {
2124 OpenILS::Application::Ingest->post_init();
2125 my $mods = $parser->parse_string($xml)->documentElement;
2127 return _fp_mods( $mods );
2129 __PACKAGE__->register_method(
2130 api_name => "open-ils.worm.fingerprint.mods",
2131 method => "fingerprint_mods",
# fingerprint_marc
#
# Computes a fingerprint from MARC XML (string or parsed document): the record
# is transformed to MODS with $mods_sheet and the MODS document element is
# passed to _fp_mods; the result is logged at INFO level.
# Argument unpacking and the return statement are not visible in this extract.
# NOTE(review): the API name registered below, "open-ils.worm.fingerprint.marc",
# is also registered for biblio_fingerprint later in this package -- confirm
# which registration is meant to be live.
2136 sub fingerprint_marc {
# Parse only when given a plain string; references are taken to be parsed documents.
2141 $xml = $parser->parse_string($xml) unless (ref $xml);
2143 OpenILS::Application::Ingest->post_init();
2144 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2145 $log->debug("Returning [$fp] as fingerprint", INFO);
2148 __PACKAGE__->register_method(
2149 api_name => "open-ils.worm.fingerprint.marc",
2150 method => "fingerprint_marc",
# biblio_fingerprint_record
#
# Retrieves the biblio record entry for $rec and computes its fingerprint by
# delegating to open-ils.worm.fingerprint.marc; the result is logged at INFO level.
# Argument unpacking, the tail of the storage_req call chain that yields $marc,
# and the return statement are not visible in this extract.
# NOTE(review): the API name registered below, "open-ils.worm.fingerprint.record",
# is also registered for fingerprint_bibrec earlier in this package -- confirm
# which registration is meant to be live.
2158 sub biblio_fingerprint_record {
2163 OpenILS::Application::Ingest->post_init();
2165 my $marc = OpenILS::Application::Ingest
2166 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# List assignment keeps only the first value produced by the delegated method.
2169 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2170 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2173 __PACKAGE__->register_method(
2174 api_name => "open-ils.worm.fingerprint.record",
2175 method => "biblio_fingerprint_record",
# biblio_fingerprint
#
# Script-driven fingerprinting: serialises the MARC record and a MODS
# transform of it, exposes both to a configured script as 'environment', and
# runs that script through OpenILS::Utils::ScriptRunner.
# Argument unpacking, the stylesheet lookup inside the entityize() call, any
# guard around the ScriptRunner construction, and the final return are on
# lines not present in this extract.
# NOTE(review): the API name registered below, "open-ils.worm.fingerprint.marc",
# is also registered for fingerprint_marc earlier in this package -- confirm
# which registration is meant to be live.
2181 sub biblio_fingerprint {
2186 OpenILS::Application::Ingest->post_init();
# Parse only when given a plain string; references are taken to be parsed documents.
2188 $marc = $parser->parse_string($marc) unless (ref $marc);
# Produce the MODS form from the parsed MARC and pass it through entityize().
2190 my $mods = OpenILS::Application::Ingest::entityize(
2192 ->transform( $marc )
# From here on $marc is a string: the serialised document element, passed
# through entityize() with the 'D' option.
2198 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2201 $log->internal("Got MARC [$marc]");
2202 $log->internal("Created MODS [$mods]");
# Script location and library paths come from the open-ils.storage app_settings.
2205 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2206 my $conf = OpenSRF::Utils::SettingsClient->new;
2208 my $libs = $conf->config_value(@pfx, 'script_path');
2209 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be configured as a single value or a list; normalise to an array ref.
2210 my $script_libs = (ref($libs)) ? $libs : [$libs];
2212 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared in the visible lines.
# NOTE(review): presumably a file-level variable so the runner can be reused
# across calls -- confirm.
2214 $fp_script = new OpenILS::Utils::ScriptRunner
2215 ( file => $script_file,
2216 paths => $script_libs,
2217 reset_count => 1000 );
2220 $log->debug("Applying environment for biblio fingerprinting...");
2222 my $env = {marc => $marc, mods => $mods};
2223 #my $res = {fingerprint => '', quality => '0'};
2225 $fp_script->insert('environment' => $env);
2226 #$fp_script->insert('result' => $res);
2228 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() logs the error and returns 0 from this sub; the
# early return only fires if $log->error itself returns a true value.
2230 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2232 $log->debug("Script for biblio fingerprinting completed successfully...");
2236 __PACKAGE__->register_method(
2237 api_name => "open-ils.worm.fingerprint.marc",
2238 method => "biblio_fingerprint",
2243 # --------------------------------------------------------------------------------
2257 my $create_source_map;
2272 my %descriptor_code = (
2273 item_type => 'substr($ldr,6,1)',
2274 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2275 bib_level => 'substr($ldr,7,1)',
2276 control_type => 'substr($ldr,8,1)',
2277 char_encoding => 'substr($ldr,9,1)',
2278 enc_level => 'substr($ldr,17,1)',
2279 cat_form => 'substr($ldr,18,1)',
2280 pub_status => 'substr($ldr,5,1)',
2281 item_lang => 'substr($oo8,35,3)',
2282 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2283 audience => 'substr($oo8,22,1)',
2293 if ($self->api_name =~ /no_map/o) {
2297 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2299 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2301 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2303 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2305 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2306 unless ($sm_lookup);
2307 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2308 unless ($mr_lookup);
2309 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2310 unless ($mr_update);
2311 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2313 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2314 unless ($update_entry);
2315 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2316 unless ($rm_old_sm);
2317 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2318 unless ($rm_old_rd);
2319 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2320 unless ($rm_old_fr);
2321 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2322 unless ($rm_old_tr);
2323 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2324 unless ($rm_old_ar);
2325 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2326 unless ($rm_old_sr);
2327 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2328 unless ($rm_old_kr);
2329 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2330 unless ($rm_old_ser);
2331 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2332 unless ($mr_create);
2333 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2334 unless ($create_source_map);
2335 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2336 unless ($rd_create);
2337 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2338 unless ($fr_create);
2339 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2340 unless ($$create{title});
2341 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2342 unless ($$create{author});
2343 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2344 unless ($$create{subject});
2345 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2346 unless ($$create{keyword});
2347 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2348 unless ($$create{series});
2351 my ($outer_xact) = $in_xact->run;
2353 unless ($outer_xact) {
2354 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2355 my ($r) = $begin->run($client);
2356 unless (defined $r and $r) {
2358 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2361 } catch Error with {
2362 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2372 for my $entry ( $lookup->run(@docids) ) {
2373 # step -1: grab the doc from storage
2374 next unless ($entry);
2377 my $xslt_doc = $parser->parse_file(
2378 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2379 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2382 my $xml = $entry->marc;
2383 my $docid = $entry->id;
2384 my $marcdoc = $parser->parse_string($xml);
2385 my $modsdoc = $mods_sheet->transform($marcdoc);
2387 my $mods = $modsdoc->documentElement;
2388 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2390 $entry->fingerprint( fingerprint_mods( $mods ) );
2391 push @entry_list, $entry;
2393 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2396 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2397 if (!$mr || !@$mr) {
2398 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2399 $mr = new Fieldmapper::metabib::metarecord;
2400 $mr->fingerprint( $entry->fingerprint );
2401 $mr->master_record( $entry->id );
2402 my ($new_mr) = $mr_create->run($mr);
2404 unless (defined $mr) {
2405 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2408 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2413 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2414 $sm->metarecord( $mr->id );
2415 $sm->source( $entry->id );
2416 push @source_maps, $sm;
2419 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2420 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2422 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2423 for my $rd_field ( keys %descriptor_code ) {
2424 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2426 $rd_obj->record( $docid );
2427 push @rd_list, $rd_obj;
2429 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2431 # step 2: build the KOHA rows
2432 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2433 $_->record( $docid ) for (@tmp_list);
2434 push @ns_list, @tmp_list;
2438 last unless ($self->api_name =~ /batch$/o);
2441 $rm_old_rd->run( { record => \@docids } );
2442 $rm_old_fr->run( { record => \@docids } );
2443 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2444 $rm_old_tr->run( { source => \@docids } );
2445 $rm_old_ar->run( { source => \@docids } );
2446 $rm_old_sr->run( { source => \@docids } );
2447 $rm_old_kr->run( { source => \@docids } );
2448 $rm_old_ser->run( { source => \@docids } );
2451 my ($sm) = $create_source_map->run(@source_maps);
2452 unless (defined $sm) {
2453 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2455 my ($mr) = $mr_update->run(@mr_list);
2456 unless (defined $mr) {
2457 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2461 my ($re) = $update_entry->run(@entry_list);
2462 unless (defined $re) {
2463 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2466 my ($rd) = $rd_create->run(@rd_list);
2467 unless (defined $rd) {
2468 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2471 my ($fr) = $fr_create->run(@ns_list);
2472 unless (defined $fr) {
2473 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2476 # step 5: insert the new metadata
2477 for my $class ( qw/title author subject keyword series/ ) {
2479 for my $doc ( @mods_data ) {
2480 my ($did) = keys %$doc;
2481 my ($data) = values %$doc;
2483 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2484 for my $row ( keys %{ $$data{$class} } ) {
2485 next unless (exists $$data{$class}{$row});
2486 next unless ($$data{$class}{$row}{value});
2487 my $fm_obj = $fm_constructor->new;
2488 $fm_obj->value( $$data{$class}{$row}{value} );
2489 $fm_obj->field( $$data{$class}{$row}{field_id} );
2490 $fm_obj->source( $did );
2491 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2493 push @md_list, $fm_obj;
2497 my ($cr) = $$create{$class}->run(@md_list);
2498 unless (defined $cr) {
2499 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2503 unless ($outer_xact) {
2504 $log->debug("Commiting transaction started by the Ingest.", INFO);
2505 my ($c) = $commit->run;
2506 unless (defined $c and $c) {
2508 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the bibliographic ingest under four API names. Every name dispatches
# to the same `wormize` method; wormize inspects its own api_name at run time
# (it stops after the first record unless the name ends in "batch").
# NOTE(review): the ".no_map" variants presumably set $no_map, which makes
# wormize skip removal of the old metarecord source maps -- confirm.
2514 __PACKAGE__->register_method(
2515 api_name => "open-ils.worm.wormize",
2516 method => "wormize",
# Single record, without metarecord source-map maintenance.
2520 __PACKAGE__->register_method(
2521 api_name => "open-ils.worm.wormize.no_map",
2522 method => "wormize",
# Many records per call.
2526 __PACKAGE__->register_method(
2527 api_name => "open-ils.worm.wormize.batch",
2528 method => "wormize",
# Many records per call, without metarecord source-map maintenance.
2532 __PACKAGE__->register_method(
2533 api_name => "open-ils.worm.wormize.no_map.batch",
2534 method => "wormize",
# File-scoped variable. NOTE(review): presumably a cached storage method
# handle for authority ingest; authority_wormize as shown never uses it.
2549 my $acreate_source_map;
# authority_wormize( $self, $client, @docids )
#
# Re-ingest authority records: for each record id, parse the stored MARCXML,
# build a fresh authority::record_descriptor and the authority::full_rec rows,
# then replace the previously stored descriptor / full_rec rows with them.
#
# Parameters:
#   $self   - the method object; its api_name decides batch behaviour
#   $client - the calling client, handed to the storage BEGIN call
#   @docids - authority record ids to ingest
#
# Only the first retrievable record is processed unless the API name ends in
# "batch". If no storage transaction is active, one is started here and
# committed at the end; an enclosing transaction is left for the caller to
# finish.
#
# Throws OpenSRF::EX::PANIC when the transaction cannot be started or
# committed, or when a batch create call returns nothing.
sub authority_wormize {
    my $self   = shift;
    my $client = shift;
    my @docids = @_;

    # Storage method handles live in file-scoped variables, so each lookup
    # is performed only the first time it is needed.
    $in_xact  ||= $self->method_lookup('open-ils.storage.transaction.current');
    $begin    ||= $self->method_lookup('open-ils.storage.transaction.begin');
    $commit   ||= $self->method_lookup('open-ils.storage.transaction.commit');
    $rollback ||= $self->method_lookup('open-ils.storage.transaction.rollback');

    $alookup       ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
    $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
    $arm_old_rd    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.mass_delete');
    $arm_old_fr    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.mass_delete');
    $ard_create    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.batch.create');
    $afr_create    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.batch.create');

    my ($outer_xact) = $in_xact->run;

    try {
        unless ($outer_xact) {
            $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
            my ($r) = $begin->run($client);
            unless (defined $r and $r) {
                $rollback->run;
                OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
            }
        }
    } catch Error with {
        OpenSRF::EX::PANIC->throw("Ingest Couldn't BEGIN transaction!");
    };

    my @rd_list;    # new authority::record_descriptor objects
    my @ns_list;    # new authority::full_rec rows
    my $ret = 0;    # number of records processed

    # Fetch through the *authority* retrieve handle. The bibliographic
    # $lookup handle belongs to wormize and may not even be initialised.
    for my $entry ( $alookup->run(@docids) ) {
        next unless ($entry);

        my $docid   = $entry->id;
        my $marcdoc = $parser->parse_string( $entry->marc );

        # step 1: build the record descriptor.
        # The %descriptor_code snippets are evaluated as Perl source and
        # presumably read $ldr and $oo8 by name, so both lexicals must stay
        # in scope under exactly these names.
        my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
        my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

        my $rd_obj = Fieldmapper::authority::record_descriptor->new;
        for my $rd_field ( keys %descriptor_code ) {
            # NOTE(review): string eval of configuration-supplied code;
            # failures are silently ignored and leave the field undefined.
            $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
        }
        $rd_obj->record( $docid );
        push @rd_list, $rd_obj;

        # step 2: build the KOHA rows
        my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
        $_->record( $docid ) for (@tmp_list);
        push @ns_list, @tmp_list;

        $ret++;

        last unless ($self->api_name =~ /batch$/o);
    }

    # step 3: drop the old derived rows for these records
    $arm_old_rd->run( { record => \@docids } );
    $arm_old_fr->run( { record => \@docids } );

    # step 4: store the new descriptors and full_rec rows
    my ($rd) = $ard_create->run(@rd_list);
    unless (defined $rd) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!");
    }

    # Use the authority full_rec creator, matching the error message below.
    my ($fr) = $afr_create->run(@ns_list);
    unless (defined $fr) {
        OpenSRF::EX::PANIC->throw("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!");
    }

    unless ($outer_xact) {
        $log->debug("Commiting transaction started by Ingest.", INFO);
        my ($c) = $commit->run;
        unless (defined $c and $c) {
            $rollback->run;
            OpenSRF::EX::PANIC->throw("Couldn't COMMIT changes!");
        }
    }

    # NOTE(review): returns the count of records processed -- confirm that
    # callers expect this value.
    return $ret;
}
# Authority ingest entry points. These must dispatch to authority_wormize;
# dispatching to wormize would run the bibliographic ingest against
# authority record ids.
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authority.wormize",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authority.wormize.batch",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);
# Legacy alias: this method was originally published under a misspelled name
# ("authortiy"). Keep it so existing callers continue to work.
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authortiy.wormize",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);
2696 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml, $type )
#
# Flatten a parsed MARCXML document into a list of "full record" row objects.
#
# Parameters:
#   $marcxml - parsed document; must provide documentElement
#   $type    - class used to build every row (it must provide new plus the
#              tag/ind1/ind2/subfield/value accessors); defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Returns a list with one row per leader, one per controlfield and one per
# child node of each datafield. Combining marks are stripped from every value
# (NFD decomposition, then removal of \pM); datafield values are additionally
# lowercased. The caller is responsible for setting ->record on the rows.
sub _marcxml_to_full_rows {
    my $marcxml = shift;
    my $type    = shift || 'Fieldmapper::metabib::full_rec';

    # Decompose and drop combining marks so that accented and unaccented
    # spellings compare equal.
    my $strip_marks = sub {
        my $text = NFD( shift );
        $text =~ s/\pM+//g;
        return $text;
    };

    my @ns_list;
    my $root = $marcxml->documentElement;

    for my $tagline ( @{ $root->getChildrenByTagName("leader") } ) {
        next unless $tagline;

        # Build the row from $type, not a hard-coded class, so callers that
        # ask for another row class get a homogeneous list.
        my $ns = $type->new;
        $ns->tag( 'LDR' );
        $ns->value( $strip_marks->( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{ $root->getChildrenByTagName("controlfield") } ) {
        next unless $tagline;

        my $ns = $type->new;
        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( $strip_marks->( $tagline->textContent ) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{ $root->getChildrenByTagName("datafield") } ) {
        next unless $tagline;

        my $tag  = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( $tagline->childNodes ) {
            next unless $data;

            my $ns = $type->new;
            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( lc( $strip_marks->( $data->textContent ) ) );

            push @ns_list, $ns;
        }
    }

    return @ns_list;
}
# _get_field_value( $root, $xpath )
#
# Collect the text found at $xpath under $root into one normalized string.
#
# For each matching node, the text of each child node is appended, followed
# by a single space, unless that exact text already occurs somewhere in the
# buffer. A matching node without children contributes its own text instead.
# The result is NFD-decomposed and stripped of combining marks.
#
# Returns the buffer (an empty string when nothing matched).
sub _get_field_value {
    my( $root, $xpath ) = @_;

    my $string = "";

    # grab the set of matching nodes
    for my $value ( $root->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {
            my $text = $child->textContent;

            # Uniquify the values with a literal substring test. A regex
            # built from the text would become an empty pattern when the
            # text is empty, and Perl then silently reuses the last
            # successfully matched pattern.
            next if index( $string, $text ) >= 0;

            # add the child's content to the growing buffer
            $string .= $text . " ";
        }

        # childless node: fall back to the node's own text
        $string .= $value->textContent . " " unless @children;
    }

    $string = NFD($string);
    $string =~ s/\pM//g;

    # NOTE(review): lowercasing on return is reconstructed from lines not
    # visible in this listing -- confirm.
    return lc($string);
}
2789 sub modsdoc_to_values {
2790 my( $self, $mods ) = @_;
2792 for my $class (keys %$xpathset) {
2793 $data->{$class} = {};
2794 for my $type (keys %{$xpathset->{$class}}) {
2795 $data->{$class}->{$type} = {};
2796 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};