1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenSRF::Utils::Logger qw/:level/;
10 use OpenILS::Utils::ScriptRunner;
11 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::Fieldmapper;
18 use Time::HiRes qw(time);
# Registry of the record formats this module knows about, keyed by format name.
# Each entry carries the XML namespace URI (`ns`) of that format; the
# initialisation code further down additionally stores a compiled stylesheet
# under an `xslt` key for the `mods` and `mods3` entries.
20 our %supported_formats = (
21 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
22 mods => {ns => 'http://www.loc.gov/mods/'},
23 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
24 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
25 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
26 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
27 atom => {ns => 'http://www.w3.org/2005/Atom'},
28 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
32 rss10 => {ns => 'http://purl.org/rss/1.0/'},
33 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# File-wide helpers: $log holds the logger class name (its methods are called
# as class methods), while $parser and $xslt are the XML parser and XSLT
# processor instances reused by the code in this file.
# NOTE(review): no `use XML::LibXML` / `use XML::LibXSLT` line is visible in
# this excerpt -- confirm both modules are loaded before these constructors run.
38 my $log = 'OpenSRF::Utils::Logger';
40 my $parser = XML::LibXML->new();
41 my $xslt = XML::LibXSLT->new();
# Lazy one-time initialisation (presumably the body of post_init, which the
# rest of this file invokes as OpenILS::Application::Ingest->post_init()).
# Everything below is skipped once $xpathset has at least one key.
51 unless (keys %$xpathset) {
52 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL stylesheets, read from the settings client.
54 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Compile the MARC21slim -> MODS stylesheet only if it is not cached yet.
56 unless ($supported_formats{mods}{xslt}) {
57 $log->debug("Loading MODS XSLT", DEBUG);
58 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
59 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same again for the MODS v3 stylesheet.
62 unless ($supported_formats{mods3}{xslt}) {
63 $log->debug("Loading MODS v3 XSLT", DEBUG);
64 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
65 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows, filtered on id != undef
# (presumably "id IS NOT NULL", i.e. every row -- confirm).
69 my $req = OpenSRF::AppSession
70 ->create('open-ils.cstore')
71 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Index each returned field definition by field class and name, keeping the
# xpath expression, the row id and the format it applies to.
74 if (ref $req and @$req) {
76 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
77 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
78 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
79 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with a hexadecimal
# numeric character reference (&#xHHHH;). Characters outside that range,
# including anything above U+FFFD, are left untouched.
95 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
99 # --------------------------------------------------------------------------------
102 package OpenILS::Application::Ingest::Biblio;
103 use base qw/OpenILS::Application::Ingest/;
104 use Unicode::Normalize;
# Read-only ingest of one bibliographic record object.
# Reads $bib->marc and $bib->id and returns a hash ref with the keys
# full_rec, field_entries, fingerprint and descriptor.
106 sub ro_biblio_ingest_single_object {
# Entity-encode the MARC XML, then parse it once for the DOM-based methods.
110 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
112 my $document = $parser->parse_string($xml);
# Flat MARC rows and field entries are built from the parsed document;
# fingerprint and descriptor extraction are handed the XML string instead.
114 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
115 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
116 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
117 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the record id onto every generated object; the descriptor is only
# touched when one actually came back.
119 $_->source($bib->id) for (@mXfe);
120 $_->record($bib->id) for (@mfr);
121 $rd->record($bib->id) if ($rd);
123 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Published under the API name below.
125 __PACKAGE__->register_method(
126 api_name => "open-ils.ingest.full.biblio.object.readonly",
127 method => "ro_biblio_ingest_single_object",
# Read-only ingest of one bibliographic record supplied as MARC XML text.
# Returns the same hash shape as the object variant, but no record id is
# stamped onto the results here.
132 sub ro_biblio_ingest_single_xml {
# The XML argument is entity-encoded before it is parsed.
135 my $xml = OpenILS::Application::Ingest::entityize(shift);
137 my $document = $parser->parse_string($xml);
# DOM-based extraction gets the parsed document, script-based extraction
# (fingerprint / descriptor) gets the XML string.
139 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
140 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
141 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
142 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
144 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Published under the API name below.
146 __PACKAGE__->register_method(
147 api_name => "open-ils.ingest.full.biblio.xml.readonly",
148 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of one bibliographic record identified by $rec: the record
# is fetched from cstore and its MARC is passed to the XML ingest method.
153 sub ro_biblio_ingest_single_record {
158 OpenILS::Application::Ingest->post_init();
159 my $r = OpenSRF::AppSession
160 ->create('open-ils.cstore')
161 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Give up when nothing usable came back from cstore.
164 return undef unless ($r and @$r);
166 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
# Stamp the record id onto the generated objects.
168 $_->source($rec) for (@{$res->{field_entries}});
169 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike ro_biblio_ingest_single_object, which guards the same
# call with `if ($rd)`, this dies if no descriptor was produced -- confirm.
170 $res->{descriptor}->record($rec);
# Published under the API name below.
174 __PACKAGE__->register_method(
175 api_name => "open-ils.ingest.full.biblio.record.readonly",
176 method => "ro_biblio_ingest_single_record",
# Streaming variant of the record ingest: reads record ids from the client one
# message at a time and responds with the ingest result for each.
181 sub ro_biblio_ingest_stream_record {
185 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
187 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
189 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
191 my $rec = $resp->content;
# An undefined payload ends the stream.
192 last unless (defined $rec);
194 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
195 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# The single-record method already stamps these ids; they are set again here.
197 $_->source($rec) for (@{$res->{field_entries}});
198 $_->record($rec) for (@{$res->{full_rec}});
200 $client->respond( $res );
# Published under the API name below.
205 __PACKAGE__->register_method(
206 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
207 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the XML ingest: reads MARC XML payloads from the client
# one message at a time and responds with the ingest result for each.
212 sub ro_biblio_ingest_stream_xml {
216 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
218 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
220 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
222 my $xml = $resp->content;
# An undefined payload ends the stream.
223 last unless (defined $xml);
225 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
226 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
228 $client->respond( $res );
# Published under the API name below.
233 __PACKAGE__->register_method(
234 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
235 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bibliographic record objects: each message carries an
# object with ->marc and ->id; the ingest result is stamped with that id and
# sent back to the client.
# NOTE(review): despite the rw_ / "import" naming, the lines visible in this
# excerpt only run the read-only ingest -- confirm whether anything is stored.
240 sub rw_biblio_ingest_stream_import {
244 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
246 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
248 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
250 my $bib = $resp->content;
# An undefined payload ends the stream.
251 last unless (defined $bib);
253 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
254 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
256 $_->source($bib->id) for (@{$res->{field_entries}});
257 $_->record($bib->id) for (@{$res->{full_rec}});
259 $client->respond( $res );
# Published under the API name below.
264 __PACKAGE__->register_method(
265 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
266 method => "rw_biblio_ingest_stream_import",
272 # --------------------------------------------------------------------------------
275 package OpenILS::Application::Ingest::Authority;
276 use base qw/OpenILS::Application::Ingest/;
277 use Unicode::Normalize;
# Read-only ingest of one authority record object: builds the flat MARC rows
# from $bib->marc and stamps them with $bib->id.
279 sub ro_authority_ingest_single_object {
283 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
285 my $document = $parser->parse_string($xml);
287 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
289 $_->record($bib->id) for (@mfr);
# NOTE(review): @mXfe, $fp and $rd are not assigned anywhere in the visible
# lines of this sub, and ro_authority_ingest_single_xml returns only
# full_rec -- this looks copied from the biblio variant; confirm.
291 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Published under the API name below.
293 __PACKAGE__->register_method(
294 api_name => "open-ils.ingest.full.authority.object.readonly",
295 method => "ro_authority_ingest_single_object",
# Read-only ingest of one authority record supplied as MARC XML text.
# Returns a hash ref whose only key is full_rec (the flat MARC rows).
300 sub ro_authority_ingest_single_xml {
# The XML argument is entity-encoded before it is parsed.
303 my $xml = OpenILS::Application::Ingest::entityize(shift);
305 my $document = $parser->parse_string($xml);
307 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
309 return { full_rec => \@mfr };
# Published under the API name below.
311 __PACKAGE__->register_method(
312 api_name => "open-ils.ingest.full.authority.xml.readonly",
313 method => "ro_authority_ingest_single_xml",
# Read-only ingest of one authority record identified by $rec: the record is
# fetched from cstore and its MARC is passed to the authority XML ingest.
318 sub ro_authority_ingest_single_record {
323 OpenILS::Application::Ingest->post_init();
324 my $r = OpenSRF::AppSession
325 ->create('open-ils.cstore')
326 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
# Give up when nothing usable came back from cstore.
329 return undef unless ($r and @$r);
331 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
333 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_authority_ingest_single_xml returns only full_rec, so
# $res->{descriptor} is presumably undef here and this call would die -- confirm.
334 $res->{descriptor}->record($rec);
# Published under the API name below.
338 __PACKAGE__->register_method(
339 api_name => "open-ils.ingest.full.authority.record.readonly",
340 method => "ro_authority_ingest_single_record",
# Streaming variant of the authority record ingest: reads record ids from the
# client one message at a time and responds with the ingest result for each.
345 sub ro_authority_ingest_stream_record {
349 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
351 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
353 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
355 my $rec = $resp->content;
# An undefined payload ends the stream.
356 last unless (defined $rec);
358 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
359 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
# NOTE(review): the authority XML ingest visible in this file returns no
# field_entries key, so the first loop presumably never iterates -- confirm.
361 $_->source($rec) for (@{$res->{field_entries}});
362 $_->record($rec) for (@{$res->{full_rec}});
364 $client->respond( $res );
# Published under the API name below.
369 __PACKAGE__->register_method(
370 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
371 method => "ro_authority_ingest_stream_record",
# Streaming variant of the authority XML ingest: reads MARC XML payloads from
# the client one message at a time and responds with the result for each.
376 sub ro_authority_ingest_stream_xml {
380 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
382 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
384 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
386 my $xml = $resp->content;
# An undefined payload ends the stream.
387 last unless (defined $xml);
389 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
390 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
392 $client->respond( $res );
# Published under the API name below.
397 __PACKAGE__->register_method(
398 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
399 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects: each message carries an object
# with ->marc and ->id; the ingest result is stamped with that id and sent back.
# NOTE(review): despite the rw_ / "import" naming, the lines visible in this
# excerpt only run the read-only ingest -- confirm whether anything is stored.
404 sub rw_authority_ingest_stream_import {
408 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this excerpt -- confirm it is needed.
410 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration, waiting up to 5 seconds for each.
412 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
414 my $bib = $resp->content;
# An undefined payload ends the stream.
415 last unless (defined $bib);
417 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
418 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
# NOTE(review): the authority XML ingest visible in this file returns no
# field_entries key, so the first loop presumably never iterates -- confirm.
420 $_->source($bib->id) for (@{$res->{field_entries}});
421 $_->record($bib->id) for (@{$res->{full_rec}});
423 $client->respond( $res );
# Published under the API name below.
428 __PACKAGE__->register_method(
429 api_name => "open-ils.ingest.full.authority.bib_stream.import",
430 method => "rw_authority_ingest_stream_import",
436 # --------------------------------------------------------------------------------
437 # MARC index extraction
439 package OpenILS::Application::Ingest::XPATH;
440 use base qw/OpenILS::Application::Ingest/;
441 use Unicode::Normalize;
443 # Give this an XML documentElement and an XPath expression; the text content
# of the matching nodes is concatenated into one space-separated string.
444 sub xpath_to_string {
448 my $ns_prefix = shift;
# Bind the namespace prefix on the element so the XPath expression can use it;
# skipped unless both a URI and a prefix were supplied.
451 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
455 # grab the set of matching nodes
456 my @nodes = $xml->findnodes( $xpath );
457 for my $value (@nodes) {
459 # grab all children of the node
460 my @children = $value->childNodes();
461 for my $child (@children) {
463 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal regex pattern below.
464 my $content = quotemeta($child->textContent);
# The uniqueness test is a substring match against everything gathered so
# far, so text contained inside an earlier value is also skipped.
465 next if ($unique && $string =~ /$content/); # uniquify the values
466 $string .= $child->textContent . " ";
# NOTE(review): the branch structure between the child loop and the next
# line is not visible in this excerpt -- presumably an else-branch; confirm.
469 $string .= $value->textContent . " ";
# For each requested field class, evaluate every configured XPath definition
# against the record and respond with one field-entry object per definition.
475 sub class_index_string_xml {
481 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or raw XML text.
482 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
486 for my $class (@classes) {
# Field-entry class name is derived from the field class, e.g.
# Fieldmapper::metabib::title_field_entry for class "title".
487 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
488 for my $type ( keys %{ $xpathset->{$class} } ) {
490 my $def = $xpathset->{$class}->{$type};
491 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format;
# later definitions reuse the cached result.
496 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
497 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix for the XPath expression.
500 my $value = xpath_to_string(
501 $document->documentElement => $def->{xpath},
502 $sf->{ns} => $def->{format},
# Normalise the extracted text: decompose (NFD), drop mark characters (\pM)
# and "other" characters (\pC), trim trailing non-word characters, and remove
# dots that sit between two word characters.
508 $value = NFD($value);
509 $value =~ s/\pM+//sgo;
510 $value =~ s/\pC+//sgo;
511 $value =~ s/\W+$//sgo;
513 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
# Build the field-entry object and stream it back to the caller.
516 my $fm = $class_constructor->new;
517 $fm->value( $value );
518 $fm->field( $xpathset->{$class}->{$type}->{id} );
519 $client->respond($fm);
# Published under the API name below.
524 __PACKAGE__->register_method(
525 api_name => "open-ils.ingest.field_entry.class.xml",
526 method => "class_index_string_xml",
# Record-id front end for the class-based field-entry extraction: fetches the
# record, then relays every field entry produced from its MARC to the client.
532 sub class_index_string_record {
538 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, whereas
# all_index_string_record in this file uses biblio.record_entry -- confirm
# which source is intended.
539 my $r = OpenSRF::AppSession
540 ->create('open-ils.cstore')
541 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
# Give up when nothing usable came back from cstore.
544 return undef unless ($r and @$r);
546 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
548 $client->respond($fm);
# Published under the API name below.
552 __PACKAGE__->register_method(
553 api_name => "open-ils.ingest.field_entry.class.record",
554 method => "class_index_string_record",
# Extract field entries for every configured field class (all keys of
# $xpathset) from the given XML and relay each one to the client.
560 sub all_index_string_xml {
565 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
566 $client->respond($fm);
# Published under the API name below.
570 __PACKAGE__->register_method(
571 api_name => "open-ils.ingest.extract.field_entry.all.xml",
572 method => "all_index_string_xml",
# Record-id front end for the all-classes extraction: fetches the
# bibliographic record, then relays every field entry produced from its MARC.
578 sub all_index_string_record {
583 OpenILS::Application::Ingest->post_init();
584 my $r = OpenSRF::AppSession
585 ->create('open-ils.cstore')
586 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Give up when nothing usable came back from cstore.
589 return undef unless ($r and @$r);
591 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
593 $client->respond($fm);
# Published under the API name below.
597 __PACKAGE__->register_method(
598 api_name => "open-ils.ingest.extract.field_entry.all.record",
599 method => "all_index_string_record",
605 # --------------------------------------------------------------------------------
608 package OpenILS::Application::Ingest::FlatMARC;
609 use base qw/OpenILS::Application::Ingest/;
610 use Unicode::Normalize;
# Flatten a parsed MARCXML document into a list of full_rec row objects,
# walking the leader, the control fields and the subfields of each data field.
613 sub _marcxml_to_full_rows {
# Row class comes from the second argument, defaulting to the metabib schema.
616 my $xmltype = shift || 'metabib';
618 my $type = "Fieldmapper::${xmltype}::full_rec";
# Locate the <record> element regardless of its namespace.
622 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
624 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
625 next unless $tagline;
630 my $val = $tagline->textContent;
# Control fields: the tag attribute plus the element text.
640 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
641 next unless $tagline;
645 $ns->tag( $tagline->getAttribute( "tag" ) );
646 my $val = $tagline->textContent;
# Data fields: tag and both indicators are read once per field, then each
# subfield is visited.
656 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
657 next unless $tagline;
659 my $tag = $tagline->getAttribute( "tag" );
660 my $ind1 = $tagline->getAttribute( "ind1" );
661 my $ind2 = $tagline->getAttribute( "ind2" );
663 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
671 $ns->subfield( $data->getAttribute( "code" ) );
672 my $val = $data->textContent;
# Subfield values are stored lower-cased.
677 $ns->value( lc($val) );
683 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the method registered below as "flat_marc_xml" (its sub line is not
# visible in this excerpt): turns MARC XML into flat rows and streams them.
692 $log->debug("processing [$xml]");
# Accept either an already-parsed document (a reference) or raw XML text.
694 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# Row type defaults to metabib and switches to authority when the API name
# this was invoked under contains "authority".
696 my $type = 'metabib';
697 $type = 'authority' if ($self->api_name =~ /authority/o);
699 OpenILS::Application::Ingest->post_init();
701 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# One implementation published under two API names (authority and biblio).
704 __PACKAGE__->register_method(
705 api_name => "open-ils.ingest.flat_marc.authority.xml",
706 method => "flat_marc_xml",
711 __PACKAGE__->register_method(
712 api_name => "open-ils.ingest.flat_marc.biblio.xml",
713 method => "flat_marc_xml",
# Record-id front end for the flat MARC extraction: fetches the record from
# cstore and relays every flat row produced from its MARC to the client.
719 sub flat_marc_record {
# NOTE(review): the initial value of $type is not visible in this excerpt; it
# is interpolated into the cstore and ingest method names below, and the xml
# methods are registered as "biblio" / "authority" -- presumably it defaults to
# "biblio"; confirm.
725 $type = 'authority' if ($self->api_name =~ /authority/o);
727 OpenILS::Application::Ingest->post_init();
728 my $r = OpenSRF::AppSession
729 ->create('open-ils.cstore')
730 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
# Give up when the record is missing or has no MARC.
734 return undef unless ($r and $r->marc);
736 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
737 for my $row (@rows) {
738 $client->respond($row);
# Each row is also serialised to JSON for the debug log.
739 $log->debug(JSON->perl2JSON($row), DEBUG);
# One implementation published under two API names (biblio and authority).
743 __PACKAGE__->register_method(
744 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
745 method => "flat_marc_record",
750 __PACKAGE__->register_method(
751 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
752 method => "flat_marc_record",
758 # --------------------------------------------------------------------------------
761 package OpenILS::Application::Ingest::Biblio::Fingerprint;
762 use base qw/OpenILS::Application::Ingest/;
763 use Unicode::Normalize;
764 use OpenSRF::EX qw/:try/;
# Compute the fingerprint of the bibliographic record identified by $rec by
# fetching it from cstore and running the XML fingerprint method on its MARC.
766 sub biblio_fingerprint_record {
771 OpenILS::Application::Ingest->post_init();
773 my $r = OpenSRF::AppSession
774 ->create('open-ils.cstore')
775 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Give up when the record is missing or has no MARC.
778 return undef unless ($r and $r->marc);
780 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# Logged through debug() with INFO passed as the level argument.
781 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Published under the API name below.
784 __PACKAGE__->register_method(
785 api_name => "open-ils.ingest.fingerprint.record",
786 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARC XML string by running the configured
# fingerprinting script through OpenILS::Utils::ScriptRunner.
792 sub biblio_fingerprint {
795 my $xml = OpenILS::Application::Ingest::entityize(shift);
797 $log->internal("Got MARC [$xml]");
# Script location and library search path come from this application's
# app_settings in the settings client.
800 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
801 my $conf = OpenSRF::Utils::SettingsClient->new;
803 my $libs = $conf->config_value(@pfx, 'script_path');
804 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise it to an array ref.
805 my $script_libs = (ref($libs)) ? $libs : [$libs];
807 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines -- presumably a
# file-level variable that caches the runner between calls; confirm.
809 $fp_script = new OpenILS::Utils::ScriptRunner
810 ( file => $script_file,
811 paths => $script_libs,
812 reset_count => 100 );
# Hand the MARC XML to the script as environment.marc.
815 $fp_script->insert('environment' => {marc => $xml} => 1);
# If run() returns false the error is logged and, provided error() itself
# returns a true value, the sub returns undef.
817 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
818 $log->debug("Script for biblio fingerprinting completed successfully...");
# Published under the API name below.
822 __PACKAGE__->register_method(
823 api_name => "open-ils.ingest.fingerprint.xml",
824 method => "biblio_fingerprint",
# Extract a record descriptor from a MARC XML string by running the configured
# descriptor script through OpenILS::Utils::ScriptRunner.
830 sub biblio_descriptor {
833 my $xml = OpenILS::Application::Ingest::entityize(shift);
835 $log->internal("Got MARC [$xml]");
# Script location and library search path come from this application's
# app_settings in the settings client.
838 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
839 my $conf = OpenSRF::Utils::SettingsClient->new;
841 my $libs = $conf->config_value(@pfx, 'script_path');
842 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalise it to an array ref.
843 my $script_libs = (ref($libs)) ? $libs : [$libs];
845 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is not declared in the visible lines -- presumably a
# file-level variable that caches the runner between calls; confirm.
847 $rd_script = new OpenILS::Utils::ScriptRunner
848 ( file => $script_file,
849 paths => $script_libs,
850 reset_count => 100 );
853 $log->debug("Setting up environment for descriptor extraction script...");
# Unlike the fingerprint code, the XML is inserted directly under the
# 'environment.marc' key rather than as a hash under 'environment'.
854 $rd_script->insert('environment.marc' => $xml => 1);
855 $log->debug("Environment building complete...");
# If run() returns false the error is logged and, provided error() itself
# returns a true value, the sub returns undef.
857 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
858 $log->debug("Script for biblio descriptor extraction completed successfully");
# Published under the API name below.
862 __PACKAGE__->register_method(
863 api_name => "open-ils.ingest.descriptor.xml",
864 method => "biblio_descriptor",
# Presumably the body of in_transaction (its sub line is not visible in this
# excerpt): returns whatever the storage service reports as the current
# transaction.
875 OpenILS::Application::Ingest->post_init();
876 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Begin a storage transaction and return the storage service's current
# transaction. Throws OpenSRF::EX::PANIC when the BEGIN request fails.
879 sub begin_transaction {
883 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is already open.
884 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
888 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
889 #__PACKAGE__->st_sess->connect;
890 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A false or undefined reply means BEGIN failed: roll back and raise.
891 unless (defined $r and $r) {
892 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
893 #__PACKAGE__->st_sess->disconnect;
894 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
# Logged through debug() with ERROR passed as the level argument.
898 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
901 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction. The visible lines also log the
# "not inside a transaction" case and can throw OpenSRF::EX::PANIC; the
# conditions selecting each path are not visible in this excerpt.
904 sub rollback_transaction {
908 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is currently open.
909 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
913 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
915 $log->debug("Ingest isn't inside a transaction.", INFO);
918 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction. A failed COMMIT is rolled back and
# reported by throwing OpenSRF::EX::PANIC.
924 sub commit_transaction {
928 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is currently open.
929 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
932 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
934 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A false or undefined reply means COMMIT failed: roll back and raise.
935 unless (defined $r and $r) {
936 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
937 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
939 #__PACKAGE__->st_sess->disconnect;
941 $log->debug("Ingest isn't inside a transaction.", INFO);
944 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Presumably the tail of storage_req (its sub line is not visible in this
# excerpt): run the named method with the remaining arguments and return only
# the first element of its result list.
953 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
954 return shift( @res );
# Delete the derived rows (full_rec and record_descriptor) of an authority
# record inside a savepoint.
957 sub scrub_authority_record {
# Open a transaction only when none is active.
963 if (!OpenILS::Application::Ingest->in_transaction) {
964 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# The savepoint lets a failed scrub be undone without discarding the whole
# transaction.
970 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
972 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
973 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
975 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and rewind to the savepoint.
977 $log->debug('Scrubbing failed : '.shift(), ERROR);
978 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# Commit or roll back only when $commit is set (where it is assigned is not
# visible in this excerpt).
982 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
983 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published under the API name below.
986 __PACKAGE__->register_method(
987 api_name => "open-ils.worm.scrub.authority",
988 method => "scrub_authority_record",
# Delete the derived metabib rows of a bibliographic record inside a savepoint
# and repair any metarecord that used the record as its master.
994 sub scrub_metabib_record {
# A hash ref argument is treated as a search clause and resolved to record
# ids first.
999 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1000 $rec = OpenILS::Application::Ingest->storage_req(
1001 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction only when none is active.
1006 if (!OpenILS::Application::Ingest->in_transaction) {
1007 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1013 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Remove flat rows, source maps, descriptors and all five field-entry classes.
1015 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1016 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1017 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1018 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1019 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1020 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1021 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1022 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
# Any metarecord whose master was this record is either re-pointed at another
# mapped source or deleted.
1024 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1025 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1027 for my $mr (@$masters) {
1028 $log->debug( "Found metarecord whose master is $rec", DEBUG);
1029 my $others = OpenILS::Application::Ingest->storage_req(
1030 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the cached mods.
1033 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1034 $mr->master_record($others->[0]->source);
1035 OpenILS::Application::Ingest->storage_req(
1036 'open-ils.storage.direct.metabib.metarecord.remote_update',
1038 { master_record => $others->[0]->source, mods => undef }
# Metarecord removal; each step is reported via warn as well as the logger.
1041 warn "Removing metarecord whose master is $rec";
1042 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1043 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1044 warn "Metarecord removed";
1045 $log->debug( "Metarecord removed", DEBUG);
1049 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and rewind to the savepoint.
1052 $log->debug('Scrubbing failed : '.shift(), ERROR);
1053 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# Commit or roll back only when $commit is set (where it is assigned is not
# visible in this excerpt).
1057 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1058 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published under the API name below.
1061 __PACKAGE__->register_method(
1062 api_name => "open-ils.worm.scrub.biblio",
1063 method => "scrub_metabib_record",
# Re-ingest every bibliographic record mapped to the metarecord $mrec and
# report per-record success.
1068 sub wormize_biblio_metarecord {
# All source-map rows belonging to the metarecord.
1073 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1076 for my $r (@$recs) {
# The record-level worker is called as a plain function, with $self and
# $client passed through explicitly.
1079 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is not assigned in the visible lines of this sub; the loop
# variable is $r, so `$rec->metarecord` was presumably meant to be
# `$r->metarecord` -- confirm.
1081 { record => $r->source,
1082 metarecord => $rec->metarecord,
1083 success => $success,
# Failure path; the same $rec concern applies to the hash built here.
1086 } catch Error with {
1089 { record => $r->source,
1090 metarecord => $rec->metarecord,
1091 success => $success,
# One implementation published under four API names; the nomap / noscrub
# suffixes mirror the variants of the biblio record method.
1099 __PACKAGE__->register_method(
1100 api_name => "open-ils.worm.wormize.metarecord",
1101 method => "wormize_biblio_metarecord",
1106 __PACKAGE__->register_method(
1107 api_name => "open-ils.worm.wormize.metarecord.nomap",
1108 method => "wormize_biblio_metarecord",
1113 __PACKAGE__->register_method(
1114 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1115 method => "wormize_biblio_metarecord",
1120 __PACKAGE__->register_method(
1121 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1122 method => "wormize_biblio_metarecord",
1129 sub wormize_biblio_record {
1134 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1135 $rec = OpenILS::Application::Ingest->storage_req(
1136 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1142 if (!OpenILS::Application::Ingest->in_transaction) {
1143 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1149 # clean up the cruft
1150 unless ($self->api_name =~ /noscrub/o) {
1151 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1155 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1158 my @rec_descriptor = ();
1166 my %metarecord = ();
1167 my @source_map = ();
1168 for my $r (@$bibs) {
1170 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1172 my $xml = $parser->parse_string($r->marc);
1174 #update the fingerprint
1175 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1176 OpenILS::Application::Ingest->storage_req(
1177 'open-ils.storage.direct.biblio.record_entry.remote_update',
1179 { fingerprint => $fp->{fingerprint},
1180 quality => int($fp->{quality}) }
1181 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1183 # the full_rec stuff
1184 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1185 $fr->record( $r->id );
1186 push @full_rec, $fr;
1189 # the rec_descriptor stuff
1190 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1191 $rd->record( $r->id );
1192 push @rec_descriptor, $rd;
1194 # the indexing field entry stuff
1195 for my $class ( qw/title author subject keyword series/ ) {
1196 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1197 $fe->source( $r->id );
1198 push @{$field_entry{$class}}, $fe;
1202 unless ($self->api_name =~ /nomap/o) {
1203 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1206 $mr = Fieldmapper::metabib::metarecord->new;
1207 $mr->fingerprint( $fp->{fingerprint} );
1208 $mr->master_record( $r->id );
1209 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1212 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1213 $mr_map->metarecord( $mr->id );
1214 $mr_map->source( $r->id );
1215 push @source_map, $mr_map;
1217 $metarecord{$mr->id} = $mr;
1219 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1221 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1222 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
1227 if (@rec_descriptor) {
1228 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1230 OpenILS::Application::Ingest->storage_req(
1231 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
1235 for my $mr ( values %metarecord ) {
1236 my $sources = OpenILS::Application::Ingest->storage_req(
1237 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1241 my $bibs = OpenILS::Application::Ingest->storage_req(
1242 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1243 [ map { $_->source } @$sources ]
1246 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1248 OpenILS::Application::Ingest->storage_req(
1249 'open-ils.storage.direct.metabib.metarecord.remote_update',
1251 { master_record => $master->id, mods => undef }
1255 OpenILS::Application::Ingest->storage_req(
1256 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1258 ) if (@rec_descriptor);
1260 OpenILS::Application::Ingest->storage_req(
1261 'open-ils.storage.direct.metabib.full_rec.batch.create',
1265 OpenILS::Application::Ingest->storage_req(
1266 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1267 @{ $field_entry{title} }
1268 ) if (@{ $field_entry{title} });
1270 OpenILS::Application::Ingest->storage_req(
1271 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1272 @{ $field_entry{author} }
1273 ) if (@{ $field_entry{author} });
1275 OpenILS::Application::Ingest->storage_req(
1276 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1277 @{ $field_entry{subject} }
1278 ) if (@{ $field_entry{subject} });
1280 OpenILS::Application::Ingest->storage_req(
1281 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1282 @{ $field_entry{keyword} }
1283 ) if (@{ $field_entry{keyword} });
1285 OpenILS::Application::Ingest->storage_req(
1286 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1287 @{ $field_entry{series} }
1288 ) if (@{ $field_entry{series} });
1290 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1296 $log->debug('Wormization failed : '.shift(), ERROR);
1297 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
1301 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1302 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register four API names that all dispatch to wormize_biblio_record: the
# plain entry point plus the ".nomap", ".noscrub" and ".nomap.noscrub" variants.
# NOTE(review): presumably the method inspects its api_name to decide whether
# to skip metarecord mapping and/or scrubbing -- confirm in the sub body.
1305 __PACKAGE__->register_method(
1306 api_name => "open-ils.worm.wormize.biblio",
1307 method => "wormize_biblio_record",
1311 __PACKAGE__->register_method(
1312 api_name => "open-ils.worm.wormize.biblio.nomap",
1313 method => "wormize_biblio_record",
1317 __PACKAGE__->register_method(
1318 api_name => "open-ils.worm.wormize.biblio.noscrub",
1319 method => "wormize_biblio_record",
1323 __PACKAGE__->register_method(
1324 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1325 method => "wormize_biblio_record",
# wormize_authority_record: rebuild the flattened authority full_rec rows for
# the authority record(s) identified by $rec.
# Visible flow: make sure a storage transaction is open, scrub existing data
# (skipped when the API name contains "noscrub"), flatten each record's
# MARCXML into full_rec rows, then batch-create those rows inside a savepoint.
# NOTE(review): argument unpacking and the assignments to $commit / $success
# are not visible in this excerpt -- confirm against the full sub.
1330 sub wormize_authority_record {
# Only begin a transaction when one is not already active; PANIC if BEGIN fails.
1336 if (!OpenILS::Application::Ingest->in_transaction) {
1337 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1343 # clean up the cruft
1344 unless ($self->api_name =~ /noscrub/o) {
1345 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Fetch the authority record entries to process (the variable is named $bibs
# but the search is against authority.record_entry).
1349 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# @rec_descriptor stays empty here: the code that would fill it is commented out below.
1352 my @rec_descriptor = ();
1353 for my $r (@$bibs) {
1354 my $xml = $parser->parse_string($r->marc);
1356 # the full_rec stuff
# Each flattened row is tagged with the owning record id before being queued.
1357 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1358 $fr->record( $r->id );
1359 push @full_rec, $fr;
1362 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1363 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1364 #$rd->record( $r->id );
1365 #push @rec_descriptor, $rd;
# The savepoint brackets the batch insert so a failure can be rolled back to
# this point (see the rollback call below).
1369 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1371 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1372 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1374 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error (first element of @_) at ERROR level and undo
# everything since the savepoint.
1377 $log->debug('Wormization failed : '.shift(), ERROR);
1378 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
# NOTE(review): presumably $commit is set when this call began the transaction -- confirm.
1382 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1383 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API registrations: the ".noscrub" variant reaches the same method, which
# checks its api_name to skip the scrub step.
1386 __PACKAGE__->register_method(
1387 api_name => "open-ils.worm.wormize.authority",
1388 method => "wormize_authority_record",
1392 __PACKAGE__->register_method(
1393 api_name => "open-ils.worm.wormize.authority.noscrub",
1394 method => "wormize_authority_record",
1400 # --------------------------------------------------------------------------------
1401 # MARC index extraction
1403 package OpenILS::Application::Ingest::XPATH;
1404 use base qw/OpenILS::Application::Ingest/;
1405 use Unicode::Normalize;
# _xpath_to_string: concatenate the text content of every node matched by an
# XPath expression into one space-separated string, returned in Unicode NFD.
1407 # give this a MODS documentElement and an XPATH expression
# Callers in this file pass (element, xpath, namespace URI, namespace prefix,
# unique flag). NOTE(review): only the $ns_prefix unpacking is visible in this
# excerpt -- confirm the remaining argument order against the full sub.
1408 sub _xpath_to_string {
1412 my $ns_prefix = shift;
# Bind the prefix on the element only when both URI and prefix were supplied,
# so prefixed XPath expressions can resolve.
1415 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1419 # grab the set of matching nodes
1420 my @nodes = $xml->findnodes( $xpath );
1421 for my $value (@nodes) {
1423 # grab all children of the node
1424 my @children = $value->childNodes();
1425 for my $child (@children) {
1427 # add the child's content to the growing buffer
# quotemeta makes the child text safe to use as a literal regex pattern; in
# unique mode, text that already occurs anywhere in the buffer is skipped.
1428 my $content = quotemeta($child->textContent);
1429 next if ($unique && $string =~ /$content/); # uniquify the values
1430 $string .= $child->textContent . " ";
# NOTE(review): the control structure around this whole-node append is elided
# here -- presumably it is the alternative to the per-child loop; confirm.
1433 $string .= $value->textContent . " ";
1436 return NFD($string);
# class_all_index_string_xml: for one metabib field class, emit a
# Fieldmapper::metabib::<class>_field_entry object for each configured field
# type, carrying the normalized text extracted from the record's MODS form.
# NOTE(review): argument unpacking is elided; $xml, $class and $client are
# used below -- confirm their order against the full sub.
1439 sub class_all_index_string_xml {
1445 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
1446 $xml = $parser->parse_string($xml) unless (ref $xml);
1448 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1449 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MARC -> MODS transform is re-run for every field type on
# the same $xml; it looks loop-invariant and could be hoisted -- confirm.
1450 my $value = _xpath_to_string(
1451 $mods_sheet->transform($xml)->documentElement,
1452 $xpathset->{$class}->{$type}->{xpath},
1453 "http://www.loc.gov/mods/",
# Normalize the extracted text: decompose, drop combining marks (\pM) and
# control/other characters (\pC), trim trailing non-word characters, remove a
# period that directly follows a word character, then lowercase.
1460 $value = NFD($value);
1461 $value =~ s/\pM+//sgo;
1462 $value =~ s/\pC+//sgo;
1463 $value =~ s/\W+$//sgo;
1465 $value =~ s/(\w)\./$1/sgo;
1466 $value = lc($value);
# Build the field entry and stream it back to the client.
1468 my $fm = $class_constructor->new;
1469 $fm->value( $value );
1470 $fm->field( $xpathset->{$class}->{$type}->{id} );
1471 $client->respond($fm);
1475 __PACKAGE__->register_method(
1476 api_name => "open-ils.worm.field_entry.class.xml",
1477 method => "class_all_index_string_xml",
# class_all_index_string_record: record-id front end for
# "open-ils.worm.field_entry.class.xml". Retrieves the bib record entry for
# $rec, runs the XML method on its MARC for $class, and relays each result.
# NOTE(review): argument unpacking is elided in this excerpt.
1483 sub class_all_index_string_record {
1489 OpenILS::Application::Ingest->post_init();
1490 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1492 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1494 $client->respond($fm);
1498 __PACKAGE__->register_method(
1499 api_name => "open-ils.worm.field_entry.class.record",
1500 method => "class_all_index_string_record",
# class_index_string_xml: return the extracted string for a single
# (class, type) pair. The MARCXML is transformed to MODS and the XPath
# configured for that pair is evaluated with the "mods" prefix bound and the
# unique flag set (last argument 1).
# NOTE(review): argument unpacking is elided in this excerpt.
1507 sub class_index_string_xml {
1514 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
1515 $xml = $parser->parse_string($xml) unless (ref $xml);
1516 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1518 __PACKAGE__->register_method(
1519 api_name => "open-ils.worm.class.type.xml",
1520 method => "class_index_string_xml",
# class_index_string_record: record-id front end for
# "open-ils.worm.class.type.xml". Retrieves the bib record entry for $rec,
# runs the XML method on its MARC for ($class, $type) and logs the result.
# NOTE(review): the return statement is not visible in this excerpt --
# presumably $d is returned; confirm.
1525 sub class_index_string_record {
1532 OpenILS::Application::Ingest->post_init();
1533 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by the XML method is kept.
1535 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1536 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1539 __PACKAGE__->register_method(
1540 api_name => "open-ils.worm.class.type.record",
1541 method => "class_index_string_record",
# Body of xml_xpath (the method name comes from the registration below; the
# sub line and argument unpacking are elided in this excerpt).
# Evaluates a caller-supplied XPath directly against the document element of
# the given XML -- no MODS transform -- passing the caller's namespace URI,
# prefix and unique flag through to _xpath_to_string.
1555 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
1556 $xml = $parser->parse_string($xml) unless (ref $xml);
1557 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1559 __PACKAGE__->register_method(
1560 api_name => "open-ils.worm.xpath.xml",
1561 method => "xml_xpath",
# Body of record_xpath (the method name comes from the registration below; the
# sub line and argument unpacking are elided in this excerpt).
# Record-id front end for "open-ils.worm.xpath.xml": retrieves the bib record
# entry for $rec, evaluates the XPath against its MARC and logs the result.
# NOTE(review): the return statement is not visible -- presumably $d; confirm.
1575 OpenILS::Application::Ingest->post_init();
1576 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by the XML method is kept.
1578 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1579 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1582 __PACKAGE__->register_method(
1583 api_name => "open-ils.worm.xpath.record",
1584 method => "record_xpath",
1590 # --------------------------------------------------------------------------------
1593 package OpenILS::Application::Ingest::Biblio::Leader;
1594 use base qw/OpenILS::Application::Ingest/;
1595 use Unicode::Normalize;
# Maps a record-type group name to a regex fragment; the fragments are matched
# against the leader byte at offset 6 by the descriptor extractors in this
# package. Only the VIS entry is visible in this excerpt.
1597 our %marc_type_groups = (
# VIS: leader/06 is one of g, k, r or o.
1600 VIS => q/[gkro]{1}/,
# Builds an anchored pattern string from the type-group names passed in @_
# (callers in this package use _type_re( qw/MAP VIS/ ), _type_re('BKS'), ...).
# NOTE(review): `$marc_type_groups{@_}` is a single-element lookup whose key
# is @_ in scalar context (the argument count), not a hash slice; the intent
# looks like `@marc_type_groups{@_}` -- confirm and fix.
# NOTE(review): '^' . 'A|B' . '$' yields /^A|B$/, where each anchor binds to
# only one alternative; a (?:...) group is probably wanted -- confirm.
1609 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# Dispatch table: record_descriptor field name => closure that computes the
# field from the package variables $ldr (MARC leader) and $oo8 (008 control
# field). _extract_biblio_descriptors sets those with `local` before calling
# each closure.
1613 our %biblio_descriptor_code = (
1614 item_type => sub { substr($ldr,6,1); },
# The position read from the 008 depends on the record type in leader/06:
# offset 29 for the MAP/VIS groups, offset 23 for BKS/SER/MIX/SCO/REC.
# (The key and closure header for this entry are elided in this excerpt.)
1617 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1618 return substr($oo8,29,1);
1619 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1620 return substr($oo8,23,1);
1624 bib_level => sub { substr($ldr,7,1); },
1625 control_type => sub { substr($ldr,8,1); },
1626 char_encoding => sub { substr($ldr,9,1); },
1627 enc_level => sub { substr($ldr,17,1); },
1628 cat_form => sub { substr($ldr,18,1); },
1629 pub_status => sub { substr($ldr,5,1); },
1630 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 offset 33, but only for BKS and VIS
# records respectively; otherwise they yield undef.
1631 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1632 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1633 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: build a Fieldmapper::metabib::record_descriptor
# from a parsed MARCXML document by running every closure in
# %biblio_descriptor_code.
# NOTE(review): argument unpacking and the return are elided in this excerpt
# -- presumably $rd_obj is returned; confirm.
1636 sub _extract_biblio_descriptors {
# `local` gives the package variables read by the closures a value for the
# duration of this call only. The namespace-agnostic local-name() tests match
# the elements regardless of prefix.
1639 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1640 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1641 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1643 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each table key doubles as the accessor name on the descriptor object.
1644 for my $rd_field ( keys %biblio_descriptor_code ) {
1645 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API wrapper that parses the supplied MARCXML (if it
# is not already a parsed document) and returns the result of
# _extract_biblio_descriptors for it.
# NOTE(review): argument unpacking is elided in this excerpt.
1651 sub extract_biblio_desc_xml {
1656 $xml = $parser->parse_string($xml) unless (ref $xml);
1658 return _extract_biblio_descriptors( $xml );
1660 __PACKAGE__->register_method(
1661 api_name => "open-ils.worm.biblio_leader.xml",
1662 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id front end for
# "open-ils.worm.biblio_leader.xml". Retrieves the bib record entry for $rec,
# runs the XML method on its MARC and logs the descriptor as JSON.
# NOTE(review): the return statement is not visible -- presumably $d; confirm.
1667 sub extract_biblio_desc_record {
1672 OpenILS::Application::Ingest->post_init();
1673 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value produced by the XML method is kept.
1675 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1676 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1679 __PACKAGE__->register_method(
1680 api_name => "open-ils.worm.biblio_leader.record",
1681 method => "extract_biblio_desc_record",
1686 # --------------------------------------------------------------------------------
1689 package OpenILS::Application::Ingest::FlatMARC;
1690 use base qw/OpenILS::Application::Ingest/;
1691 use Unicode::Normalize;
# _marcxml_to_full_rows: flatten a parsed MARCXML document into full_rec
# Fieldmapper objects -- visibly one per leader, one per controlfield and one
# per datafield subfield.
# Params: $marcxml -- parsed MARCXML document
#         $xmltype -- Fieldmapper namespace to build rows in; defaults to 'metabib'
# NOTE(review): the pushes onto @ns_list and the return are elided in this
# excerpt -- presumably the list of rows is returned; confirm.
1694 sub _marcxml_to_full_rows {
1696 my $marcxml = shift;
1697 my $xmltype = shift || 'metabib';
# Class name of the row objects, e.g. Fieldmapper::metabib::full_rec.
1699 my $type = "Fieldmapper::${xmltype}::full_rec";
# Work from the first <record> element, matched by local name so any
# namespace prefix is accepted.
1703 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# --- leader ---
1705 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1706 next unless $tagline;
1708 my $ns = $type->new;
1711 my $val = $tagline->textContent;
# Strip combining marks (\pM), control/other characters (\pC) and trailing
# non-word characters. NOTE(review): stripping \pM only removes accents if the
# text is decomposed first; the line before this is elided -- confirm.
1713 $val =~ s/\pM+//sgo;
1714 $val =~ s/\pC+//sgo;
1715 $val =~ s/\W+$//sgo;
# --- control fields ---
1721 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1722 next unless $tagline;
1724 my $ns = $type->new;
1726 $ns->tag( $tagline->getAttribute( "tag" ) );
1727 my $val = $tagline->textContent;
1729 $val =~ s/\pM+//sgo;
1730 $val =~ s/\pC+//sgo;
1731 $val =~ s/\W+$//sgo;
# --- data fields: one row per subfield ---
1737 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1738 next unless $tagline;
1740 my $tag = $tagline->getAttribute( "tag" );
1741 my $ind1 = $tagline->getAttribute( "ind1" );
1742 my $ind2 = $tagline->getAttribute( "ind2" );
1744 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1747 my $ns = $type->new;
1752 $ns->subfield( $data->getAttribute( "code" ) );
1753 my $val = $data->textContent;
1755 $val =~ s/\pM+//sgo;
1756 $val =~ s/\pC+//sgo;
1757 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1758 $ns->value( lc($val) );
1764 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of flat_marc_xml (the method name comes from the registrations below;
# the sub line and argument unpacking are elided in this excerpt).
# Flattens MARCXML into full_rec rows and streams each row to the client.
# Accept either an already-parsed document or a raw XML string.
1773 $xml = $parser->parse_string($xml) unless (ref $xml);
# Rows are built as metabib full_rec objects unless the API name used to reach
# this method contains "authority".
1775 my $type = 'metabib';
1776 $type = 'authority' if ($self->api_name =~ /authority/o);
1778 OpenILS::Application::Ingest->post_init();
1780 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Two API names share this method; the name selects the row type as above.
1783 __PACKAGE__->register_method(
1784 api_name => "open-ils.worm.flat_marc.authority.xml",
1785 method => "flat_marc_xml",
1790 __PACKAGE__->register_method(
1791 api_name => "open-ils.worm.flat_marc.biblio.xml",
1792 method => "flat_marc_xml",
# flat_marc_record: record-id front end for the flat_marc ".xml" methods.
# Retrieves the record entry for $rec and streams the flattened rows of its
# MARC to the client.
# NOTE(review): argument unpacking is elided in this excerpt.
1798 sub flat_marc_record {
# $type picks both the storage class to read from and the ".xml" method to
# call: 'biblio' by default, 'authority' when the API name contains it.
1803 my $type = 'biblio';
1804 $type = 'authority' if ($self->api_name =~ /authority/o);
1806 OpenILS::Application::Ingest->post_init();
1807 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1809 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1812 __PACKAGE__->register_method(
1813 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1814 method => "flat_marc_record",
1819 __PACKAGE__->register_method(
1820 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1821 method => "flat_marc_record",
1828 # --------------------------------------------------------------------------------
1831 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1832 use base qw/OpenILS::Application::Ingest/;
1833 use Unicode::Normalize;
1834 use OpenSRF::EX qw/:try/;
# Fingerprint rule table, consumed by the _fp_mods loop later in this package.
# It is a flat list of pairs: a "match" XPath followed by an array-ref block.
# _fp_mods walks each block as (name, part) pairs and reads $part->{xpath} as
# a list of candidate XPaths.
# NOTE(review): the part names, hash keys and fixup sub headers are elided in
# this excerpt; the runs of substitutions below are presumably the bodies of
# per-part fixup closures operating on $text -- confirm.
1836 my @fp_mods_xpath = (
# Rule 1: records whose MODS typeOfResource is "text".
1837 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates, in this order: uniform, translated, alternative, untyped.
1840 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1841 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1842 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1843 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup; the intermediate text is logged at INTERNAL level after each step.
1846 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1848 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# drop combining marks
1849 $text =~ s/\pM+//gso;
1850 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1852 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# collapse whitespace runs to one space
1853 $text =~ s/\s+/ /sgo;
1854 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# trim leading whitespace (the greedy .+ can still capture trailing whitespace)
1855 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1856 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# remove the standalone words "the", "a" and "an" (case-sensitive as written)
1857 $text =~ s/\b(?:the|an?)\b//sgo;
1858 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# remove bracketed segments containing at least two characters
1859 $text =~ s/\[.[^\]]+\]//sgo;
1860 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# strip trailing whitespace followed by any run of ; / . at the end
1861 $text =~ s/\s*[;\/\.]*$//sgo;
1862 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal-name creators first, then any creator.
1867 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1868 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fixup.
1871 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1873 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1874 $text =~ s/\pM+//gso;
1875 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1877 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1878 $text =~ s/\s+/ /sgo;
1879 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1880 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1881 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# cut at the first whitespace (and a comma just before it), keeping only the
# leading token of the name
1882 $text =~ s/,?\s+.*$//sgo;
1883 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 2: records with a relatedItem whose type is neither "host" nor "series".
1888 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the related item's titles first, then the record's own.
1891 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
1892 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
1893 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
1894 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
1895 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1896 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1897 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1898 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup: the same substitution sequence as in rule 1.
1901 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1903 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1904 $text =~ s/\pM+//gso;
1905 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1907 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1908 $text =~ s/\s+/ /sgo;
1909 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1910 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1911 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1912 $text =~ s/\b(?:the|an?)\b//sgo;
1913 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1914 $text =~ s/\[.[^\]]+\]//sgo;
1915 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1916 $text =~ s/\s*[;\/\.]*$//sgo;
1917 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: the related item's creators first, then the record's own.
1922 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1923 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
1924 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
1925 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fixup: the same substitution sequence as in rule 1.
1928 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1930 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1931 $text =~ s/\pM+//gso;
1932 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1934 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1935 $text =~ s/\s+/ /sgo;
1936 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1937 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1938 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1939 $text =~ s/,?\s+.*$//sgo;
1940 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 3: any record with a titleInfo element reuses the rule-1 block
# (element [1] of the table).
1947 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Body of _fp_mods (called as _fp_mods($mods) elsewhere in this package; the
# sub line, argument unpacking and return are elided in this excerpt).
# Builds a fingerprint string from a MODS element using @fp_mods_xpath.
# Bind the "mods" prefix so the prefixed XPaths in the table resolve.
1951 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# The table is a flat list of pairs, so the match XPath sits at an even index
# and its block at the following odd index.
# NOTE(review): the increments of $match_index / $block_index and any early
# exit after the first matching rule are elided -- confirm.
1955 my $match_index = 0;
1956 my $block_index = 1;
1957 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
# A rule applies when its match XPath selects at least one node.
1958 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Within a block, names are at even indexes and part hashes at odd indexes.
1960 my $block_name_index = 0;
1961 my $block_value_index = 1;
1962 my $block = $fp_mods_xpath[$block_index];
1963 while ( my $part = $$block[$block_value_index] ) {
# Try each candidate XPath for this part.
# NOTE(review): presumably the loop stops at the first non-empty value and
# then applies the part's fixup; those lines are elided -- confirm.
1965 for my $xpath ( @{ $part->{xpath} } ) {
1966 $text = $mods->findvalue( $xpath );
1970 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
1974 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
1975 $fp_string .= $text;
# Advance to the next (name, part) pair in the block.
1978 $block_name_index += 2;
1979 $block_value_index += 2;
# The final fingerprint keeps word characters only.
1983 $fp_string =~ s/\W+//gso;
1984 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recompute the fingerprint and quality of the bib
# record(s) in $rec; when either changed, update the record entry and (unless
# the API name contains "nomap") move the record to the metarecord matching
# the new fingerprint. Each processed record id is sent to the client.
# NOTE(review): argument unpacking, the try/catch framing and the assignments
# to $commit / $success are elided in this excerpt.
1994 sub refingerprint_bibrec {
# Only begin a transaction when one is not already active; PANIC if BEGIN fails.
2000 if (!OpenILS::Application::Ingest->in_transaction) {
2001 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2007 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2008 for my $b (@$bibs) {
# $fp is used as a hash ref with `fingerprint` and `quality` keys.
2009 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Nothing is written unless the fingerprint or the quality actually changed.
2011 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2013 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2015 OpenILS::Application::Ingest->storage_req(
2016 'open-ils.storage.direct.biblio.record_entry.remote_update',
2018 { fingerprint => $fp->{fingerprint},
2019 quality => $fp->{quality} }
# Metarecord remapping, skipped for the ".nomap" API variant.
2022 if ($self->api_name !~ /nomap/o) {
2023 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2024 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
# Delete the record's existing source maps, remembering the metarecord of the
# last one seen.
2029 if (ref($old_source_map) and @$old_source_map) {
2030 for my $m (@$old_source_map) {
2031 $old_mrid = $m->metarecord;
2032 OpenILS::Application::Ingest->storage_req(
2033 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no remaining source maps, delete it as well.
2039 my $old_sm = OpenILS::Application::Ingest->storage_req(
2040 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2041 { metarecord => $old_mrid }
2044 if (ref($old_sm) and @$old_sm == 0) {
2045 OpenILS::Application::Ingest->storage_req(
2046 'open-ils.storage.direct.metabib.metarecord.delete',
# Look up the metarecord for the new fingerprint.
# NOTE(review): the condition guarding the creation below is elided --
# presumably a new metarecord is made only when the search found none; confirm.
2051 my $mr = OpenILS::Application::Ingest->storage_req(
2052 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2053 { fingerprint => $fp->{fingerprint} }
# New metarecord with this record as master; its id is set from the create call's return value.
2057 $mr = Fieldmapper::metabib::metarecord->new;
2058 $mr->fingerprint( $fp->{fingerprint} );
2059 $mr->master_record( $b->id );
2060 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map this record to the chosen metarecord.
2063 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2064 $mr_map->metarecord( $mr->id );
2065 $mr_map->source( $b->id );
2066 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2070 $client->respond($b->id);
# Failure path: log the error (first element of @_) at ERROR level.
2074 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
2078 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2079 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# The ".nomap" variant reaches the same method, which checks its api_name to
# skip the metarecord remapping.
2082 __PACKAGE__->register_method(
2083 api_name => "open-ils.worm.fingerprint.record.update",
2084 method => "refingerprint_bibrec",
2090 __PACKAGE__->register_method(
2091 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2092 method => "refingerprint_bibrec",
# fingerprint_bibrec: record-id front end for "open-ils.worm.fingerprint.marc".
# Retrieves the bib record entry for $rec, fingerprints its MARC and logs the
# result.
# NOTE(review): argument unpacking and the return are elided in this excerpt.
2099 sub fingerprint_bibrec {
2104 OpenILS::Application::Ingest->post_init();
2105 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# Only the first value produced by the MARC method is kept.
2107 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2108 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): the same api_name is registered again later in this package
# for biblio_fingerprint_record; confirm which registration is meant to win.
2112 __PACKAGE__->register_method(
2113 api_name => "open-ils.worm.fingerprint.record",
2114 method => "fingerprint_bibrec",
# fingerprint_mods: parse a MODS XML string and return the fingerprint that
# _fp_mods computes from its document element.
# NOTE(review): argument unpacking is elided in this excerpt.
2120 sub fingerprint_mods {
2125 OpenILS::Application::Ingest->post_init();
# Unlike the sibling methods there is no `ref` check, so $xml is always
# treated as a string to parse.
2126 my $mods = $parser->parse_string($xml)->documentElement;
2128 return _fp_mods( $mods );
2130 __PACKAGE__->register_method(
2131 api_name => "open-ils.worm.fingerprint.mods",
2132 method => "fingerprint_mods",
# fingerprint_marc: transform MARCXML to MODS with $mods_sheet, fingerprint
# the result with _fp_mods and log it.
# NOTE(review): argument unpacking and the return are elided in this excerpt.
2137 sub fingerprint_marc {
# Accept either an already-parsed document or a raw XML string.
2142 $xml = $parser->parse_string($xml) unless (ref $xml);
2144 OpenILS::Application::Ingest->post_init();
2145 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2146 $log->debug("Returning [$fp] as fingerprint", INFO);
# NOTE(review): the same api_name is registered again later in this package
# for biblio_fingerprint; confirm which registration is meant to win.
2149 __PACKAGE__->register_method(
2150 api_name => "open-ils.worm.fingerprint.marc",
2151 method => "fingerprint_marc",
# biblio_fingerprint_record: record-id front end for
# "open-ils.worm.fingerprint.marc". Retrieves the bib record entry for $rec
# and fingerprints it.
# NOTE(review): argument unpacking, the tail of the retrieve chain and the
# return are elided in this excerpt.
2159 sub biblio_fingerprint_record {
2164 OpenILS::Application::Ingest->post_init();
# NOTE(review): presumably the elided continuation of this chain extracts the
# record's MARC from the retrieved entry -- confirm.
2166 my $marc = OpenILS::Application::Ingest
2167 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value produced by the MARC method is kept.
2170 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2171 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): this api_name is also registered earlier in this package for
# fingerprint_bibrec; confirm which registration is meant to win.
2174 __PACKAGE__->register_method(
2175 api_name => "open-ils.worm.fingerprint.record",
2176 method => "biblio_fingerprint_record",
# biblio_fingerprint: compute a fingerprint by running a configured external
# script through OpenILS::Utils::ScriptRunner, with the record's MARC and
# MODS serializations as its environment.
# NOTE(review): argument unpacking, the caching guard around the script
# construction and the final return are elided in this excerpt.
2182 sub biblio_fingerprint {
2187 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
2189 $marc = $parser->parse_string($marc) unless (ref $marc);
# Both documents are passed through entityize before being handed to the
# script. NOTE(review): the middle of this chain is elided; presumably the
# MODS comes from a stylesheet transform of $marc -- confirm.
2191 my $mods = OpenILS::Application::Ingest::entityize(
2193 ->transform( $marc )
2199 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2202 $log->internal("Got MARC [$marc]");
2203 $log->internal("Created MODS [$mods]");
# Script location and library paths are read from the open-ils.storage
# app_settings section of the configuration.
2206 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2207 my $conf = OpenSRF::Utils::SettingsClient->new;
2209 my $libs = $conf->config_value(@pfx, 'script_path');
2210 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalize to an array ref.
2211 my $script_libs = (ref($libs)) ? $libs : [$libs];
2213 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared in the visible lines.
# NOTE(review): presumably it is a file-scoped variable so the runner can be
# reused across calls -- confirm.
2215 $fp_script = new OpenILS::Utils::ScriptRunner
2216 ( file => $script_file,
2217 paths => $script_libs,
2218 reset_count => 1000 );
2221 $log->debug("Applying environment for biblio fingerprinting...");
2223 my $env = {marc => $marc, mods => $mods};
2224 #my $res = {fingerprint => '', quality => '0'};
2226 $fp_script->insert('environment' => $env);
2227 #$fp_script->insert('result' => $res);
2229 $log->debug("Running script for biblio fingerprinting...");
# If run() returns false, log the error; `return 0` then executes only when
# the error() call itself returns true.
2231 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2233 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): this api_name is also registered earlier in this package for
# fingerprint_marc; confirm which registration is meant to win.
2237 __PACKAGE__->register_method(
2238 api_name => "open-ils.worm.fingerprint.marc",
2239 method => "biblio_fingerprint",
2244 # --------------------------------------------------------------------------------
2258 my $create_source_map;
# Record-descriptor extraction table: descriptor field name => Perl expression
# held as a STRING. The wormize code later in this file passes each string to
# string-eval with lexical $ldr (leader) and $oo8 (008 control field) in scope.
2273 my %descriptor_code = (
2274 item_type => 'substr($ldr,6,1)',
# 008 offset 29 when leader/06 is one of f g i m o p r, otherwise offset 23.
2275 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2276 bib_level => 'substr($ldr,7,1)',
2277 control_type => 'substr($ldr,8,1)',
2278 char_encoding => 'substr($ldr,9,1)',
2279 enc_level => 'substr($ldr,17,1)',
2280 cat_form => 'substr($ldr,18,1)',
2281 pub_status => 'substr($ldr,5,1)',
2282 item_lang => 'substr($oo8,35,3)',
2283 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2284 audience => 'substr($oo8,22,1)',
# Body of wormize (the method name comes from the register_method calls that
# follow; the sub line, argument unpacking and try/catch framing are elided).
# Visible flow: resolve and cache storage method handles, make sure a
# transaction is open, build fingerprint / metarecord map / descriptor /
# full_rec / field-entry data for each record, delete the old derived rows,
# batch-insert the new ones, and commit if this call began the transaction.
# The "no_map" API variants are detected here; $no_map is consulted below when
# deleting old source maps (its assignment is elided).
2294 if ($self->api_name =~ /no_map/o) {
# Method handles are looked up once and kept in variables that are tested
# before re-lookup (`unless ($x)`); some of the `unless` lines are elided.
2298 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2300 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2302 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2304 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2306 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2307 unless ($sm_lookup);
2308 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2309 unless ($mr_lookup);
2310 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2311 unless ($mr_update);
2312 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2314 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2315 unless ($update_entry);
# mass_delete handles for each derived table
2316 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2317 unless ($rm_old_sm);
2318 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2319 unless ($rm_old_rd);
2320 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2321 unless ($rm_old_fr);
2322 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2323 unless ($rm_old_tr);
2324 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2325 unless ($rm_old_ar);
2326 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2327 unless ($rm_old_sr);
2328 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2329 unless ($rm_old_kr);
2330 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2331 unless ($rm_old_ser);
# create / batch.create handles
2332 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2333 unless ($mr_create);
2334 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2335 unless ($create_source_map);
2336 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2337 unless ($rd_create);
2338 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2339 unless ($fr_create);
# Per-class field-entry creators are kept in the hash ref $create, keyed by class.
2340 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2341 unless ($$create{title});
2342 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2343 unless ($$create{author});
2344 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2345 unless ($$create{subject});
2346 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2347 unless ($$create{keyword});
2348 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2349 unless ($$create{series});
# Transaction handling: $outer_xact is true when a transaction was already
# open; only when it is false does this call begin (and later commit) one.
2352 my ($outer_xact) = $in_xact->run;
2354 unless ($outer_xact) {
2355 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2356 my ($r) = $begin->run($client);
2357 unless (defined $r and $r) {
2359 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2362 } catch Error with {
2363 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Per-record pass: build all derived data in memory before touching storage.
2373 for my $entry ( $lookup->run(@docids) ) {
2374 # step -1: grab the doc from storage
2375 next unless ($entry);
# Load the MARC -> MODS stylesheet from the configured xsl directory.
# NOTE(review): the guard around this load is elided; presumably it runs only
# when $mods_sheet is not yet set -- confirm.
2378 my $xslt_doc = $parser->parse_file(
2379 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2380 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2383 my $xml = $entry->marc;
2384 my $docid = $entry->id;
2385 my $marcdoc = $parser->parse_string($xml);
2386 my $modsdoc = $mods_sheet->transform($marcdoc);
# Bind the "mods" prefix on the MODS element for the XPath work that follows.
2388 my $mods = $modsdoc->documentElement;
2389 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Store the fingerprint on the entry; the entry is written back in the batch
# update further down.
2391 $entry->fingerprint( fingerprint_mods( $mods ) );
2392 push @entry_list, $entry;
2394 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Find the metarecord for this fingerprint, creating one with this record as
# master when none exists.
2397 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2398 if (!$mr || !@$mr) {
2399 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2400 $mr = new Fieldmapper::metabib::metarecord;
2401 $mr->fingerprint( $entry->fingerprint );
2402 $mr->master_record( $entry->id );
2403 my ($new_mr) = $mr_create->run($mr);
# NOTE(review): $mr was assigned a fresh object just above, so this test looks
# like it can never fire; the result of the create call is in $new_mr.
# Presumably `defined $new_mr` was intended (a line is elided between) -- confirm.
2405 unless (defined $mr) {
2406 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2409 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue the record -> metarecord mapping.
2414 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2415 $sm->metarecord( $mr->id );
2416 $sm->source( $entry->id );
2417 push @source_maps, $sm;
# Record descriptor: each %descriptor_code string is evaluated with $ldr and
# $oo8 in lexical scope.
2420 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2421 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2423 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2424 for my $rd_field ( keys %descriptor_code ) {
2425 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2427 $rd_obj->record( $docid );
2428 push @rd_list, $rd_obj;
# Per-class field values for this record, stored as { docid => values }.
2430 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2432 # step 2: build the KOHA rows
2433 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2434 $_->record( $docid ) for (@tmp_list);
2435 push @ns_list, @tmp_list;
# Unless the API name ends in "batch", stop after the first record.
2439 last unless ($self->api_name =~ /batch$/o);
# Delete previously derived rows for all requested ids; old source maps are
# kept when $no_map is set.
2442 $rm_old_rd->run( { record => \@docids } );
2443 $rm_old_fr->run( { record => \@docids } );
2444 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2445 $rm_old_tr->run( { source => \@docids } );
2446 $rm_old_ar->run( { source => \@docids } );
2447 $rm_old_sr->run( { source => \@docids } );
2448 $rm_old_kr->run( { source => \@docids } );
2449 $rm_old_ser->run( { source => \@docids } );
# Batch writes: each one PANICs when the storage call returns undef.
2452 my ($sm) = $create_source_map->run(@source_maps);
2453 unless (defined $sm) {
2454 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2456 my ($mr) = $mr_update->run(@mr_list);
2457 unless (defined $mr) {
2458 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2462 my ($re) = $update_entry->run(@entry_list);
2463 unless (defined $re) {
2464 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2467 my ($rd) = $rd_create->run(@rd_list);
2468 unless (defined $rd) {
2469 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2472 my ($fr) = $fr_create->run(@ns_list);
2473 unless (defined $fr) {
2474 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2477 # step 5: insert the new metadata
2478 for my $class ( qw/title author subject keyword series/ ) {
2480 for my $doc ( @mods_data ) {
# Each @mods_data element is a single-pair hash: docid => per-class data.
2481 my ($did) = keys %$doc;
2482 my ($data) = values %$doc;
2484 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2485 for my $row ( keys %{ $$data{$class} } ) {
# Rows with a false value (empty string, 0 or undef) are skipped.
2486 next unless (exists $$data{$class}{$row});
2487 next unless ($$data{$class}{$row}{value});
2488 my $fm_obj = $fm_constructor->new;
2489 $fm_obj->value( $$data{$class}{$row}{value} );
2490 $fm_obj->field( $$data{$class}{$row}{field_id} );
2491 $fm_obj->source( $did );
2492 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2494 push @md_list, $fm_obj;
# One batch create per class.
2498 my ($cr) = $$create{$class}->run(@md_list);
2499 unless (defined $cr) {
2500 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only when this call began the transaction.
2504 unless ($outer_xact) {
2505 $log->debug("Commiting transaction started by the Ingest.", INFO);
2506 my ($c) = $commit->run;
2507 unless (defined $c and $c) {
2509 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the bibliographic ingest API names. All four registrations below
# dispatch to the same "wormize" method; only the api_name differs (plain,
# ".no_map", ".batch" and ".no_map.batch").
# NOTE(review): the remaining arguments and the terminator of each call are
# not visible in this section -- confirm against the full file.
2515 __PACKAGE__->register_method(
2516 api_name => "open-ils.worm.wormize",
2517 method => "wormize",
# "no_map" variant of the single-record API name.
# NOTE(review): presumably the handler inspects its api_name for "no_map"
# to skip metarecord source-map maintenance -- confirm in wormize.
2521 __PACKAGE__->register_method(
2522 api_name => "open-ils.worm.wormize.no_map",
2523 method => "wormize",
# Batch variant: the api_name ends in "batch".
2527 __PACKAGE__->register_method(
2528 api_name => "open-ils.worm.wormize.batch",
2529 method => "wormize",
# Batch variant that also carries "no_map" in its api_name.
2533 __PACKAGE__->register_method(
2534 api_name => "open-ils.worm.wormize.no_map.batch",
2535 method => "wormize",
# File-scoped lexical; no line visible in this section assigns or reads it.
2550 my $acreate_source_map;
# authority_wormize
#
# Re-ingests authority records: for each record entry retrieved for @docids
# it parses the MARC XML, builds a Fieldmapper::authority::record_descriptor
# and the authority full_rec rows, then replaces the previously stored
# descriptor and full_rec rows for those ids with the new ones.
#
# Uses $self (api_name, method_lookup), $client (handed to the transaction
# "begin" method) and @docids (record ids).
# NOTE(review): the argument unpacking, several declarations, the closing
# braces and the return value are not visible in this section.
#
# Transaction handling: if no storage transaction is current, one is begun
# here and committed at the end; otherwise the outer transaction is left to
# the caller.
#
# Throws OpenSRF::EX::PANIC when BEGIN, a batch create, or COMMIT fails.
2565 sub authority_wormize {
2572 if ($self->api_name =~ /no_map/o) {
# Resolve the storage methods this sub needs. The "unless" modifiers visible
# on some of these show the handles are cached in variables declared outside
# this sub, so the lookup is skipped once a handle is set.
2576 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2578 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2580 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2582 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2584 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2586 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2587 unless ($aupdate_entry);
2588 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2589 unless ($arm_old_rd);
2590 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2591 unless ($arm_old_fr);
2592 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2593 unless ($ard_create);
2594 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2595 unless ($afr_create);
# A true value here means a transaction is already open; in that case this
# sub neither begins nor commits one.
2598 my ($outer_xact) = $in_xact->run;
2600 unless ($outer_xact) {
2601 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2602 my ($r) = $begin->run($client);
2603 unless (defined $r and $r) {
2605 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2608 } catch Error with {
2609 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Retrieve the records through the authority record_entry retriever resolved
# above ($alookup), not the bibliographic $lookup handle.
2619 for my $entry ( $alookup->run(@docids) ) {
2620 # step -1: grab the doc from storage
2621 next unless ($entry);
2624 # my $xslt_doc = $parser->parse_file(
2625 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2626 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2629 my $xml = $entry->marc;
2630 my $docid = $entry->id;
2631 my $marcdoc = $parser->parse_string($xml);
2632 #my $madsdoc = $mads_sheet->transform($marcdoc);
2634 #my $mads = $madsdoc->documentElement;
2635 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2637 push @entry_list, $entry;
# Leader text and 008 control field value.
# NOTE(review): presumably read by the code strings evaluated below;
# nothing else visible in this sub uses them.
2639 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2640 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2642 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
# Each %descriptor_code value is Perl source evaluated with a string eval;
# the result is stored through the accessor named by the key.
# NOTE(review): string eval -- safe only while %descriptor_code holds
# trusted, file-defined code; an eval failure is not checked here.
2643 for my $rd_field ( keys %descriptor_code ) {
2644 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2646 $rd_obj->record( $docid );
2647 push @rd_list, $rd_obj;
2649 # step 2: build the KOHA rows
2650 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2651 $_->record( $docid ) for (@tmp_list);
2652 push @ns_list, @tmp_list;
# Only API names ending in "batch" process more than the first record.
2656 last unless ($self->api_name =~ /batch$/o);
# Drop the previously stored descriptor and full_rec rows for these ids
# before inserting the freshly built ones.
2659 $arm_old_rd->run( { record => \@docids } );
2660 $arm_old_fr->run( { record => \@docids } );
2662 my ($rd) = $ard_create->run(@rd_list);
2663 unless (defined $rd) {
2664 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Insert the rows through the authority full_rec creator ($afr_create), the
# method named in the PANIC message below.
2667 my ($fr) = $afr_create->run(@ns_list);
2668 unless (defined $fr) {
2669 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only when this sub opened the transaction itself.
2672 unless ($outer_xact) {
2673 $log->debug("Commiting transaction started by Ingest.", INFO);
2674 my ($c) = $commit->run;
2675 unless (defined $c and $c) {
2677 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the authority ingest API names (single-record and batch). Both
# dispatch to authority_wormize, the authority handler defined in this file,
# and both use the ".authority." spelling in the api_name.
# NOTE(review): the remaining arguments and the terminator of each call are
# not visible in this section -- confirm against the full file.
2683 __PACKAGE__->register_method(
2684 api_name => "open-ils.worm.authority.wormize",
2685 method => "authority_wormize",
# Batch variant: the api_name ends in "batch", which the handler tests to
# decide whether to continue past the first record.
2689 __PACKAGE__->register_method(
2690 api_name => "open-ils.worm.authority.wormize.batch",
2691 method => "authority_wormize",
2697 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Flattens a parsed MARCXML document into full_rec row objects: rows for the
# <leader> element, for each <controlfield>, and for each child node of each
# <datafield>.
#
#   $marcxml - parsed document; must provide documentElement
#   $type    - class name used to construct every row; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Values are NFD-normalized and stripped of combining marks (\pM); datafield
# child values are additionally lower-cased. Callers in this file treat the
# result as a list of row objects and set ->record on each one.
# NOTE(review): the row accumulation, several setter calls, the closing
# braces and the return statement are not visible in this section.
2700 sub _marcxml_to_full_rows {
2702 my $marcxml = shift;
2703 my $type = shift || 'Fieldmapper::metabib::full_rec';
2707 my $root = $marcxml->documentElement;
2709 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2710 next unless $tagline;
# Build the row with the caller-selected class so that leader rows match
# the class used for the datafield rows.
2712 my $ns = $type->new;
2715 my $val = NFD($tagline->textContent);
2716 $val =~ s/(\pM+)//gso;
2722 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2723 next unless $tagline;
# Same as above: honor $type for controlfield rows too.
2725 my $ns = $type->new;
2727 $ns->tag( $tagline->getAttribute( "tag" ) );
2728 my $val = NFD($tagline->textContent);
2729 $val =~ s/(\pM+)//gso;
2735 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2736 next unless $tagline;
# Tag and indicators are read once per datafield.
# NOTE(review): presumably copied onto every row built from its children;
# those setter lines are not visible here.
2738 my $tag = $tagline->getAttribute( "tag" );
2739 my $ind1 = $tagline->getAttribute( "ind1" );
2740 my $ind2 = $tagline->getAttribute( "ind2" );
2742 for my $data ( $tagline->childNodes ) {
2745 my $ns = $type->new;
2750 $ns->subfield( $data->getAttribute( "code" ) );
2751 my $val = NFD($data->textContent);
2752 $val =~ s/(\pM+)//gso;
2753 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Gathers the text content of the nodes matching $xpath under $root into one
# space-separated buffer, then NFD-normalizes the buffer and strips combining
# marks (\pM) from it.
#
#   $root  - node object providing findnodes
#   $xpath - XPath expression handed to findnodes
#
# NOTE(review): the declaration of $string, the condition guarding the
# $value->textContent append, the closing braces and the return statement
# are not visible in this section -- confirm against the full file.
2761 sub _get_field_value {
2763 my( $root, $xpath ) = @_;
2767 # grab the set of matching nodes
2768 my @nodes = $root->findnodes( $xpath );
2769 for my $value (@nodes) {
2771 # grab all children of the node
2772 my @children = $value->childNodes();
2773 for my $child (@children) {
2775 # add the childs content to the growing buffer
# quotemeta makes the child text a literal pattern. The test below is a
# substring match against the whole buffer, not an equality test, so a
# child whose text already occurs anywhere in the buffer is skipped.
2776 my $content = quotemeta($child->textContent);
2777 next if ($string =~ /$content/); # uniquify the values
2778 $string .= $child->textContent . " ";
# Appends the matched node's own text content.
# NOTE(review): presumably guarded by a condition that is not visible here.
2781 $string .= $value->textContent . " ";
# Decompose, then drop the combining marks left by the decomposition.
2784 $string = NFD($string);
2785 $string =~ s/(\pM)//gso;
2790 sub modsdoc_to_values {
2791 my( $self, $mods ) = @_;
2793 for my $class (keys %$xpathset) {
2794 $data->{$class} = {};
2795 for my $type (keys %{$xpathset->{$class}}) {
2796 $data->{$class}->{$type} = {};
2797 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};