1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Registry of the record formats this module knows about, keyed by a short
# format name. Each entry carries the XML namespace URI of the format under
# the ns key; an xslt entry is attached later to the mods and mods3 entries.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; the logging methods are invoked on it as class methods.
39 my $log = 'OpenSRF::Utils::Logger';
# File-wide XML parser and XSLT processor instances.
# NOTE(review): no use line for XML::LibXML or XML::LibXSLT is visible in this
# excerpt - confirm that both modules are loaded somewhere.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
72 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
75 if (ref $req and @$req) {
77 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
96 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
100 # --------------------------------------------------------------------------------
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
# Full read-write ingest of one bibliographic record object. Runs the
# read-only ingest to build the derived rows, then replaces the stored
# full_rec, record_descriptor, classed field entries and metarecord mapping
# of the record through an open-ils.cstore transaction.
# Returns undef when the read-only ingest yields nothing or the commit fails.
# NOTE(review): this listing omits short lines (argument unpacking, closing
# braces, else branches), so the origin of $self, $bib, $mr and $holds is not
# visible here.
107 sub rw_biblio_ingest_single_object {
# Build all derived data without touching the database.
112 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113 return undef unless ($blob);
# Copy the freshly computed fingerprint and quality onto the record object.
115 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116 $bib->quality( $blob->{fingerprint}->{quality} );
118 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
120 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
122 # update full_rec stuff ...
123 my $tmp = $cstore->request(
124 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125 { record => $bib->id }
# Delete every existing full_rec row of the record, then create the new ones.
128 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
131 # update rec_descriptor stuff ...
132 $tmp = $cstore->request(
133 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134 { record => $bib->id }
137 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
140 # deal with classed fields...
# First remove the existing entries of every field class for this record.
141 for my $class ( qw/title author subject keyword series/ ) {
142 $tmp = $cstore->request(
143 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144 { source => $bib->id }
147 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Then create the new entries; the cstore method name is derived from the
# class name of each object with the leading Fieldmapper:: removed.
149 for my $obj ( @{ $blob->{field_entries} } ) {
150 my $class = $obj->class_name;
151 $class =~ s/^Fieldmapper:://o;
153 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Drop the existing metarecord_source_map rows of this record.
158 $tmp = $cstore->request(
159 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
160 { source => $bib->id }
163 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Fetch the metarecords those maps pointed at.
# NOTE(review): a my declaration combined with a statement modifier has
# behavior that perlsyn documents as undefined; the value is normalised to an
# array reference on the line after the request.
166 my $old_mrs = $cstore->request(
167 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
168 )->gather(1) if (@$tmp);
170 $old_mrs = [] if (!ref($old_mrs));
173 for my $m (@$old_mrs) {
174 if ($m->fingerprint eq $bib->fingerprint) {
# NOTE(review): $m comes from a metarecord search and its id and fingerprint
# are used elsewhere in this loop; filtering the source maps on
# $m->metarecord rather than $m->id looks suspicious - confirm.
177 my $others = $cstore->request(
178 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->metarecord }
183 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
193 # Get the matching MR, if any.
194 $mr = $cstore->request(
195 'open-ils.cstore.direct.metabib.metarecord.search',
196 { fingerprint => $bib->fingerprint }
# Collect the holds of type M that target old metarecords flagged as deleted.
199 $holds = $cstore->request(
200 'open-ils.cstore.direct.action.hold_request.search.atomic',
201 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
202 )->gather(1) if (@$old_mrs);
205 for my $h (@$holds) {
207 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord mastered on this record.
# NOTE(review): presumably only reached when no matching metarecord was found;
# the guarding condition is not visible in this excerpt.
214 $mr = new Fieldmapper::metabib::metarecord;
215 $mr->fingerprint( $bib->fingerprint );
216 $mr->master_record( $bib->id );
219 "open-ils.cstore.direct.metabib.metarecord.create",
220 $mr => { quiet => 'true' }
# Update the holds that have not already been marked as changed.
224 for my $h (grep { !$_->ischanged } @$holds) {
226 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Look up the source maps of the metarecord and search the mapped records
# ordered by descending quality.
229 my $mrm = $cstore->request(
230 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
231 { metarecord => $mr->id }
235 my $best = $cstore->request(
236 "open-ils.cstore.direct.biblio.record_entry.search",
237 { id => [ map { $_->source } @$mrm ] },
238 { 'select' => { bre => [ qw/id quality/ ] },
239 order_by => { bre => "quality desc" },
# A strictly higher quality record stays master; the remaining assignments
# make this record the master (their enclosing branches are not visible).
244 if ($best->quality > $bib->quality) {
245 $mr->master_record($best->id);
247 $mr->master_record($bib->id);
250 $mr->master_record($bib->id);
255 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the metarecord and store the updated record entry.
258 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
259 $mrm->source($bib->id);
260 $mrm->metarecord($mr->id);
262 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
263 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# NOTE(review): a failed commit returns undef with no rollback visible in this
# excerpt; the doubled semicolon is harmless.
265 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Publish the sub under its API name.
269 __PACKAGE__->register_method(
270 api_name => "open-ils.ingest.full.biblio.object",
271 method => "rw_biblio_ingest_single_object",
# Read-write ingest driven by a record identifier: retrieves the record entry
# and hands it to the object-based read-write ingest method.
# NOTE(review): the unpacking of $self and $rec is not visible in this excerpt.
276 sub rw_biblio_ingest_single_record {
281 OpenILS::Application::Ingest->post_init();
# The retrieve is wrapped in a transaction that is rolled back right after
# the read, so nothing is written by this sub itself.
282 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
283 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
285 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
287 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# The record object is dereferenced as an array to test that it is non-empty.
290 return undef unless ($r and @$r);
# Only the first element of the result list is returned.
292 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Publish the sub under its API name.
294 __PACKAGE__->register_method(
295 api_name => "open-ils.ingest.full.biblio.record",
296 method => "rw_biblio_ingest_single_record",
# Read-only ingest of one bibliographic record object. Parses the MARC of the
# record and returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor; nothing is stored.
# NOTE(review): the unpacking of $self and $bib is not visible in this excerpt.
301 sub ro_biblio_ingest_single_object {
# Characters outside ASCII are turned into numeric entities before parsing.
305 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
307 my $document = $parser->parse_string($xml);
# The flat MARC and field entry extractors receive the parsed document, while
# the fingerprint and descriptor methods receive the XML string.
309 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
310 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
311 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
312 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp every derived object with the id of the record it came from.
314 $_->source($bib->id) for (@mXfe);
315 $_->record($bib->id) for (@mfr);
316 $rd->record($bib->id) if ($rd);
318 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
320 __PACKAGE__->register_method(
321 api_name => "open-ils.ingest.full.biblio.object.readonly",
322 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a MARC XML string. Returns a hash reference with the
# keys full_rec, field_entries, fingerprint and descriptor. No record id is
# known here, so the derived objects are returned without one.
# NOTE(review): the unpacking of $self is not visible in this excerpt.
327 sub ro_biblio_ingest_single_xml {
# The XML string is taken from the argument list and entityized in one step.
330 my $xml = OpenILS::Application::Ingest::entityize(shift);
332 my $document = $parser->parse_string($xml);
# The flat MARC and field entry extractors receive the parsed document, while
# the fingerprint and descriptor methods receive the XML string.
334 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
335 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
336 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
337 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
339 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
341 __PACKAGE__->register_method(
342 api_name => "open-ils.ingest.full.biblio.xml.readonly",
343 method => "ro_biblio_ingest_single_xml",
# Read-only ingest driven by a record identifier: retrieves the record entry,
# runs the XML-based read-only ingest on its MARC and stamps the results with
# the record id.
# NOTE(review): the unpacking of $self and $rec, the gather on the request and
# the return statement are not visible in this excerpt.
348 sub ro_biblio_ingest_single_record {
353 OpenILS::Application::Ingest->post_init();
354 my $r = OpenSRF::AppSession
355 ->create('open-ils.cstore')
356 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
359 return undef unless ($r and @$r);
361 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
363 $_->source($rec) for (@{$res->{field_entries}});
364 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the descriptor is dereferenced without a guard here, whereas
# the object-based variant only calls record when a descriptor exists; a
# missing descriptor would make this line die - confirm.
365 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
369 __PACKAGE__->register_method(
370 api_name => "open-ils.ingest.full.biblio.record.readonly",
371 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest: reads record identifiers from the client one
# message at a time, runs the single-record read-only ingest for each and
# responds with the result.
# NOTE(review): the unpacking of $self and $client is not visible in this
# excerpt, and $ses is created but not used on any visible line.
376 sub ro_biblio_ingest_stream_record {
380 OpenILS::Application::Ingest->post_init();
382 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
384 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
386 my $rec = $resp->content;
387 last unless (defined $rec);
389 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
390 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
392 $_->source($rec) for (@{$res->{field_entries}});
393 $_->record($rec) for (@{$res->{full_rec}});
395 $client->respond( $res );
# Publish the sub under its API name.
400 __PACKAGE__->register_method(
401 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
402 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of MARC XML: reads XML payloads from the client
# one message at a time, runs the XML-based read-only ingest for each and
# responds with the result.
# NOTE(review): the unpacking of $self and $client is not visible in this
# excerpt, and $ses is created but not used on any visible line.
407 sub ro_biblio_ingest_stream_xml {
411 OpenILS::Application::Ingest->post_init();
413 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
415 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
417 my $xml = $resp->content;
418 last unless (defined $xml);
420 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
421 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
423 $client->respond( $res );
# Publish the sub under its API name.
428 __PACKAGE__->register_method(
429 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
430 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest for import: reads record objects from the client one
# message at a time, runs the XML-based read-only ingest on the MARC of each,
# stamps the results with the record id and responds with them.
# NOTE(review): despite the rw prefix and the import API name, no database
# write is visible in this excerpt - confirm whether lines are missing. The
# unpacking of $self and $client is not visible either, and $ses is created
# but not used on any visible line.
435 sub rw_biblio_ingest_stream_import {
439 OpenILS::Application::Ingest->post_init();
441 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
443 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
445 my $bib = $resp->content;
446 last unless (defined $bib);
448 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
449 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
451 $_->source($bib->id) for (@{$res->{field_entries}});
452 $_->record($bib->id) for (@{$res->{full_rec}});
454 $client->respond( $res );
# Publish the sub under its API name.
459 __PACKAGE__->register_method(
460 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
461 method => "rw_biblio_ingest_stream_import",
467 # --------------------------------------------------------------------------------
470 package OpenILS::Application::Ingest::Authority;
471 use base qw/OpenILS::Application::Ingest/;
472 use Unicode::Normalize;
# Read-only ingest of one authority record object. Parses the MARC of the
# record and returns a hash reference whose only key is full_rec.
# NOTE(review): the unpacking of $self and $bib is not visible in this excerpt.
474 sub ro_authority_ingest_single_object {
478 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
480 my $document = $parser->parse_string($xml);
482 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
# Stamp every row with the id of the record it came from.
484 $_->record($bib->id) for (@mfr);
486 return { full_rec => \@mfr };
# Publish the sub under its API name.
488 __PACKAGE__->register_method(
489 api_name => "open-ils.ingest.full.authority.object.readonly",
490 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARC XML string. Returns a hash reference
# whose only key is full_rec; the rows carry no record id.
# NOTE(review): the unpacking of $self is not visible in this excerpt.
495 sub ro_authority_ingest_single_xml {
# The XML string is taken from the argument list and entityized in one step.
498 my $xml = OpenILS::Application::Ingest::entityize(shift);
500 my $document = $parser->parse_string($xml);
502 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
504 return { full_rec => \@mfr };
# Publish the sub under its API name.
506 __PACKAGE__->register_method(
507 api_name => "open-ils.ingest.full.authority.xml.readonly",
508 method => "ro_authority_ingest_single_xml",
# Read-only authority ingest driven by a record identifier: retrieves the
# authority record entry, runs the XML-based read-only ingest on its MARC and
# stamps the rows with the record id.
# NOTE(review): the unpacking of $self and $rec, the gather on the request and
# the return statement are not visible in this excerpt.
513 sub ro_authority_ingest_single_record {
518 OpenILS::Application::Ingest->post_init();
519 my $r = OpenSRF::AppSession
520 ->create('open-ils.cstore')
521 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
524 return undef unless ($r and @$r);
526 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
528 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest in this file returns only a full_rec
# key, so the descriptor looked up here would be undefined and this call
# would die - confirm and guard or remove.
529 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
533 __PACKAGE__->register_method(
534 api_name => "open-ils.ingest.full.authority.record.readonly",
535 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest: reads record identifiers from the
# client one message at a time, runs the single-record authority ingest for
# each and responds with the result.
# NOTE(review): the unpacking of $self and $client is not visible in this
# excerpt, and $ses is created but not used on any visible line.
540 sub ro_authority_ingest_stream_record {
544 OpenILS::Application::Ingest->post_init();
546 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
548 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
550 my $rec = $resp->content;
551 last unless (defined $rec);
553 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
554 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
556 $_->record($rec) for (@{$res->{full_rec}});
558 $client->respond( $res );
# Publish the sub under its API name.
563 __PACKAGE__->register_method(
564 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
565 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of MARC XML: reads XML payloads from
# the client one message at a time, runs the XML-based authority ingest for
# each and responds with the result.
# NOTE(review): the unpacking of $self and $client is not visible in this
# excerpt, and $ses is created but not used on any visible line.
570 sub ro_authority_ingest_stream_xml {
574 OpenILS::Application::Ingest->post_init();
576 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
578 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
580 my $xml = $resp->content;
581 last unless (defined $xml);
583 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
584 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
586 $client->respond( $res );
# Publish the sub under its API name.
591 __PACKAGE__->register_method(
592 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
593 method => "ro_authority_ingest_stream_xml",
# Streaming authority ingest for import: reads record objects from the client
# one message at a time, runs the XML-based authority ingest on the MARC of
# each, stamps the rows with the record id and responds with them.
# NOTE(review): despite the rw prefix and the import API name, no database
# write is visible in this excerpt - confirm whether lines are missing. The
# unpacking of $self and $client is not visible either, and $ses is created
# but not used on any visible line.
598 sub rw_authority_ingest_stream_import {
602 OpenILS::Application::Ingest->post_init();
604 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration with a timeout value of 5; an undefined
# payload ends the loop.
606 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
608 my $bib = $resp->content;
609 last unless (defined $bib);
611 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
612 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
614 $_->record($bib->id) for (@{$res->{full_rec}});
616 $client->respond( $res );
# Publish the sub under its API name.
621 __PACKAGE__->register_method(
622 api_name => "open-ils.ingest.full.authority.bib_stream.import",
623 method => "rw_authority_ingest_stream_import",
629 # --------------------------------------------------------------------------------
630 # MARC index extraction
632 package OpenILS::Application::Ingest::XPATH;
633 use base qw/OpenILS::Application::Ingest/;
634 use Unicode::Normalize;
636 # give this an XML documentElement and an XPATH expression
# Evaluate an XPath expression against an XML node and concatenate the text
# of the matches into one string, each piece followed by a single space.
# NOTE(review): most argument-unpacking lines are missing from this listing;
# only $ns_prefix is visibly shifted off the argument list, so the order of
# $xml, $xpath, $ns_uri and $unique must be confirmed against the callers.
637 sub xpath_to_string {
641 my $ns_prefix = shift;
# Bind the prefix on the node only when both a URI and a prefix were given.
644 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
648 # grab the set of matching nodes
649 my @nodes = $xml->findnodes( $xpath );
650 for my $value (@nodes) {
652 # grab all children of the node
653 my @children = $value->childNodes();
654 for my $child (@children) {
656 # add the content of the child to the growing buffer
# quotemeta keeps the text from being read as regex syntax in the match below.
657 my $content = quotemeta($child->textContent);
658 next if ($unique && $string =~ /$content/); # uniquify the values
659 $string .= $child->textContent . " ";
# NOTE(review): presumably the fallback for a node without children; the
# enclosing condition is not visible in this excerpt.
662 $string .= $value->textContent . " ";
# Extract index field entries of the requested classes from a MARC XML
# document. For every XPath definition of every class, the record is
# transformed to the format of the definition, the XPath is evaluated, the
# text is normalised, and a field entry object is sent to the client.
# NOTE(review): the unpacking of $self, $client, $xml and @classes, plus the
# declarations of $xpathset, %transform_cache and $document, are not visible
# in this excerpt.
668 sub class_index_string_xml {
674 OpenILS::Application::Ingest->post_init();
# A plain string is entityized and parsed; a reference is used as it is.
675 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
679 for my $class (@classes) {
680 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
681 for my $type ( keys %{ $xpathset->{$class} } ) {
683 my $def = $xpathset->{$class}->{$type};
684 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Run the XSLT of each format at most once and reuse the cached result.
689 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
690 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix for the XPath evaluation.
693 my $value = xpath_to_string(
694 $document->documentElement => $def->{xpath},
695 $sf->{ns} => $def->{format},
# Normalise: decompose, drop mark and other-category characters, trim
# trailing non-word characters, and remove runs of dots that sit between
# word boundaries.
701 $value = NFD($value);
702 $value =~ s/\pM+//sgo;
703 $value =~ s/\pC+//sgo;
704 $value =~ s/\W+$//sgo;
706 $value =~ s/\b\.+\b//sgo;
# Wrap the value in a field entry of the class and stream it to the client.
709 my $fm = $class_constructor->new;
710 $fm->value( $value );
711 $fm->field( $xpathset->{$class}->{$type}->{id} );
712 $client->respond($fm);
# Publish the sub under its API name.
717 __PACKAGE__->register_method(
718 api_name => "open-ils.ingest.field_entry.class.xml",
719 method => "class_index_string_xml",
# Extract index field entries of the requested classes for a stored record:
# retrieves the record, runs the XML-based class extractor on its MARC and
# streams every resulting field entry to the client.
# NOTE(review): the unpacking of $self, $client, $rec and @classes and the
# gather on the request are not visible in this excerpt.
725 sub class_index_string_record {
731 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, while the sibling
# all_index_string_record retrieves from biblio.record_entry and the field
# entries built are metabib ones - possibly a copy and paste slip, confirm.
732 my $r = OpenSRF::AppSession
733 ->create('open-ils.cstore')
734 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
737 return undef unless ($r and @$r);
739 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
741 $client->respond($fm);
# Publish the sub under its API name.
745 __PACKAGE__->register_method(
746 api_name => "open-ils.ingest.field_entry.class.record",
747 method => "class_index_string_record",
# Extract index field entries of every configured class from MARC XML by
# delegating to the class extractor with all keys of the XPath set, and
# stream each resulting field entry to the client.
# NOTE(review): the unpacking of $self, $client and $xml is not visible in
# this excerpt.
753 sub all_index_string_xml {
758 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
759 $client->respond($fm);
# Publish the sub under its API name.
763 __PACKAGE__->register_method(
764 api_name => "open-ils.ingest.extract.field_entry.all.xml",
765 method => "all_index_string_xml",
# Extract index field entries of every configured class for a stored
# bibliographic record: retrieves the record, runs the class extractor on its
# MARC with all keys of the XPath set and streams each field entry.
# NOTE(review): the unpacking of $self, $client and $rec and the gather on
# the request are not visible in this excerpt.
771 sub all_index_string_record {
776 OpenILS::Application::Ingest->post_init();
777 my $r = OpenSRF::AppSession
778 ->create('open-ils.cstore')
779 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
782 return undef unless ($r and @$r);
784 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
786 $client->respond($fm);
# Publish the sub under its API name.
790 __PACKAGE__->register_method(
791 api_name => "open-ils.ingest.extract.field_entry.all.record",
792 method => "all_index_string_record",
798 # --------------------------------------------------------------------------------
801 package OpenILS::Application::Ingest::FlatMARC;
802 use base qw/OpenILS::Application::Ingest/;
803 use Unicode::Normalize;
# Flatten a parsed MARC XML document into full_rec row objects, walking the
# leader, the control fields and the subfields of every data field.
# The second argument selects the Fieldmapper namespace and defaults to
# metabib.
# NOTE(review): the unpacking of $marcxml, the creation of each $ns row
# object, the pushes onto @ns_list and the return statement are not visible
# in this excerpt.
806 sub _marcxml_to_full_rows {
809 my $xmltype = shift || 'metabib';
811 my $type = "Fieldmapper::${xmltype}::full_rec";
# Match the record element by local name so the namespace does not matter;
# only the first match is used.
815 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
817 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
818 next unless $tagline;
823 my $val = $tagline->textContent;
833 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
834 next unless $tagline;
838 $ns->tag( $tagline->getAttribute( "tag" ) );
839 my $val = $tagline->textContent;
849 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
850 next unless $tagline;
# Tag and indicators are read once per data field and shared by its subfields.
852 my $tag = $tagline->getAttribute( "tag" );
853 my $ind1 = $tagline->getAttribute( "ind1" );
854 my $ind2 = $tagline->getAttribute( "ind2" );
856 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
864 $ns->subfield( $data->getAttribute( "code" ) );
865 my $val = $data->textContent;
# Subfield values are stored in lower case.
870 $ns->value( lc($val) );
876 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
885 $log->debug("processing [$xml]");
887 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
889 my $type = 'metabib';
890 $type = 'authority' if ($self->api_name =~ /authority/o);
892 OpenILS::Application::Ingest->post_init();
894 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Publish flat_marc_xml under two API names, one for authority records and
# one for bibliographic records.
897 __PACKAGE__->register_method(
898 api_name => "open-ils.ingest.flat_marc.authority.xml",
899 method => "flat_marc_xml",
904 __PACKAGE__->register_method(
905 api_name => "open-ils.ingest.flat_marc.biblio.xml",
906 method => "flat_marc_xml",
# Flatten the MARC of a stored record into full_rec rows: retrieves the
# record entry of the selected type, runs the matching flat MARC XML method
# on its MARC and streams every row to the client.
# NOTE(review): the unpacking of $self, $client and $rec, the initial value
# of $type and the gather on the request are not visible in this excerpt.
912 sub flat_marc_record {
# The API name of the invoked method selects the authority variant.
918 $type = 'authority' if ($self->api_name =~ /authority/o);
920 OpenILS::Application::Ingest->post_init();
921 my $r = OpenSRF::AppSession
922 ->create('open-ils.cstore')
923 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
927 return undef unless ($r and $r->marc);
929 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
930 for my $row (@rows) {
931 $client->respond($row);
# NOTE(review): no use line for a JSON module is visible in this excerpt -
# confirm it is loaded.
932 $log->debug(JSON->perl2JSON($row), DEBUG);
# Publish the sub under one API name per record type.
936 __PACKAGE__->register_method(
937 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
938 method => "flat_marc_record",
943 __PACKAGE__->register_method(
944 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
945 method => "flat_marc_record",
951 # --------------------------------------------------------------------------------
954 package OpenILS::Application::Ingest::Biblio::Fingerprint;
955 use base qw/OpenILS::Application::Ingest/;
956 use Unicode::Normalize;
957 use OpenSRF::EX qw/:try/;
# Compute the fingerprint of a stored bibliographic record: retrieves the
# record entry and runs the XML fingerprint method on its MARC.
# NOTE(review): the unpacking of $self and $rec, the gather on the request
# and the return statement are not visible in this excerpt.
959 sub biblio_fingerprint_record {
964 OpenILS::Application::Ingest->post_init();
966 my $r = OpenSRF::AppSession
967 ->create('open-ils.cstore')
968 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
971 return undef unless ($r and $r->marc);
973 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the following line, so
# interpolating it here logs a reference address rather than the fingerprint.
974 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# The quality score is truncated to an integer.
975 $fp->{quality} = int($fp->{quality});
# Publish the sub under its API name.
978 __PACKAGE__->register_method(
979 api_name => "open-ils.ingest.fingerprint.record",
980 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARC XML string by running the configured
# fingerprint script through OpenILS::Utils::ScriptRunner.
# NOTE(review): the unpacking of $self, the declaration of $fp_script, any
# guard around the script construction and the return statement are not
# visible in this excerpt.
986 sub biblio_fingerprint {
# The XML string is taken from the argument list and entityized in one step.
989 my $xml = OpenILS::Application::Ingest::entityize(shift);
991 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# application settings.
994 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
995 my $conf = OpenSRF::Utils::SettingsClient->new;
997 my $libs = $conf->config_value(@pfx, 'script_path');
998 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# A single configured path is wrapped so that paths is always an array reference.
999 my $script_libs = (ref($libs)) ? $libs : [$libs];
1001 $log->debug("Loading script $script_file for biblio fingerprinting...");
1003 $fp_script = new OpenILS::Utils::ScriptRunner
1004 ( file => $script_file,
1005 paths => $script_libs,
1006 reset_count => 100 );
# Expose the record to the script as environment.marc.
1009 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): the early return only fires if the error logging call
# returns a true value - confirm that it does.
1011 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1012 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub under its API name.
1016 __PACKAGE__->register_method(
1017 api_name => "open-ils.ingest.fingerprint.xml",
1018 method => "biblio_fingerprint",
# Extract a record descriptor from a MARC XML string by running the
# configured descriptor script through OpenILS::Utils::ScriptRunner.
# NOTE(review): the unpacking of $self, the declaration of $rd_script, any
# guard around the script construction and the return statement are not
# visible in this excerpt.
1024 sub biblio_descriptor {
# The XML string is taken from the argument list and entityized in one step.
1027 my $xml = OpenILS::Application::Ingest::entityize(shift);
1029 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# application settings.
1032 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1033 my $conf = OpenSRF::Utils::SettingsClient->new;
1035 my $libs = $conf->config_value(@pfx, 'script_path');
1036 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# A single configured path is wrapped so that paths is always an array reference.
1037 my $script_libs = (ref($libs)) ? $libs : [$libs];
1039 $log->debug("Loading script $script_file for biblio descriptor extraction...");
1041 $rd_script = new OpenILS::Utils::ScriptRunner
1042 ( file => $script_file,
1043 paths => $script_libs,
1044 reset_count => 100 );
1047 $log->debug("Setting up environment for descriptor extraction script...");
# Here the record is inserted under the dotted key environment.marc, unlike
# the fingerprint sub, which inserts a hash under environment.
1048 $rd_script->insert('environment.marc' => $xml => 1);
1049 $log->debug("Environment building complete...");
# NOTE(review): the early return only fires if the error logging call
# returns a true value - confirm that it does.
1051 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1052 $log->debug("Script for biblio descriptor extraction completed successfully");
# Publish the sub under its API name.
1056 __PACKAGE__->register_method(
1057 api_name => "open-ils.ingest.descriptor.xml",
1058 method => "biblio_descriptor",
# Report the current storage transaction by asking open-ils.storage for it;
# callers in this file treat a false result as being outside a transaction.
1068 sub in_transaction {
1069 OpenILS::Application::Ingest->post_init();
1070 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Start a storage transaction and return the current transaction as reported
# by open-ils.storage.
# Throws OpenSRF::EX::PANIC when the begin request does not return a true
# value.
# NOTE(review): the unpacking of $client, the test on $outer_xact and the
# try and catch scaffolding are not visible in this excerpt.
1073 sub begin_transaction {
1077 OpenILS::Application::Ingest->post_init();
1078 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1082 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1083 #__PACKAGE__->st_sess->connect;
1084 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A failed begin is rolled back before the exception is raised.
1085 unless (defined $r and $r) {
1086 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1087 #__PACKAGE__->st_sess->disconnect;
1088 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1092 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1095 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction, or log that there is none.
# Any error caught during the attempt is rethrown as OpenSRF::EX::PANIC.
# NOTE(review): the test on $outer_xact, the opening of the try block and the
# return statement are not visible in this excerpt.
1098 sub rollback_transaction {
1102 OpenILS::Application::Ingest->post_init();
1103 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1107 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1109 $log->debug("Ingest isn't inside a transaction.", INFO);
1111 } catch Error with {
1112 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction, or log that there is none.
# Throws OpenSRF::EX::PANIC when the commit request does not return a true
# value; any error caught during the attempt is rethrown the same way.
# NOTE(review): the test on $outer_xact, the opening of the try block and the
# return statement are not visible in this excerpt.
1118 sub commit_transaction {
1122 OpenILS::Application::Ingest->post_init();
1123 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1126 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1128 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A failed commit is rolled back before the exception is raised.
1129 unless (defined $r and $r) {
1130 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1131 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1133 #__PACKAGE__->st_sess->disconnect;
1135 $log->debug("Ingest isn't inside a transaction.", INFO);
1137 } catch Error with {
1138 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
1147 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1148 return shift( @res );
# Remove the derived rows (full_rec and record_descriptor) of an authority
# record, inside a savepoint named scrub_authority_record.
# NOTE(review): the unpacking of $self, $client and $rec, the assignments to
# $commit and $success, the try and catch scaffolding and the return
# statement are not visible in this excerpt.
1151 sub scrub_authority_record {
# Start a transaction only when the caller has not already opened one.
1157 if (!OpenILS::Application::Ingest->in_transaction) {
1158 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1164 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1166 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1167 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1169 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# presumably the error handler: log the failure and undo back to the savepoint.
1171 $log->debug('Scrubbing failed : '.shift(), ERROR);
1172 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
1176 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1177 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its API name.
1180 __PACKAGE__->register_method(
1181 api_name => "open-ils.worm.scrub.authority",
1182 method => "scrub_authority_record",
# Remove the derived metabib rows of a bibliographic record and repair or
# delete the metarecords mastered on it, inside a savepoint named
# scrub_metabib_record.
# NOTE(review): the unpacking of $self, $client and $rec, the assignments to
# $commit and $success, the try and catch scaffolding, several conditions and
# the return statement are not visible in this excerpt.
1188 sub scrub_metabib_record {
# A hash reference is treated as search criteria and replaced by the result
# of an id list search.
1193 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1194 $rec = OpenILS::Application::Ingest->storage_req(
1195 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Start a transaction only when the caller has not already opened one.
1200 if (!OpenILS::Application::Ingest->in_transaction) {
1201 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1207 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Mass delete every derived table keyed on the record.
1209 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1210 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1211 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1212 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1213 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1214 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1215 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1216 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1218 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1219 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1221 for my $mr (@$masters) {
1222 $log->debug( "Found metarecord whose master is $rec", DEBUG);
1223 my $others = OpenILS::Application::Ingest->storage_req(
1224 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Re-point the metarecord at the first remaining source map.
# NOTE(review): presumably guarded by a test that other maps exist; the
# condition is not visible in this excerpt.
1227 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1228 $mr->master_record($others->[0]->source);
1229 OpenILS::Application::Ingest->storage_req(
1230 'open-ils.storage.direct.metabib.metarecord.remote_update',
1232 { master_record => $others->[0]->source, mods => undef }
# Otherwise the metarecord is deleted. The warn calls duplicate the debug
# log lines on standard error.
1235 warn "Removing metarecord whose master is $rec";
1236 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1237 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1238 warn "Metarecord removed";
1239 $log->debug( "Metarecord removed", DEBUG);
1243 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# presumably the error handler: log the failure and undo back to the savepoint.
1246 $log->debug('Scrubbing failed : '.shift(), ERROR);
1247 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
1251 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1252 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its API name.
1255 __PACKAGE__->register_method(
1256 api_name => "open-ils.worm.scrub.biblio",
1257 method => "scrub_metabib_record",
# Re-ingest every bibliographic record mapped to a metarecord: looks up the
# source maps of the metarecord and calls wormize_biblio_record on the source
# of each, building a per-record result with record, metarecord and success
# keys.
# NOTE(review): the unpacking of $self, $client and $mrec, the declaration of
# $success, the opening of the try block and the statements that consume the
# result hashes are not visible in this excerpt.
1262 sub wormize_biblio_metarecord {
1267 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1270 for my $r (@$recs) {
# The sibling sub is called directly as a function, with $self and $client
# passed through explicitly.
1273 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): no $rec is visibly declared in this sub; the loop variable is
# $r and the argument is $mrec, so $rec->metarecord looks like a slip for
# $r->metarecord - confirm.
1275 { record => $r->source,
1276 metarecord => $rec->metarecord,
1277 success => $success,
1280 } catch Error with {
1283 { record => $r->source,
1284 metarecord => $rec->metarecord,
1285 success => $success,
# Publish the sub under four API names; the nomap and noscrub suffixes are
# presumably inspected through api_name by the code that does the work, as
# wormize_biblio_record does for noscrub.
1293 __PACKAGE__->register_method(
1294 api_name => "open-ils.worm.wormize.metarecord",
1295 method => "wormize_biblio_metarecord",
1300 __PACKAGE__->register_method(
1301 api_name => "open-ils.worm.wormize.metarecord.nomap",
1302 method => "wormize_biblio_metarecord",
1307 __PACKAGE__->register_method(
1308 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1309 method => "wormize_biblio_metarecord",
1314 __PACKAGE__->register_method(
1315 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1316 method => "wormize_biblio_metarecord",
# wormize_biblio_record: (re)build the derived metabib data for bib record(s).
#
# Flow, as far as the code below shows:
#   1. A HASH-ref $rec is treated as a search clause and resolved to ids through
#      open-ils.storage.id_list.biblio.record_entry.search_where.
#   2. A storage transaction is begun when none is active.
#   3. Unless the API name contains "noscrub", open-ils.worm.scrub.biblio is run
#      on $rec first.
#   4. Per record, inside an "extract_data<id>" savepoint: the fingerprint is
#      refreshed, flat MARC rows, the record descriptor and per-class field
#      entries are collected, and (unless the API name contains "nomap") a
#      metarecord is found or created and a source-map entry queued.
#   5. Inside a "wormize_record" savepoint the collected objects are batch-created
#      and every touched metarecord gets its highest-quality bib as master.
#   6. The transaction is committed or rolled back when $commit is set,
#      depending on $success.
1323 sub wormize_biblio_record {
1328 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1329 $rec = OpenILS::Application::Ingest->storage_req(
1330 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1336 if (!OpenILS::Application::Ingest->in_transaction) {
1337 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1343 # clean up the cruft
1344 unless ($self->api_name =~ /noscrub/o) {
1345 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1349 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1352 my @rec_descriptor = ();
# %metarecord is keyed by metarecord id, so each metarecord touched in this run
# is re-evaluated for its master record exactly once further down.
1360 my %metarecord = ();
1361 my @source_map = ();
1362 for my $r (@$bibs) {
# One savepoint per record: a failure in extraction rolls back only this record.
1364 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1366 my $xml = $parser->parse_string($r->marc);
1368 #update the fingerprint
1369 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1370 OpenILS::Application::Ingest->storage_req(
1371 'open-ils.storage.direct.biblio.record_entry.remote_update',
1373 { fingerprint => $fp->{fingerprint},
1374 quality => int($fp->{quality}) }
# Only write back when something changed.
# NOTE(review): quality is compared with the string operator `ne` here, while
# refingerprint_bibrec compares it numerically with `!=` -- confirm intent.
1375 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1377 # the full_rec stuff
1378 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1379 $fr->record( $r->id );
1380 push @full_rec, $fr;
1383 # the rec_descriptor stuff
1384 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1385 $rd->record( $r->id );
1386 push @rec_descriptor, $rd;
1388 # the indexing field entry stuff
1389 for my $class ( qw/title author subject keyword series/ ) {
1390 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1391 $fe->source( $r->id );
1392 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: reuse the first metarecord with this fingerprint, or
# create a new one whose master is the current record.
1396 unless ($self->api_name =~ /nomap/o) {
1397 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1400 $mr = Fieldmapper::metabib::metarecord->new;
1401 $mr->fingerprint( $fp->{fingerprint} );
1402 $mr->master_record( $r->id );
1403 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1406 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1407 $mr_map->metarecord( $mr->id );
1408 $mr_map->source( $r->id );
1409 push @source_map, $mr_map;
1411 $metarecord{$mr->id} = $mr;
1413 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record: log at ERROR level and undo its savepoint.
1415 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1416 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Storage phase: runs only when at least one record descriptor was extracted.
1421 if (@rec_descriptor) {
1422 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1424 OpenILS::Application::Ingest->storage_req(
1425 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# Re-elect the master record of every metarecord touched above: load all of its
# mapped bibs and pick the one with the highest quality.
1429 for my $mr ( values %metarecord ) {
1430 my $sources = OpenILS::Application::Ingest->storage_req(
1431 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1435 my $bibs = OpenILS::Application::Ingest->storage_req(
1436 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1437 [ map { $_->source } @$sources ]
1440 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1442 OpenILS::Application::Ingest->storage_req(
1443 'open-ils.storage.direct.metabib.metarecord.remote_update',
# mods is reset to undef together with the master record.
1445 { master_record => $master->id, mods => undef }
1449 OpenILS::Application::Ingest->storage_req(
1450 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
# NOTE(review): this guard appears redundant with the enclosing
# `if (@rec_descriptor)` -- confirm against the full block structure.
1452 ) if (@rec_descriptor);
1454 OpenILS::Application::Ingest->storage_req(
1455 'open-ils.storage.direct.metabib.full_rec.batch.create',
# Field entries are created per class, skipping classes with nothing to store.
1459 OpenILS::Application::Ingest->storage_req(
1460 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1461 @{ $field_entry{title} }
1462 ) if (@{ $field_entry{title} });
1464 OpenILS::Application::Ingest->storage_req(
1465 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1466 @{ $field_entry{author} }
1467 ) if (@{ $field_entry{author} });
1469 OpenILS::Application::Ingest->storage_req(
1470 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1471 @{ $field_entry{subject} }
1472 ) if (@{ $field_entry{subject} });
1474 OpenILS::Application::Ingest->storage_req(
1475 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1476 @{ $field_entry{keyword} }
1477 ) if (@{ $field_entry{keyword} });
1479 OpenILS::Application::Ingest->storage_req(
1480 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1481 @{ $field_entry{series} }
1482 ) if (@{ $field_entry{series} });
1484 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the storage phase: log and roll back to the savepoint.
1490 $log->debug('Wormization failed : '.shift(), ERROR);
1491 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# Finish the transaction only when $commit is set; $success picks the outcome.
1495 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1496 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Four API names share this implementation; "nomap" and "noscrub" in the name
# switch off the metarecord-mapping and scrub steps tested via api_name above.
1499 __PACKAGE__->register_method(
1500 api_name => "open-ils.worm.wormize.biblio",
1501 method => "wormize_biblio_record",
1505 __PACKAGE__->register_method(
1506 api_name => "open-ils.worm.wormize.biblio.nomap",
1507 method => "wormize_biblio_record",
1511 __PACKAGE__->register_method(
1512 api_name => "open-ils.worm.wormize.biblio.noscrub",
1513 method => "wormize_biblio_record",
1517 __PACKAGE__->register_method(
1518 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1519 method => "wormize_biblio_record",
# wormize_authority_record: (re)build the flattened full_rec rows for authority
# record(s).
#
# Begins a storage transaction when none is active, scrubs existing data via
# open-ils.worm.scrub.authority unless the API name contains "noscrub", flattens
# each record's MARC with open-ils.worm.flat_marc.authority.xml, and batch-creates
# the rows inside a "wormize_authority_record" savepoint.
1524 sub wormize_authority_record {
1530 if (!OpenILS::Application::Ingest->in_transaction) {
1531 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1537 # clean up the cruft
1538 unless ($self->api_name =~ /noscrub/o) {
1539 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite its name, $bibs holds authority record entries here.
1543 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# @rec_descriptor is only referenced by the commented-out descriptor code below.
1546 my @rec_descriptor = ();
1547 for my $r (@$bibs) {
1548 my $xml = $parser->parse_string($r->marc);
1550 # the full_rec stuff
1551 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1552 $fr->record( $r->id );
1553 push @full_rec, $fr;
1556 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1557 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1558 #$rd->record( $r->id );
1559 #push @rec_descriptor, $rd;
1563 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1565 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1566 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1568 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log at ERROR level and roll back to the savepoint.
1571 $log->debug('Wormization failed : '.shift(), ERROR);
1572 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Finish the transaction only when $commit is set; $success picks the outcome.
1576 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1577 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published with and without the scrub step ("noscrub" is tested via api_name).
1580 __PACKAGE__->register_method(
1581 api_name => "open-ils.worm.wormize.authority",
1582 method => "wormize_authority_record",
1586 __PACKAGE__->register_method(
1587 api_name => "open-ils.worm.wormize.authority.noscrub",
1588 method => "wormize_authority_record",
1594 # --------------------------------------------------------------------------------
1595 # MARC index extraction
1597 package OpenILS::Application::Ingest::XPATH;
1598 use base qw/OpenILS::Application::Ingest/;
1599 use Unicode::Normalize;
1601 # give this a MODS documentElement and an XPATH expression
# _xpath_to_string: concatenate the text found under every node matching $xpath
# into one space-separated string and return it NFD-normalised.
# A namespace is bound on $xml only when both $ns_uri and $ns_prefix are given.
# With $unique set, text already present in the buffer is not appended again.
1602 sub _xpath_to_string {
1606 my $ns_prefix = shift;
1609 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1613 # grab the set of matching nodes
1614 my @nodes = $xml->findnodes( $xpath );
1615 for my $value (@nodes) {
1617 # grab all children of the node
1618 my @children = $value->childNodes();
1619 for my $child (@children) {
1621 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern.
# NOTE(review): the match is unanchored, so a value that is merely a substring
# of earlier text (e.g. "art" after "smart") is also skipped -- confirm intent.
1622 my $content = quotemeta($child->textContent);
1623 next if ($unique && $string =~ /$content/); # uniquify the values
1624 $string .= $child->textContent . " ";
# Presumably the branch for nodes without children -- TODO confirm.
1627 $string .= $value->textContent . " ";
1630 return NFD($string);
# class_all_index_string_xml: build one Fieldmapper::metabib::<class>_field_entry
# per index type configured in $xpathset for $class and stream each to the client.
# $xml may be a MARCXML string (parsed here) or an already-parsed document.
1633 sub class_all_index_string_xml {
1639 OpenILS::Application::Ingest->post_init();
1640 $xml = $parser->parse_string($xml) unless (ref $xml);
1642 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1643 for my $type ( keys %{ $xpathset->{$class} } ) {
1644 my $value = _xpath_to_string(
# NOTE(review): the MARC-to-MODS transform does not depend on $type but is
# evaluated on every iteration of this loop -- candidate for hoisting.
1645 $mods_sheet->transform($xml)->documentElement,
1646 $xpathset->{$class}->{$type}->{xpath},
1647 "http://www.loc.gov/mods/",
# Normalise the extracted text: decompose (NFD), drop combining marks (\pM) and
# control/other characters (\pC), trim trailing non-word characters, remove a
# period that directly follows a word character, then lowercase.
# _xpath_to_string already returns NFD text, so this NFD call looks redundant.
1654 $value = NFD($value);
1655 $value =~ s/\pM+//sgo;
1656 $value =~ s/\pC+//sgo;
1657 $value =~ s/\W+$//sgo;
1659 $value =~ s/(\w)\./$1/sgo;
1660 $value = lc($value);
# The entry stores the normalised value and the id of the metabib field
# definition it was extracted for.
1662 my $fm = $class_constructor->new;
1663 $fm->value( $value );
1664 $fm->field( $xpathset->{$class}->{$type}->{id} );
1665 $client->respond($fm);
1669 __PACKAGE__->register_method(
1670 api_name => "open-ils.worm.field_entry.class.xml",
1671 method => "class_all_index_string_xml",
# class_all_index_string_record: record-id front end for
# open-ils.worm.field_entry.class.xml. Retrieves bib record $rec from storage,
# runs the XML variant on its MARC for $class, and relays every field entry to
# the client.
1677 sub class_all_index_string_record {
1683 OpenILS::Application::Ingest->post_init();
1684 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1686 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1688 $client->respond($fm);
1692 __PACKAGE__->register_method(
1693 api_name => "open-ils.worm.field_entry.class.record",
1694 method => "class_all_index_string_record",
# class_index_string_xml: return the raw index string for a single
# ($class, $type) pair. The MARCXML is parsed if given as a string, transformed
# to MODS, and the XPath configured in $xpathset is evaluated with the "mods"
# prefix bound to http://www.loc.gov/mods/; the trailing 1 is passed in the
# position xml_xpath uses for $unique.
# Unlike the field_entry variant, no mark-stripping or lowercasing happens here.
1701 sub class_index_string_xml {
1708 OpenILS::Application::Ingest->post_init();
1709 $xml = $parser->parse_string($xml) unless (ref $xml);
1710 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1712 __PACKAGE__->register_method(
1713 api_name => "open-ils.worm.class.type.xml",
1714 method => "class_index_string_xml",
# class_index_string_record: record-id front end for open-ils.worm.class.type.xml.
# Retrieves bib record $rec, runs the XML variant on its MARC for the
# ($class, $type) pair, and logs the resulting string at DEBUG level.
1719 sub class_index_string_record {
1726 OpenILS::Application::Ingest->post_init();
1727 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1729 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1730 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1733 __PACKAGE__->register_method(
1734 api_name => "open-ils.worm.class.type.record",
1735 method => "class_index_string_record",
# xml_xpath: evaluate a caller-supplied XPath directly against the document
# (no MODS transform). $xml is parsed when given as a string; $uri/$prefix give
# the namespace binding and $unique is forwarded to _xpath_to_string.
1749 OpenILS::Application::Ingest->post_init();
1750 $xml = $parser->parse_string($xml) unless (ref $xml);
1751 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1753 __PACKAGE__->register_method(
1754 api_name => "open-ils.worm.xpath.xml",
1755 method => "xml_xpath",
# record_xpath: record-id front end for open-ils.worm.xpath.xml. Retrieves bib
# record $rec, evaluates $xpath against its MARC, and logs the result at DEBUG
# level.
1769 OpenILS::Application::Ingest->post_init();
1770 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1772 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1773 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1776 __PACKAGE__->register_method(
1777 api_name => "open-ils.worm.xpath.record",
1778 method => "record_xpath",
1784 # --------------------------------------------------------------------------------
1787 package OpenILS::Application::Ingest::Biblio::Leader;
1788 use base qw/OpenILS::Application::Ingest/;
1789 use Unicode::Normalize;
# Maps a record-type group name (e.g. VIS) to a regex fragment; the descriptor
# code below matches these against leader position 6 through _type_re().
1791 our %marc_type_groups = (
1794 VIS => q/[gkro]{1}/,
# Builds an alternation pattern from the fragments of the requested groups.
# NOTE(review): `$marc_type_groups{@_}` is a single-element lookup, not a hash
# slice (`@marc_type_groups{@_}`), so the group names in @_ are not used as
# individual keys -- confirm. Also, without grouping parentheses `^` binds only
# to the first alternative and `$` only to the last.
1803 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# Dispatch table: record_descriptor field name => code ref that computes the
# field. The subs read the package variables $ldr (leader) and $oo8 (008
# control field), which _extract_biblio_descriptors sets with `local` before
# calling them. Offsets are zero-based substr positions.
1807 our %biblio_descriptor_code = (
1808 item_type => sub { substr($ldr,6,1); },
# For MAP/VIS record types the value comes from 008 position 29; for
# BKS/SER/MIX/SCO/REC it comes from 008 position 23.
1811 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1812 return substr($oo8,29,1);
1813 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1814 return substr($oo8,23,1);
1818 bib_level => sub { substr($ldr,7,1); },
1819 control_type => sub { substr($ldr,8,1); },
1820 char_encoding => sub { substr($ldr,9,1); },
1821 enc_level => sub { substr($ldr,17,1); },
1822 cat_form => sub { substr($ldr,18,1); },
1823 pub_status => sub { substr($ldr,5,1); },
1824 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 position 33, for BKS and VIS record types
# respectively, and are undef for every other type.
1825 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1826 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1827 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: fill a metabib::record_descriptor from a parsed
# MARCXML document by running every entry of %biblio_descriptor_code.
1830 sub _extract_biblio_descriptors {
# `local` gives the descriptor code refs dynamically scoped access to the
# leader and the 008/007 control fields; local-name() makes the lookups
# namespace-agnostic.
# $oo7 is not read by any of the descriptor subs visible in the table.
1833 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1834 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1835 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1837 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1838 for my $rd_field ( keys %biblio_descriptor_code ) {
# The table key doubles as the name of the setter method on the descriptor.
1839 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API wrapper around _extract_biblio_descriptors.
# Accepts MARCXML as a string (parsed here) or as an already-parsed document.
1845 sub extract_biblio_desc_xml {
1850 $xml = $parser->parse_string($xml) unless (ref $xml);
1852 return _extract_biblio_descriptors( $xml );
1854 __PACKAGE__->register_method(
1855 api_name => "open-ils.worm.biblio_leader.xml",
1856 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id front end for
# open-ils.worm.biblio_leader.xml. Retrieves bib record $rec, extracts the
# descriptor from its MARC, and logs it as JSON at DEBUG level.
1861 sub extract_biblio_desc_record {
1866 OpenILS::Application::Ingest->post_init();
1867 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1869 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1870 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1873 __PACKAGE__->register_method(
1874 api_name => "open-ils.worm.biblio_leader.record",
1875 method => "extract_biblio_desc_record",
1880 # --------------------------------------------------------------------------------
1883 package OpenILS::Application::Ingest::FlatMARC;
1884 use base qw/OpenILS::Application::Ingest/;
1885 use Unicode::Normalize;
# _marcxml_to_full_rows: flatten a parsed MARCXML document into
# Fieldmapper::<xmltype>::full_rec objects -- one per leader, one per
# controlfield and one per datafield subfield.
# $xmltype selects the Fieldmapper namespace and defaults to 'metabib'.
1888 sub _marcxml_to_full_rows {
1890 my $marcxml = shift;
1891 my $xmltype = shift || 'metabib';
1893 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first element whose local name is "record" is processed.
1897 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader row(s). Values are stripped of combining marks (\pM), control/other
# characters (\pC) and trailing non-word characters.
1899 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1900 next unless $tagline;
1902 my $ns = $type->new;
1905 my $val = $tagline->textContent;
1907 $val =~ s/\pM+//sgo;
1908 $val =~ s/\pC+//sgo;
1909 $val =~ s/\W+$//sgo;
# Control fields: one row per field, tagged with the field's "tag" attribute.
1915 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1916 next unless $tagline;
1918 my $ns = $type->new;
1920 $ns->tag( $tagline->getAttribute( "tag" ) );
1921 my $val = $tagline->textContent;
1923 $val =~ s/\pM+//sgo;
1924 $val =~ s/\pC+//sgo;
1925 $val =~ s/\W+$//sgo;
# Data fields: tag and indicators are read once per field; each subfield then
# becomes its own row.
1931 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1932 next unless $tagline;
1934 my $tag = $tagline->getAttribute( "tag" );
1935 my $ind1 = $tagline->getAttribute( "ind1" );
1936 my $ind2 = $tagline->getAttribute( "ind2" );
1938 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1941 my $ns = $type->new;
1946 $ns->subfield( $data->getAttribute( "code" ) );
1947 my $val = $data->textContent;
1949 $val =~ s/\pM+//sgo;
1950 $val =~ s/\pC+//sgo;
1951 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1952 $ns->value( lc($val) );
1958 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# flat_marc_xml: stream the flattened full_rec rows of a MARCXML document to the
# client. $xml is parsed when given as a string. The row class is 'authority'
# when the API name contains "authority" and 'metabib' otherwise.
1967 $xml = $parser->parse_string($xml) unless (ref $xml);
1969 my $type = 'metabib';
1970 $type = 'authority' if ($self->api_name =~ /authority/o);
1972 OpenILS::Application::Ingest->post_init();
1974 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# One implementation, two API names; the name selects the row class above.
1977 __PACKAGE__->register_method(
1978 api_name => "open-ils.worm.flat_marc.authority.xml",
1979 method => "flat_marc_xml",
1984 __PACKAGE__->register_method(
1985 api_name => "open-ils.worm.flat_marc.biblio.xml",
1986 method => "flat_marc_xml",
# flat_marc_record: record-id front end for the flat_marc.*.xml methods.
# $type ('biblio', or 'authority' when the API name contains "authority") picks
# both the storage retrieve call and the XML method that flattens the MARC.
1992 sub flat_marc_record {
1997 my $type = 'biblio';
1998 $type = 'authority' if ($self->api_name =~ /authority/o);
2000 OpenILS::Application::Ingest->post_init();
2001 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
2003 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
2006 __PACKAGE__->register_method(
2007 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
2008 method => "flat_marc_record",
2013 __PACKAGE__->register_method(
2014 api_name => "open-ils.worm.flat_marc.authority.record_entry",
2015 method => "flat_marc_record",
2022 # --------------------------------------------------------------------------------
2025 package OpenILS::Application::Ingest::Biblio::Fingerprint;
2026 use base qw/OpenILS::Application::Ingest/;
2027 use Unicode::Normalize;
2028 use OpenSRF::EX qw/:try/;
# Fingerprint recipe table. The fingerprinting loop further down reads it as
# consecutive pairs: a "match" XPath followed by an array-ref block. Inside a
# block, even slots are names (used in log output) and odd slots are part
# hashes whose {xpath} list is evaluated with findvalue(). The $text
# substitutions below are presumably each part's fixup routine -- TODO confirm.
2030 my @fp_mods_xpath = (
# Rule 1: records whose typeOfResource is "text".
2031 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates: uniform, translated, alternative, then untyped.
2034 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2035 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2036 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2037 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup (visible steps): strip combining marks, collapse whitespace, trim,
# drop the articles the/a/an, drop [bracketed] text, drop trailing ; / . runs.
2040 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2042 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2043 $text =~ s/\pM+//gso;
2044 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2046 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2047 $text =~ s/\s+/ /sgo;
2048 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2050 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051 $text =~ s/\b(?:the|an?)\b//sgo;
2052 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053 $text =~ s/\[.[^\]]+\]//sgo;
2054 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055 $text =~ s/\s*[;\/\.]*$//sgo;
2056 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: the personal creator's name parts first, then any creator.
2061 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2062 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fixup (visible steps): strip combining marks, collapse whitespace, trim,
# then cut everything from the first (optionally comma-prefixed) whitespace run,
# which keeps only the leading token of the name.
2065 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2067 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2068 $text =~ s/\pM+//gso;
2069 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2071 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2072 $text =~ s/\s+/ /sgo;
2073 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2074 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2075 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076 $text =~ s/,?\s+.*$//sgo;
2077 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 2: records with a relatedItem whose type is neither "host" nor "series".
2082 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the related item's titles first, then the record's own.
2085 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2086 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2087 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2088 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2089 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2090 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2091 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2092 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Same title fixup steps as in rule 1.
2095 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2097 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2098 $text =~ s/\pM+//gso;
2099 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2101 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2102 $text =~ s/\s+/ /sgo;
2103 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2104 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2105 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2106 $text =~ s/\b(?:the|an?)\b//sgo;
2107 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2108 $text =~ s/\[.[^\]]+\]//sgo;
2109 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2110 $text =~ s/\s*[;\/\.]*$//sgo;
2111 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: the related item's creators first, then the record's own.
2116 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2117 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2118 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2119 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Same author fixup steps as in rule 1.
2122 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2124 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2125 $text =~ s/\pM+//gso;
2126 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2128 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2129 $text =~ s/\s+/ /sgo;
2130 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2131 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2132 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2133 $text =~ s/,?\s+.*$//sgo;
2134 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Catch-all rule: any record with a titleInfo reuses the block of rule 1
# (index 1 of the table).
2141 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Fingerprint a MODS element using the @fp_mods_xpath table: bind the "mods"
# prefix, find a rule whose match XPath selects nodes, and append the text of
# each part of that rule's block to $fp_string.
2145 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# The table is flat: match XPaths at even indexes, their blocks at odd indexes.
2149 my $match_index = 0;
2150 my $block_index = 1;
2151 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2152 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Blocks use the same layout: part names at even slots, part hashes at odd slots.
2154 my $block_name_index = 0;
2155 my $block_value_index = 1;
2156 my $block = $fp_mods_xpath[$block_index];
2157 while ( my $part = $$block[$block_value_index] ) {
# Candidate XPaths of the part are evaluated in order.
2159 for my $xpath ( @{ $part->{xpath} } ) {
2160 $text = $mods->findvalue( $xpath );
2164 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2168 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2169 $fp_string .= $text;
# Advance to the next name/part pair of the block.
2172 $block_name_index += 2;
2173 $block_value_index += 2;
# The final fingerprint keeps word characters only.
2177 $fp_string =~ s/\W+//gso;
2178 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recompute fingerprint and quality for bib record(s) and,
# unless the API name contains "nomap", move each record to the metarecord that
# matches its new fingerprint.
#
# Begins a storage transaction when none is active; responds to the client with
# each processed record id; commits or rolls back when $commit is set.
2188 sub refingerprint_bibrec {
2194 if (!OpenILS::Application::Ingest->in_transaction) {
2195 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2201 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): a lexical named $b hides the package variable that sort blocks
# rely on; harmless while no sort is used in this loop, but easy to trip over.
2202 for my $b (@$bibs) {
2203 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Nothing is written unless the fingerprint or the quality actually changed.
2205 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2207 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2209 OpenILS::Application::Ingest->storage_req(
2210 'open-ils.storage.direct.biblio.record_entry.remote_update',
2212 { fingerprint => $fp->{fingerprint},
2213 quality => $fp->{quality} }
# Remapping: drop the record's existing source-map rows, remembering the
# metarecord they pointed at.
2216 if ($self->api_name !~ /nomap/o) {
2217 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2218 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2223 if (ref($old_source_map) and @$old_source_map) {
2224 for my $m (@$old_source_map) {
2225 $old_mrid = $m->metarecord;
2226 OpenILS::Application::Ingest->storage_req(
2227 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no source-map rows left, delete it as well.
2233 my $old_sm = OpenILS::Application::Ingest->storage_req(
2234 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2235 { metarecord => $old_mrid }
2238 if (ref($old_sm) and @$old_sm == 0) {
2239 OpenILS::Application::Ingest->storage_req(
2240 'open-ils.storage.direct.metabib.metarecord.delete',
# Find the metarecord for the new fingerprint, or create one with this record
# as master.
# NOTE(review): this search passes a hash ({ fingerprint => ... }) while
# wormize_biblio_record passes the bare fingerprint string to the same
# search.fingerprint.atomic method -- confirm which form is expected.
2245 my $mr = OpenILS::Application::Ingest->storage_req(
2246 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2247 { fingerprint => $fp->{fingerprint} }
2251 $mr = Fieldmapper::metabib::metarecord->new;
2252 $mr->fingerprint( $fp->{fingerprint} );
2253 $mr->master_record( $b->id );
2254 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map the record to the chosen metarecord.
2257 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2258 $mr_map->metarecord( $mr->id );
2259 $mr_map->source( $b->id );
2260 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2264 $client->respond($b->id);
# Failure path: log at ERROR level.
2268 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Finish the transaction only when $commit is set; $success picks the outcome.
2272 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2273 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Published with and without metarecord remapping ("nomap" is tested via api_name).
2276 __PACKAGE__->register_method(
2277 api_name => "open-ils.worm.fingerprint.record.update",
2278 method => "refingerprint_bibrec",
2284 __PACKAGE__->register_method(
2285 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2286 method => "refingerprint_bibrec",
# fingerprint_bibrec: retrieve bib record $rec and fingerprint its MARC through
# open-ils.worm.fingerprint.marc.
2293 sub fingerprint_bibrec {
2298 OpenILS::Application::Ingest->post_init();
2299 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2301 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# NOTE(review): other callers treat this result as a hash ref
# ($fp->{fingerprint}); if so, interpolating $fp logs HASH(0x...) -- confirm.
2302 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): biblio_fingerprint_record registers this same api_name later in
# the file -- confirm which registration is meant to win.
2306 __PACKAGE__->register_method(
2307 api_name => "open-ils.worm.fingerprint.record",
2308 method => "fingerprint_bibrec",
# fingerprint_mods: fingerprint a MODS document given as an XML string.
# The string is always parsed here and its document element handed to _fp_mods.
2314 sub fingerprint_mods {
2319 OpenILS::Application::Ingest->post_init();
2320 my $mods = $parser->parse_string($xml)->documentElement;
2322 return _fp_mods( $mods );
2324 __PACKAGE__->register_method(
2325 api_name => "open-ils.worm.fingerprint.mods",
2326 method => "fingerprint_mods",
# fingerprint_marc: fingerprint a MARCXML document (string or parsed) by
# transforming it to MODS with $mods_sheet and passing the result to _fp_mods.
2331 sub fingerprint_marc {
2336 $xml = $parser->parse_string($xml) unless (ref $xml);
2338 OpenILS::Application::Ingest->post_init();
2339 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2340 $log->debug("Returning [$fp] as fingerprint", INFO);
# NOTE(review): biblio_fingerprint registers this same api_name later in the
# file -- confirm which registration is meant to win.
2343 __PACKAGE__->register_method(
2344 api_name => "open-ils.worm.fingerprint.marc",
2345 method => "fingerprint_marc",
# biblio_fingerprint_record: retrieve bib record $rec from storage and
# fingerprint the result through open-ils.worm.fingerprint.marc.
2353 sub biblio_fingerprint_record {
2358 OpenILS::Application::Ingest->post_init();
2360 my $marc = OpenILS::Application::Ingest
2361 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2364 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2365 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): fingerprint_bibrec registers this same api_name earlier in the
# file -- confirm which registration is meant to win.
2368 __PACKAGE__->register_method(
2369 api_name => "open-ils.worm.fingerprint.record",
2370 method => "biblio_fingerprint_record",
# biblio_fingerprint: compute a fingerprint by running a configurable script
# (OpenILS::Utils::ScriptRunner) over the record's MARC and MODS.
#
# The MARC is parsed when given as a string, transformed to MODS, and both are
# passed through entityize before being exposed to the script as
# environment.marc and environment.mods. Script file and library paths come from
# the apps / open-ils.storage / app_settings section of the settings.
2376 sub biblio_fingerprint {
2381 OpenILS::Application::Ingest->post_init();
2383 $marc = $parser->parse_string($marc) unless (ref $marc);
2385 my $mods = OpenILS::Application::Ingest::entityize(
2387 ->transform( $marc )
2393 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2396 $log->internal("Got MARC [$marc]");
2397 $log->internal("Created MODS [$mods]");
2400 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2401 my $conf = OpenSRF::Utils::SettingsClient->new;
2403 my $libs = $conf->config_value(@pfx, 'script_path');
2404 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be configured as a single value or a list; normalise to a list.
2405 my $script_libs = (ref($libs)) ? $libs : [$libs];
2407 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared with `my` here, so it is presumably a variable
# shared across calls to cache the runner -- TODO confirm.
# NOTE(review): indirect-object `new Class (...)`; Class->new(...) is the
# unambiguous spelling.
2409 $fp_script = new OpenILS::Utils::ScriptRunner
2410 ( file => $script_file,
2411 paths => $script_libs,
2412 reset_count => 1000 );
2415 $log->debug("Applying environment for biblio fingerprinting...");
2417 my $env = {marc => $marc, mods => $mods};
2418 #my $res = {fingerprint => '', quality => '0'};
2420 $fp_script->insert('environment' => $env);
2421 #$fp_script->insert('result' => $res);
2423 $log->debug("Running script for biblio fingerprinting...");
# A false result from run() is logged as an error and turned into `return 0`.
# NOTE(review): the early return only fires if $log->error(...) returns a true
# value -- confirm, or split this into an explicit if block.
2425 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2427 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): fingerprint_marc registers this same api_name earlier in the
# file -- confirm which registration is meant to win.
2431 __PACKAGE__->register_method(
2432 api_name => "open-ils.worm.fingerprint.marc",
2433 method => "biblio_fingerprint",
2438 # --------------------------------------------------------------------------------
2452 my $create_source_map;
2467 my %descriptor_code = (
2468 item_type => 'substr($ldr,6,1)',
2469 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2470 bib_level => 'substr($ldr,7,1)',
2471 control_type => 'substr($ldr,8,1)',
2472 char_encoding => 'substr($ldr,9,1)',
2473 enc_level => 'substr($ldr,17,1)',
2474 cat_form => 'substr($ldr,18,1)',
2475 pub_status => 'substr($ldr,5,1)',
2476 item_lang => 'substr($oo8,35,3)',
2477 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2478 audience => 'substr($oo8,22,1)',
2488 if ($self->api_name =~ /no_map/o) {
2492 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2494 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2496 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2498 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2500 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2501 unless ($sm_lookup);
2502 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2503 unless ($mr_lookup);
2504 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2505 unless ($mr_update);
2506 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2508 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2509 unless ($update_entry);
2510 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2511 unless ($rm_old_sm);
2512 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2513 unless ($rm_old_rd);
2514 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2515 unless ($rm_old_fr);
2516 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2517 unless ($rm_old_tr);
2518 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2519 unless ($rm_old_ar);
2520 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2521 unless ($rm_old_sr);
2522 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2523 unless ($rm_old_kr);
2524 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2525 unless ($rm_old_ser);
2526 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2527 unless ($mr_create);
2528 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2529 unless ($create_source_map);
2530 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2531 unless ($rd_create);
2532 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2533 unless ($fr_create);
2534 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2535 unless ($$create{title});
2536 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2537 unless ($$create{author});
2538 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2539 unless ($$create{subject});
2540 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2541 unless ($$create{keyword});
2542 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2543 unless ($$create{series});
2546 my ($outer_xact) = $in_xact->run;
2548 unless ($outer_xact) {
2549 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2550 my ($r) = $begin->run($client);
2551 unless (defined $r and $r) {
2553 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2556 } catch Error with {
2557 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2567 for my $entry ( $lookup->run(@docids) ) {
2568 # step -1: grab the doc from storage
2569 next unless ($entry);
2572 my $xslt_doc = $parser->parse_file(
2573 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2574 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2577 my $xml = $entry->marc;
2578 my $docid = $entry->id;
2579 my $marcdoc = $parser->parse_string($xml);
2580 my $modsdoc = $mods_sheet->transform($marcdoc);
2582 my $mods = $modsdoc->documentElement;
2583 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2585 $entry->fingerprint( fingerprint_mods( $mods ) );
2586 push @entry_list, $entry;
2588 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2591 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2592 if (!$mr || !@$mr) {
2593 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2594 $mr = new Fieldmapper::metabib::metarecord;
2595 $mr->fingerprint( $entry->fingerprint );
2596 $mr->master_record( $entry->id );
2597 my ($new_mr) = $mr_create->run($mr);
2599 unless (defined $mr) {
2600 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2603 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2608 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2609 $sm->metarecord( $mr->id );
2610 $sm->source( $entry->id );
2611 push @source_maps, $sm;
2614 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2615 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2617 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2618 for my $rd_field ( keys %descriptor_code ) {
2619 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2621 $rd_obj->record( $docid );
2622 push @rd_list, $rd_obj;
2624 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2626 # step 2: build the KOHA rows
2627 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2628 $_->record( $docid ) for (@tmp_list);
2629 push @ns_list, @tmp_list;
2633 last unless ($self->api_name =~ /batch$/o);
2636 $rm_old_rd->run( { record => \@docids } );
2637 $rm_old_fr->run( { record => \@docids } );
2638 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2639 $rm_old_tr->run( { source => \@docids } );
2640 $rm_old_ar->run( { source => \@docids } );
2641 $rm_old_sr->run( { source => \@docids } );
2642 $rm_old_kr->run( { source => \@docids } );
2643 $rm_old_ser->run( { source => \@docids } );
2646 my ($sm) = $create_source_map->run(@source_maps);
2647 unless (defined $sm) {
2648 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2650 my ($mr) = $mr_update->run(@mr_list);
2651 unless (defined $mr) {
2652 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2656 my ($re) = $update_entry->run(@entry_list);
2657 unless (defined $re) {
2658 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2661 my ($rd) = $rd_create->run(@rd_list);
2662 unless (defined $rd) {
2663 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2666 my ($fr) = $fr_create->run(@ns_list);
2667 unless (defined $fr) {
2668 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2671 # step 5: insert the new metadata
2672 for my $class ( qw/title author subject keyword series/ ) {
2674 for my $doc ( @mods_data ) {
2675 my ($did) = keys %$doc;
2676 my ($data) = values %$doc;
2678 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2679 for my $row ( keys %{ $$data{$class} } ) {
2680 next unless (exists $$data{$class}{$row});
2681 next unless ($$data{$class}{$row}{value});
2682 my $fm_obj = $fm_constructor->new;
2683 $fm_obj->value( $$data{$class}{$row}{value} );
2684 $fm_obj->field( $$data{$class}{$row}{field_id} );
2685 $fm_obj->source( $did );
2686 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2688 push @md_list, $fm_obj;
2692 my ($cr) = $$create{$class}->run(@md_list);
2693 unless (defined $cr) {
2694 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2698 unless ($outer_xact) {
2699 $log->debug("Commiting transaction started by the Ingest.", INFO);
2700 my ($c) = $commit->run;
2701 unless (defined $c and $c) {
2703 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the bibliographic ingest under four API names. Every one of them
# dispatches to the same "wormize" method; the method itself inspects its
# api_name for "no_map" and a trailing "batch" to pick its behaviour.
# Any further registration arguments are not visible in this listing.
2709 __PACKAGE__->register_method(
2710 api_name => "open-ils.worm.wormize",
2711 method => "wormize",
# Variant whose api_name matches /no_map/.
2715 __PACKAGE__->register_method(
2716 api_name => "open-ils.worm.wormize.no_map",
2717 method => "wormize",
# Variant whose api_name ends in "batch".
2721 __PACKAGE__->register_method(
2722 api_name => "open-ils.worm.wormize.batch",
2723 method => "wormize",
# Variant matching both /no_map/ and /batch$/.
2727 __PACKAGE__->register_method(
2728 api_name => "open-ils.worm.wormize.no_map.batch",
2729 method => "wormize",
# File-scoped variable declared ahead of authority_wormize.
# NOTE(review): it is not referenced by any visible line of authority_wormize;
# confirm whether it is still needed.
2744 my $acreate_source_map;
2759 sub authority_wormize {
# Ingest authority records.
#
# For the record ids passed in (@docids) this method:
#   * looks up the storage methods it needs, keeping the authority-specific
#     ones in file-scoped variables that are only filled when still unset;
#   * asks storage whether a transaction is already open and begins one
#     when it is not;
#   * retrieves each authority record entry, parses its MARC XML, and builds
#     a record_descriptor object plus the full_rec rows for it;
#   * mass-deletes the old descriptor / full_rec rows for @docids and
#     batch-creates the new ones;
#   * commits only when the transaction was started here.
#
# Throws OpenSRF::EX::PANIC when the transaction cannot be begun or
# committed, or when a batch create returns undef.
#
# Only the first retrieved entry is processed unless the api_name ends in
# "batch".
2766 if ($self->api_name =~ /no_map/o) {
2770 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2772 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2774 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2776 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2778 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
# NOTE(review): $aupdate_entry is looked up but never run in the visible
# lines of this sub -- confirm whether the entries should be written back.
2780 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2781 unless ($aupdate_entry);
2782 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2783 unless ($arm_old_rd);
2784 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2785 unless ($arm_old_fr);
2786 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2787 unless ($ard_create);
2788 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2789 unless ($afr_create);
# A true value here means the caller already opened a transaction, so this
# sub must neither begin nor commit one.
2792 my ($outer_xact) = $in_xact->run;
2794 unless ($outer_xact) {
2795 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2796 my ($r) = $begin->run($client);
2797 unless (defined $r and $r) {
2799 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2802 } catch Error with {
2803 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Use the authority retrieve handle ($alookup). The previous code iterated
# $lookup, which is the biblio.record_entry retrieve handle set up by the
# bibliographic ingest (and undefined if that ingest has not run yet).
2813 for my $entry ( $alookup->run(@docids) ) {
2814 # step -1: grab the doc from storage
2815 next unless ($entry);
2818 # my $xslt_doc = $parser->parse_file(
2819 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2820 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2823 my $xml = $entry->marc;
2824 my $docid = $entry->id;
2825 my $marcdoc = $parser->parse_string($xml);
2826 #my $madsdoc = $mads_sheet->transform($marcdoc);
2828 #my $mads = $madsdoc->documentElement;
2829 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2831 push @entry_list, $entry;
# $ldr and $oo8 are referenced by name from the code strings stored in
# %descriptor_code, which are string-eval'd below.
2833 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2834 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2836 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2837 for my $rd_field ( keys %descriptor_code ) {
2838 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2840 $rd_obj->record( $docid );
2841 push @rd_list, $rd_obj;
2843 # step 2: build the KOHA rows
2844 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2845 $_->record( $docid ) for (@tmp_list);
2846 push @ns_list, @tmp_list;
# Non-batch API names stop after the first retrieved entry.
2850 last unless ($self->api_name =~ /batch$/o);
# Remove the previously ingested rows before inserting the fresh ones.
2853 $arm_old_rd->run( { record => \@docids } );
2854 $arm_old_fr->run( { record => \@docids } );
2856 my ($rd) = $ard_create->run(@rd_list);
2857 unless (defined $rd) {
2858 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Use the authority full_rec creator ($afr_create), matching the method named
# in the error message below. The previous code ran $fr_create, the
# metabib.full_rec creator belonging to the bibliographic ingest.
2861 my ($fr) = $afr_create->run(@ns_list);
2862 unless (defined $fr) {
2863 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only a transaction that this sub began itself.
2866 unless ($outer_xact) {
2867 $log->debug("Commiting transaction started by Ingest.", INFO);
2868 my ($c) = $commit->run;
2869 unless (defined $c and $c) {
2871 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the authority ingest API names. Both must dispatch to
# authority_wormize; they previously pointed at "wormize", the bibliographic
# ingest, which left authority_wormize unreachable through these names.
2877 __PACKAGE__->register_method(
# NOTE(review): "authortiy" is misspelled, while the batch variant below
# spells "authority". The string is left as is because it is the published
# API name -- confirm with callers before renaming.
2878 api_name => "open-ils.worm.authortiy.wormize",
2879 method => "authority_wormize",
2883 __PACKAGE__->register_method(
2884 api_name => "open-ils.worm.authority.wormize.batch",
2885 method => "authority_wormize",
2891 # --------------------------------------------------------------------------------
2894 sub _marcxml_to_full_rows {
# Flatten a parsed MARCXML document into full_rec row objects: one per
# leader, one per controlfield, and one per child node of each datafield.
#
# Arguments:
#   $marcxml - parsed document (documentElement is called on it)
#   $type    - class name of the row objects to build; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Text values are NFD-decomposed and have their combining marks (\pM)
# removed; subfield values are additionally lower-cased.
2896 my $marcxml = shift;
2897 my $type = shift || 'Fieldmapper::metabib::full_rec';
2901 my $root = $marcxml->documentElement;
2903 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2904 next unless $tagline;
# Build the row from the caller-selected class. The metabib class used to be
# hard-coded here, so callers passing another $type (the authority ingest)
# got metabib rows for the leader.
2906 my $ns = $type->new;
2909 my $val = NFD($tagline->textContent);
2910 $val =~ s/(\pM+)//gso;
2916 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2917 next unless $tagline;
# Same fix as for the leader: honour $type for controlfield rows as well.
2919 my $ns = $type->new;
2921 $ns->tag( $tagline->getAttribute( "tag" ) );
2922 my $val = NFD($tagline->textContent);
2923 $val =~ s/(\pM+)//gso;
2929 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2930 next unless $tagline;
2932 my $tag = $tagline->getAttribute( "tag" );
2933 my $ind1 = $tagline->getAttribute( "ind1" );
2934 my $ind2 = $tagline->getAttribute( "ind2" );
# One row per child node of the datafield.
2936 for my $data ( $tagline->childNodes ) {
2939 my $ns = $type->new;
2944 $ns->subfield( $data->getAttribute( "code" ) );
2945 my $val = NFD($data->textContent);
2946 $val =~ s/(\pM+)//gso;
2947 $ns->value( lc($val) );
2955 sub _get_field_value {
# Collect the text found under $xpath (evaluated against $root) into one
# space-separated string, then NFD-decompose it and remove combining marks.
#
# Arguments:
#   $root  - node on which findnodes() is called
#   $xpath - XPath expression selecting the nodes of interest
2957 my( $root, $xpath ) = @_;
2961 # grab the set of matching nodes
2962 my @nodes = $root->findnodes( $xpath );
2963 for my $value (@nodes) {
2965 # grab all children of the node
2966 my @children = $value->childNodes();
2967 for my $child (@children) {
2969 # add the child's content to the growing buffer
2970 my $content = quotemeta($child->textContent);
# This is a substring test against everything gathered so far, so text that
# merely occurs inside an earlier value is skipped too, not only exact
# repeats.
2971 next if ($string =~ /$content/); # uniquify the values
2972 $string .= $child->textContent . " ";
# NOTE(review): the condition guarding this append is not visible in this
# listing; presumably it covers nodes without children -- confirm.
2975 $string .= $value->textContent . " ";
# Decompose to NFD so accents become separate combining marks, then drop
# every mark character.
2978 $string = NFD($string);
2979 $string =~ s/(\pM)//gso;
2984 sub modsdoc_to_values {
2985 my( $self, $mods ) = @_;
2987 for my $class (keys %$xpathset) {
2988 $data->{$class} = {};
2989 for my $type (keys %{$xpathset->{$class}}) {
2990 $data->{$class}->{$type} = {};
2991 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};