1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Metadata formats known to the ingest service, keyed by format name.  Each
# entry carries the XML namespace URI (ns) used for that format.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; log methods are invoked on it as class methods.
39 my $log = 'OpenSRF::Utils::Logger';
# XML parser and XSLT processor instances used by the code in this file.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# Initialisation work, performed only while $xpathset has no keys: load the
# MODS and MODS v3 stylesheets (each only if not already stored) and fill
# $xpathset from the config.metabib_field rows held by cstore.
# NOTE(review): presumably the body of post_init, which other subs call as
# OpenILS::Application::Ingest->post_init() -- confirm.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL stylesheets, read from the settings server.
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for every config.metabib_field row whose id is not null.
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
72 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
75 if (ref $req and @$req) {
# Record xpath, id and format for each field, indexed by class then name.
# NOTE(review): $f is presumably each element of @$req -- confirm.
77 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with its hexadecimal
# numeric character reference (&#xNN;).
# NOTE(review): code points above U+FFFD are not matched by this character
# class and pass through unchanged -- confirm that is intended.
96 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
100 # --------------------------------------------------------------------------------
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
# Read/write ingest of a single bibliographic record object ($bib).
# Runs the read-only ingest to build the derived rows, then replaces the
# stored derived rows for this record through cstore between a
# transaction.begin and a transaction.commit request.
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value.
# NOTE(review): no rollback request is visible for a failure between begin
# and commit -- confirm how errors are handled.
107 sub rw_biblio_ingest_single_object {
112 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113 return undef unless ($blob);
# Copy the freshly computed fingerprint and quality onto the record.
115 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116 $bib->quality( $blob->{fingerprint}->{quality} );
118 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
120 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
122 # update full_rec stuff ...
123 my $tmp = $cstore->request(
124 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125 { record => $bib->id }
# Delete the existing rows by id, then create the new ones.
128 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
131 # update rec_descriptor stuff ...
132 $tmp = $cstore->request(
133 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134 { record => $bib->id }
137 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
140 # deal with classed fields...
141 for my $class ( qw/title author subject keyword series/ ) {
142 $tmp = $cstore->request(
143 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144 { source => $bib->id }
147 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Create each new field entry; the cstore class path is derived from the
# object's class name with the leading "Fieldmapper::" removed.
149 for my $obj ( @{ $blob->{field_entries} } ) {
150 my $class = $obj->class_name;
151 $class =~ s/^Fieldmapper:://o;
153 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Remove this record's existing metarecord source mappings.
158 $tmp = $cstore->request(
159 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
160 { source => $bib->id }
163 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);
166 # Get the matching MR, if any.
167 my $mr = $cstore->request(
168 'open-ils.cstore.direct.metabib.metarecord.search',
169 { fingerprint => $bib->fingerprint }
# Build a new metarecord with this record as its master.
# NOTE(review): presumably only when the search above found none -- confirm.
173 $mr = new Fieldmapper::metabib::metarecord;
174 $mr->fingerprint( $bib->fingerprint );
175 $mr->master_record( $bib->id );
178 "open-ils.cstore.direct.metabib.metarecord.create",
179 $mr => { quiet => 'true' }
# Load the metarecord's current source mappings.
183 my $mrm = $cstore->request(
184 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
185 { metarecord => $mr->id }
# Among the mapped records, fetch id and quality ordered by quality desc.
188 my $best = $cstore->request(
189 "open-ils.cstore.direct.biblio.record_entry.search",
190 { id => [ map { $_->source } @$mrm ] },
191 { 'select' => { bre => [ qw/id quality/ ] },
192 order_by => { bre => "quality desc" },
# The master becomes $best only when its quality is strictly higher than
# this record's; otherwise this record becomes the master.
197 if ($best->quality > $bib->quality) {
198 $mr->master_record($best->id);
200 $mr->master_record($bib->id);
203 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the metarecord and store the updated record itself.
206 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
207 $mrm->source($bib->id);
208 $mrm->metarecord($mr->id);
210 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
211 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
213 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
217 __PACKAGE__->register_method(
218 api_name => "open-ils.ingest.full.biblio.object",
219 method => "rw_biblio_ingest_single_object",
# Read/write ingest by record id: retrieve the biblio.record_entry for $rec
# from cstore and pass it to open-ils.ingest.full.biblio.object.
# Returns undef when nothing was retrieved.
224 sub rw_biblio_ingest_single_record {
229 OpenILS::Application::Ingest->post_init();
230 my $r = OpenSRF::AppSession
231 ->create('open-ils.cstore')
232 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
235 return undef unless ($r and @$r);
237 return $self->method_lookup("open-ils.ingest.full.biblio.object")->run($r);
239 __PACKAGE__->register_method(
240 api_name => "open-ils.ingest.full.biblio.record",
241 method => "rw_biblio_ingest_single_record",
# Read-only ingest of a bibliographic record object ($bib): derive flat MARC
# rows, field entries, fingerprint and descriptor from $bib->marc without
# storing anything.
# Returns a hash ref with keys full_rec, field_entries, fingerprint and
# descriptor.
246 sub ro_biblio_ingest_single_object {
250 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
252 my $document = $parser->parse_string($xml);
# The first two extractors receive the parsed document; the fingerprint and
# descriptor extractors receive the XML string.
254 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
255 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
256 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
257 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the owning record id onto every derived row.
259 $_->source($bib->id) for (@mXfe);
260 $_->record($bib->id) for (@mfr);
261 $rd->record($bib->id) if ($rd);
263 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
265 __PACKAGE__->register_method(
266 api_name => "open-ils.ingest.full.biblio.object.readonly",
267 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a MARCXML string: same extraction as the object
# variant, but the derived rows are returned without a record id set.
# Returns a hash ref with keys full_rec, field_entries, fingerprint and
# descriptor.
272 sub ro_biblio_ingest_single_xml {
275 my $xml = OpenILS::Application::Ingest::entityize(shift);
277 my $document = $parser->parse_string($xml);
279 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
280 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
281 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
282 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
284 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
286 __PACKAGE__->register_method(
287 api_name => "open-ils.ingest.full.biblio.xml.readonly",
288 method => "ro_biblio_ingest_single_xml",
# Read-only ingest by record id: retrieve the biblio.record_entry for $rec,
# run the XML read-only ingest on its MARC, and stamp $rec onto the derived
# rows.  Returns undef when nothing was retrieved.
293 sub ro_biblio_ingest_single_record {
298 OpenILS::Application::Ingest->post_init();
299 my $r = OpenSRF::AppSession
300 ->create('open-ils.cstore')
301 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
304 return undef unless ($r and @$r);
306 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
308 $_->source($rec) for (@{$res->{field_entries}});
309 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike the object variant, which guards with "if ($rd)",
# this call is unconditional and would die if descriptor is undef -- confirm.
310 $res->{descriptor}->record($rec);
314 __PACKAGE__->register_method(
315 api_name => "open-ils.ingest.full.biblio.record.readonly",
316 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id: for every id received from the
# client, run open-ils.ingest.full.biblio.record.readonly and respond with
# the result.
321 sub ro_biblio_ingest_stream_record {
325 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
327 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
329 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
331 my $rec = $resp->content;
332 last unless (defined $rec);
334 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
335 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
337 $_->source($rec) for (@{$res->{field_entries}});
338 $_->record($rec) for (@{$res->{full_rec}});
340 $client->respond( $res );
345 __PACKAGE__->register_method(
346 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
347 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of MARCXML: for every XML string received from
# the client, run open-ils.ingest.full.biblio.xml.readonly and respond with
# the result.
352 sub ro_biblio_ingest_stream_xml {
356 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
358 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
360 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
362 my $xml = $resp->content;
363 last unless (defined $xml);
365 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
366 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
368 $client->respond( $res );
373 __PACKAGE__->register_method(
374 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
375 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of record objects: for every object received from the
# client, run the XML read-only ingest on its MARC, stamp the object's id
# onto the derived rows and respond with the result.
# NOTE(review): despite the "rw"/"import" naming, no write to storage is
# visible in this code -- confirm whether that is intended.
380 sub rw_biblio_ingest_stream_import {
384 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
386 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
388 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
390 my $bib = $resp->content;
391 last unless (defined $bib);
393 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
394 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
396 $_->source($bib->id) for (@{$res->{field_entries}});
397 $_->record($bib->id) for (@{$res->{full_rec}});
399 $client->respond( $res );
404 __PACKAGE__->register_method(
405 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
406 method => "rw_biblio_ingest_stream_import",
412 # --------------------------------------------------------------------------------
415 package OpenILS::Application::Ingest::Authority;
416 use base qw/OpenILS::Application::Ingest/;
417 use Unicode::Normalize;
# Read-only ingest of an authority record object ($bib): derive the flat
# MARC rows from $bib->marc and stamp the record id onto each.
# Returns a hash ref with the single key full_rec.
419 sub ro_authority_ingest_single_object {
423 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
425 my $document = $parser->parse_string($xml);
427 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
429 $_->record($bib->id) for (@mfr);
431 return { full_rec => \@mfr };
433 __PACKAGE__->register_method(
434 api_name => "open-ils.ingest.full.authority.object.readonly",
435 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARCXML string: derive the flat MARC
# rows, with no record id set.
# Returns a hash ref with the single key full_rec.
440 sub ro_authority_ingest_single_xml {
443 my $xml = OpenILS::Application::Ingest::entityize(shift);
445 my $document = $parser->parse_string($xml);
447 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
449 return { full_rec => \@mfr };
451 __PACKAGE__->register_method(
452 api_name => "open-ils.ingest.full.authority.xml.readonly",
453 method => "ro_authority_ingest_single_xml",
# Read-only ingest by authority record id: retrieve the
# authority.record_entry for $rec, run the authority XML read-only ingest on
# its MARC and stamp $rec onto the flat MARC rows.
# Returns undef when nothing was retrieved.
458 sub ro_authority_ingest_single_record {
463 OpenILS::Application::Ingest->post_init();
464 my $r = OpenSRF::AppSession
465 ->create('open-ils.cstore')
466 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
469 return undef unless ($r and @$r);
471 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
473 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest returns only { full_rec => ... },
# so $res->{descriptor} looks undefined here and this call would die --
# confirm and remove or guard.
474 $res->{descriptor}->record($rec);
478 __PACKAGE__->register_method(
479 api_name => "open-ils.ingest.full.authority.record.readonly",
480 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id: for every id received
# from the client, run open-ils.ingest.full.authority.record.readonly and
# respond with the result.
485 sub ro_authority_ingest_stream_record {
489 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
491 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
493 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
495 my $rec = $resp->content;
496 last unless (defined $rec);
498 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
499 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
501 $_->record($rec) for (@{$res->{full_rec}});
503 $client->respond( $res );
508 __PACKAGE__->register_method(
509 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
510 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of MARCXML: for every XML string
# received from the client, run open-ils.ingest.full.authority.xml.readonly
# and respond with the result.
515 sub ro_authority_ingest_stream_xml {
519 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
521 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
523 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
525 my $xml = $resp->content;
526 last unless (defined $xml);
528 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
529 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
531 $client->respond( $res );
536 __PACKAGE__->register_method(
537 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
538 method => "ro_authority_ingest_stream_xml",
# Streaming authority ingest of record objects: for every object received
# from the client, run the authority XML read-only ingest on its MARC, stamp
# the object's id onto the flat MARC rows and respond with the result.
# NOTE(review): despite the "rw"/"import" naming, no write to storage is
# visible in this code -- confirm whether that is intended.
543 sub rw_authority_ingest_stream_import {
547 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the code visible here -- confirm.
549 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message at a time (5 second timeout); undefined content ends
# the loop.
551 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
553 my $bib = $resp->content;
554 last unless (defined $bib);
556 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
557 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
559 $_->record($bib->id) for (@{$res->{full_rec}});
561 $client->respond( $res );
566 __PACKAGE__->register_method(
567 api_name => "open-ils.ingest.full.authority.bib_stream.import",
568 method => "rw_authority_ingest_stream_import",
574 # --------------------------------------------------------------------------------
575 # MARC index extraction
577 package OpenILS::Application::Ingest::XPATH;
578 use base qw/OpenILS::Application::Ingest/;
579 use Unicode::Normalize;
# Collect the text matched by an XPath expression into one string.
# Takes an XML document element and an XPath expression; when both a
# namespace URI and a prefix are supplied, the namespace is set on the
# element before the expression is evaluated.  Text pieces are appended to
# the result, each followed by a single space.
581 # give this an XML documentElement and an XPATH expression
582 sub xpath_to_string {
586 my $ns_prefix = shift;
589 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
593 # grab the set of matching nodes
594 my @nodes = $xml->findnodes( $xpath );
595 for my $value (@nodes) {
597 # grab all children of the node
598 my @children = $value->childNodes();
599 for my $child (@children) {
601 # add the childs content to the growing buffer
# quotemeta makes the text safe to use as a literal regex below.
# NOTE(review): the uniqueness test is a substring match against the whole
# buffer, so a value contained inside earlier text is also skipped -- confirm.
602 my $content = quotemeta($child->textContent);
603 next if ($unique && $string =~ /$content/); # uniquify the values
604 $string .= $child->textContent . " ";
# NOTE(review): presumably the branch taken for a node without children --
# the condition is not visible here; confirm.
607 $string .= $value->textContent . " ";
# Extract index field entries from a MARC record for the requested classes.
# For each class and each field definition held in $xpathset for it, the
# record is transformed into the definition's format, the definition's XPath
# is evaluated, the text is normalised, and a <class>_field_entry object
# carrying the value and the field id is sent to the client.
613 sub class_index_string_xml {
619 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document (a reference) or a raw XML string.
620 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
624 for my $class (@classes) {
625 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
626 for my $type ( keys %{ $xpathset->{$class} } ) {
628 my $def = $xpathset->{$class}->{$type};
629 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse a previous transform of the same format when one is stored in
# %transform_cache; otherwise run the format's stylesheet and store it.
634 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
635 $transform_cache{$def->{format}} = $document;
638 my $value = xpath_to_string(
639 $document->documentElement => $def->{xpath},
640 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), remove mark characters (\pM) and "other"
# characters (\pC), strip trailing non-word characters, and remove runs of
# dots that sit between two word characters.
646 $value = NFD($value);
647 $value =~ s/\pM+//sgo;
648 $value =~ s/\pC+//sgo;
649 $value =~ s/\W+$//sgo;
651 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
654 my $fm = $class_constructor->new;
655 $fm->value( $value );
656 $fm->field( $xpathset->{$class}->{$type}->{id} );
657 $client->respond($fm);
662 __PACKAGE__->register_method(
663 api_name => "open-ils.ingest.field_entry.class.xml",
664 method => "class_index_string_xml",
# Extract index field entries for the given classes from a stored record:
# retrieve the record for $rec, run open-ils.ingest.field_entry.class.xml on
# its MARC and forward each resulting entry to the client.
# Returns undef when nothing was retrieved.
670 sub class_index_string_record {
676 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves an authority.record_entry, while the
# otherwise parallel all_index_string_record retrieves a
# biblio.record_entry -- confirm which is intended.
677 my $r = OpenSRF::AppSession
678 ->create('open-ils.cstore')
679 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
682 return undef unless ($r and @$r);
684 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
686 $client->respond($fm);
690 __PACKAGE__->register_method(
691 api_name => "open-ils.ingest.field_entry.class.record",
692 method => "class_index_string_record",
# Extract index field entries for every class present in $xpathset from the
# given XML, forwarding each entry to the client.
698 sub all_index_string_xml {
703 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
704 $client->respond($fm);
708 __PACKAGE__->register_method(
709 api_name => "open-ils.ingest.extract.field_entry.all.xml",
710 method => "all_index_string_xml",
# Extract index field entries for every class present in $xpathset from a
# stored bibliographic record: retrieve the biblio.record_entry for $rec and
# run open-ils.ingest.field_entry.class.xml on its MARC, forwarding each
# entry to the client.  Returns undef when nothing was retrieved.
716 sub all_index_string_record {
721 OpenILS::Application::Ingest->post_init();
722 my $r = OpenSRF::AppSession
723 ->create('open-ils.cstore')
724 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
727 return undef unless ($r and @$r);
729 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
731 $client->respond($fm);
735 __PACKAGE__->register_method(
736 api_name => "open-ils.ingest.extract.field_entry.all.record",
737 method => "all_index_string_record",
743 # --------------------------------------------------------------------------------
746 package OpenILS::Application::Ingest::FlatMARC;
747 use base qw/OpenILS::Application::Ingest/;
748 use Unicode::Normalize;
# Flatten a parsed MARCXML document into full_rec row objects.
# The optional second argument selects the Fieldmapper namespace and
# defaults to 'metabib', giving Fieldmapper::<xmltype>::full_rec.
# Walks the leader, controlfield and datafield/subfield children of the
# first element whose local name is "record".
751 sub _marcxml_to_full_rows {
754 my $xmltype = shift || 'metabib';
756 my $type = "Fieldmapper::${xmltype}::full_rec";
# Match on local-name() so the record element is found whatever its prefix.
760 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
762 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
763 next unless $tagline;
768 my $val = $tagline->textContent;
778 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
779 next unless $tagline;
783 $ns->tag( $tagline->getAttribute( "tag" ) );
784 my $val = $tagline->textContent;
794 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
795 next unless $tagline;
797 my $tag = $tagline->getAttribute( "tag" );
798 my $ind1 = $tagline->getAttribute( "ind1" );
799 my $ind2 = $tagline->getAttribute( "ind2" );
801 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
809 $ns->subfield( $data->getAttribute( "code" ) );
810 my $val = $data->textContent;
# Subfield values are stored lower-cased.
815 $ns->value( lc($val) );
821 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Flatten MARCXML into full_rec rows and send each row to the client.
# Serves both the authority and the biblio API names; the row type is chosen
# from the API name this method was invoked under.
# NOTE(review): presumably the body of flat_marc_xml, the method name used
# in the registrations below -- confirm.
830 $log->debug("processing [$xml]");
# Accept either a parsed document (a reference) or a raw XML string.
832 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
834 my $type = 'metabib';
835 $type = 'authority' if ($self->api_name =~ /authority/o);
837 OpenILS::Application::Ingest->post_init();
839 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
842 __PACKAGE__->register_method(
843 api_name => "open-ils.ingest.flat_marc.authority.xml",
844 method => "flat_marc_xml",
849 __PACKAGE__->register_method(
850 api_name => "open-ils.ingest.flat_marc.biblio.xml",
851 method => "flat_marc_xml",
# Flatten a stored record into full_rec rows: retrieve the record_entry of
# the selected type for $rec, run the matching flat_marc.*.xml method on its
# MARC and send each row to the client.
# Returns undef when no record, or no MARC, was retrieved.
857 sub flat_marc_record {
# Switch to 'authority' when invoked under an authority API name.
863 $type = 'authority' if ($self->api_name =~ /authority/o);
865 OpenILS::Application::Ingest->post_init();
866 my $r = OpenSRF::AppSession
867 ->create('open-ils.cstore')
868 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
872 return undef unless ($r and $r->marc);
874 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
875 for my $row (@rows) {
876 $client->respond($row);
# Each row is also logged as JSON.
877 $log->debug(JSON->perl2JSON($row), DEBUG);
881 __PACKAGE__->register_method(
882 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
883 method => "flat_marc_record",
888 __PACKAGE__->register_method(
889 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
890 method => "flat_marc_record",
896 # --------------------------------------------------------------------------------
899 package OpenILS::Application::Ingest::Biblio::Fingerprint;
900 use base qw/OpenILS::Application::Ingest/;
901 use Unicode::Normalize;
902 use OpenSRF::EX qw/:try/;
# Compute the fingerprint of a stored bibliographic record: retrieve the
# biblio.record_entry for $rec and run open-ils.ingest.fingerprint.xml on its
# MARC.  The quality value of the result is truncated to an integer.
# Returns undef when no record, or no MARC, was retrieved.
904 sub biblio_fingerprint_record {
909 OpenILS::Application::Ingest->post_init();
911 my $r = OpenSRF::AppSession
912 ->create('open-ils.cstore')
913 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
916 return undef unless ($r and $r->marc);
918 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is dereferenced as a hash on the next line, so the
# interpolated "[$fp]" presumably logs a reference address rather than the
# fingerprint text -- confirm.
919 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
920 $fp->{quality} = int($fp->{quality});
923 __PACKAGE__->register_method(
924 api_name => "open-ils.ingest.fingerprint.record",
925 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARCXML string by running the configured
# biblio_fingerprint script through OpenILS::Utils::ScriptRunner.
# The script file and library paths come from the open-ils.ingest
# app_settings section of the settings server.
931 sub biblio_fingerprint {
934 my $xml = OpenILS::Application::Ingest::entityize(shift);
936 $log->internal("Got MARC [$xml]");
939 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
940 my $conf = OpenSRF::Utils::SettingsClient->new;
942 my $libs = $conf->config_value(@pfx, 'script_path');
943 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# Treat a single configured path the same as a list of paths.
944 my $script_libs = (ref($libs)) ? $libs : [$libs];
946 $log->debug("Loading script $script_file for biblio fingerprinting...");
948 $fp_script = new OpenILS::Utils::ScriptRunner
949 ( file => $script_file,
950 paths => $script_libs,
951 reset_count => 100 );
# Expose the record to the script as environment.marc.
954 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): because of the "&&", "return undef" only executes when
# $log->error returns a true value -- confirm the logger guarantees that.
956 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
957 $log->debug("Script for biblio fingerprinting completed successfully...");
961 __PACKAGE__->register_method(
962 api_name => "open-ils.ingest.fingerprint.xml",
963 method => "biblio_fingerprint",
# Extract a record descriptor from a MARCXML string by running the
# configured biblio_descriptor script through OpenILS::Utils::ScriptRunner.
# The script file and library paths come from the open-ils.ingest
# app_settings section of the settings server.
969 sub biblio_descriptor {
972 my $xml = OpenILS::Application::Ingest::entityize(shift);
974 $log->internal("Got MARC [$xml]");
977 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
978 my $conf = OpenSRF::Utils::SettingsClient->new;
980 my $libs = $conf->config_value(@pfx, 'script_path');
981 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# Treat a single configured path the same as a list of paths.
982 my $script_libs = (ref($libs)) ? $libs : [$libs];
984 $log->debug("Loading script $script_file for biblio descriptor extraction...");
986 $rd_script = new OpenILS::Utils::ScriptRunner
987 ( file => $script_file,
988 paths => $script_libs,
989 reset_count => 100 );
992 $log->debug("Setting up environment for descriptor extraction script...");
# Expose the record to the script under the key environment.marc.
993 $rd_script->insert('environment.marc' => $xml => 1);
994 $log->debug("Environment building complete...");
# NOTE(review): because of the "&&", "return undef" only executes when
# $log->error returns a true value -- confirm the logger guarantees that.
996 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
997 $log->debug("Script for biblio descriptor extraction completed successfully");
1001 __PACKAGE__->register_method(
1002 api_name => "open-ils.ingest.descriptor.xml",
1003 method => "biblio_descriptor",
# Return the result of open-ils.storage.transaction.current, as obtained
# through storage_req.
1013 sub in_transaction {
1014 OpenILS::Application::Ingest->post_init();
1015 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Start a storage transaction and return the current transaction as reported
# by open-ils.storage.transaction.current.
# Throws OpenSRF::EX::PANIC when the begin request returns a false value; a
# rollback request is issued first in that case.
1018 sub begin_transaction {
1022 OpenILS::Application::Ingest->post_init();
1023 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1027 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1028 #__PACKAGE__->st_sess->connect;
1029 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
1030 unless (defined $r and $r) {
1031 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1032 #__PACKAGE__->st_sess->disconnect;
1033 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1037 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1040 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction.
# Logs that Ingest is not inside a transaction on the alternative path, and
# rethrows any caught Error as OpenSRF::EX::PANIC.
1043 sub rollback_transaction {
1047 OpenILS::Application::Ingest->post_init();
1048 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1052 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1054 $log->debug("Ingest isn't inside a transaction.", INFO);
1056 } catch Error with {
1057 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction.
# When the commit request returns a false value, a rollback request is
# issued and OpenSRF::EX::PANIC is thrown.  Any caught Error is rethrown as
# OpenSRF::EX::PANIC.
1063 sub commit_transaction {
1067 OpenILS::Application::Ingest->post_init();
1068 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1071 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1073 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
1074 unless (defined $r and $r) {
1075 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1076 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1078 #__PACKAGE__->st_sess->disconnect;
1080 $log->debug("Ingest isn't inside a transaction.", INFO);
1082 } catch Error with {
1083 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Look up $method on this package, run it with the remaining arguments and
# return only the first element of its result list.
# NOTE(review): presumably the body of storage_req, with $method taken from
# @_ beforehand -- confirm.
1092 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1093 return shift( @res );
# Delete the derived rows (full_rec and record_descriptor) of authority
# record $rec.  The deletes run between a savepoint set and release; on
# failure the error is logged and the savepoint is rolled back.
# A transaction is started when none is in progress.  The final commit or
# rollback is issued only when $commit is true.
1096 sub scrub_authority_record {
1102 if (!OpenILS::Application::Ingest->in_transaction) {
1103 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1109 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1111 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1112 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1114 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
1116 $log->debug('Scrubbing failed : '.shift(), ERROR);
1117 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
1121 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1122 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1125 __PACKAGE__->register_method(
1126 api_name => "open-ils.worm.scrub.authority",
1127 method => "scrub_authority_record",
# Delete the derived metabib rows of bibliographic record $rec and repair
# any metarecord that had it as master.
# $rec may be a hash ref, in which case it is first resolved through
# open-ils.storage.id_list.biblio.record_entry.search_where.
# The work runs between a savepoint set and release; on failure the error is
# logged and the savepoint is rolled back.  The final commit or rollback is
# issued only when $commit is true.
1133 sub scrub_metabib_record {
1138 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1139 $rec = OpenILS::Application::Ingest->storage_req(
1140 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1145 if (!OpenILS::Application::Ingest->in_transaction) {
1146 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1152 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Remove every derived row keyed on this record.
1154 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1155 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1156 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1157 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1158 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1159 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1160 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1161 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1163 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1164 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1166 for my $mr (@$masters) {
1167 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Other source records still mapped to this metarecord.
1168 my $others = OpenILS::Application::Ingest->storage_req(
1169 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining mapped record to master and clear mods.
# NOTE(review): presumably only when $others is non-empty -- confirm.
1172 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1173 $mr->master_record($others->[0]->source);
1174 OpenILS::Application::Ingest->storage_req(
1175 'open-ils.storage.direct.metabib.metarecord.remote_update',
1177 { master_record => $others->[0]->source, mods => undef }
# Delete the metarecord; the same messages go to both warn and the log.
# NOTE(review): presumably the branch taken when no other mapped records
# remain -- confirm.
1180 warn "Removing metarecord whose master is $rec";
1181 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1182 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1183 warn "Metarecord removed";
1184 $log->debug( "Metarecord removed", DEBUG);
1188 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
1191 $log->debug('Scrubbing failed : '.shift(), ERROR);
1192 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1196 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1197 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1200 __PACKAGE__->register_method(
1201 api_name => "open-ils.worm.scrub.biblio",
1202 method => "scrub_metabib_record",
# Re-ingest every bibliographic record mapped to metarecord $mrec by calling
# wormize_biblio_record on each mapped source.  A hash with record,
# metarecord and success keys is built per record, both on the normal path
# and in the error handler.
# Registered under four API names; the nomap/noscrub variants are told apart
# by API name in wormize_biblio_record.
1207 sub wormize_biblio_metarecord {
1212 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1215 for my $r (@$recs) {
1218 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is used below, but only $r (loop variable) and $mrec
# (argument) appear in the code visible here -- possibly meant to be
# $r->metarecord; confirm.
1220 { record => $r->source,
1221 metarecord => $rec->metarecord,
1222 success => $success,
1225 } catch Error with {
1228 { record => $r->source,
1229 metarecord => $rec->metarecord,
1230 success => $success,
1238 __PACKAGE__->register_method(
1239 api_name => "open-ils.worm.wormize.metarecord",
1240 method => "wormize_biblio_metarecord",
1245 __PACKAGE__->register_method(
1246 api_name => "open-ils.worm.wormize.metarecord.nomap",
1247 method => "wormize_biblio_metarecord",
1252 __PACKAGE__->register_method(
1253 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1254 method => "wormize_biblio_metarecord",
1259 __PACKAGE__->register_method(
1260 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1261 method => "wormize_biblio_metarecord",
# Ingests one or more bib records through open-ils.storage requests.
# Phase 1 (per record, inside an 'extract_data<id>' savepoint): refresh the
# fingerprint/quality, collect full_rec rows, a record descriptor, per-class
# field entries and, unless the API name contains "nomap", a metarecord map row.
# Phase 2 (inside the 'wormize_record' savepoint): batch-create what was
# collected and re-pick each touched metarecord's master record.
# $rec may be a hash ref, in which case it is used as a search clause and
# replaced by the result of the id_list search.
# $commit and $success, tested at the end, are set on lines not visible here.
1268 sub wormize_biblio_record {
1273 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1274 $rec = OpenILS::Application::Ingest->storage_req(
1275 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction only when one is not already in progress.
1281 if (!OpenILS::Application::Ingest->in_transaction) {
1282 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1288 # clean up the cruft
1289 unless ($self->api_name =~ /noscrub/o) {
1290 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1294 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1297 my @rec_descriptor = ();
1305 my %metarecord = ();
1306 my @source_map = ();
1307 for my $r (@$bibs) {
# Savepoint name is unique per record so one bad record can be rolled back alone.
1309 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1311 my $xml = $parser->parse_string($r->marc);
1313 #update the fingerprint
1314 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# The update is sent only when the fingerprint or the integer quality differs
# from the stored row (quality is compared as a string via `ne`).
1315 OpenILS::Application::Ingest->storage_req(
1316 'open-ils.storage.direct.biblio.record_entry.remote_update',
1318 { fingerprint => $fp->{fingerprint},
1319 quality => int($fp->{quality}) }
1320 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1322 # the full_rec stuff
1323 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1324 $fr->record( $r->id );
1325 push @full_rec, $fr;
1328 # the rec_descriptor stuff
1329 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1330 $rd->record( $r->id );
1331 push @rec_descriptor, $rd;
1333 # the indexing field entry stuff
1334 for my $class ( qw/title author subject keyword series/ ) {
1335 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1336 $fe->source( $r->id );
1337 push @{$field_entry{$class}}, $fe;
1341 unless ($self->api_name =~ /nomap/o) {
# Look up the metarecord carrying this fingerprint (first hit only).
1342 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# The next lines build and store a new metarecord with this bib as master;
# the guard around them is not visible here -- presumably "none was found".
1345 $mr = Fieldmapper::metabib::metarecord->new;
1346 $mr->fingerprint( $fp->{fingerprint} );
1347 $mr->master_record( $r->id );
1348 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1351 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1352 $mr_map->metarecord( $mr->id );
1353 $mr_map->source( $r->id );
1354 push @source_map, $mr_map;
# Keyed by id so each metarecord is revisited once in phase 2.
1356 $metarecord{$mr->id} = $mr;
1358 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record: log, then undo only that record's savepoint.
1360 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1361 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Phase 2 runs only when at least one record produced a descriptor.
1366 if (@rec_descriptor) {
1367 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1369 OpenILS::Application::Ingest->storage_req(
1370 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For every touched metarecord: fetch its source maps, fetch those bibs, and
# make the bib with the highest quality the master record (mods is reset to undef).
1374 for my $mr ( values %metarecord ) {
1375 my $sources = OpenILS::Application::Ingest->storage_req(
1376 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1380 my $bibs = OpenILS::Application::Ingest->storage_req(
1381 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1382 [ map { $_->source } @$sources ]
# Descending numeric sort on quality; element 0 is the best record.
1385 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1387 OpenILS::Application::Ingest->storage_req(
1388 'open-ils.storage.direct.metabib.metarecord.remote_update',
1390 { master_record => $master->id, mods => undef }
1394 OpenILS::Application::Ingest->storage_req(
1395 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1397 ) if (@rec_descriptor);
1399 OpenILS::Application::Ingest->storage_req(
1400 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch.create per index class, each skipped when that class has no entries.
1404 OpenILS::Application::Ingest->storage_req(
1405 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1406 @{ $field_entry{title} }
1407 ) if (@{ $field_entry{title} });
1409 OpenILS::Application::Ingest->storage_req(
1410 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1411 @{ $field_entry{author} }
1412 ) if (@{ $field_entry{author} });
1414 OpenILS::Application::Ingest->storage_req(
1415 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1416 @{ $field_entry{subject} }
1417 ) if (@{ $field_entry{subject} });
1419 OpenILS::Application::Ingest->storage_req(
1420 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1421 @{ $field_entry{keyword} }
1422 ) if (@{ $field_entry{keyword} });
1424 OpenILS::Application::Ingest->storage_req(
1425 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1426 @{ $field_entry{series} }
1427 ) if (@{ $field_entry{series} });
1429 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for phase 2: log and roll the whole 'wormize_record' savepoint back.
1435 $log->debug('Wormization failed : '.shift(), ERROR);
1436 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# The transaction is finished here only when $commit is true: committed on
# success, rolled back otherwise.
1440 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1441 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Four API names, all served by wormize_biblio_record. The sub tests
# $self->api_name against /nomap/ and /noscrub/, so the suffix picks whether
# metarecord mapping and/or the scrub step are skipped.
1444 __PACKAGE__->register_method(
1445 api_name => "open-ils.worm.wormize.biblio",
1446 method => "wormize_biblio_record",
1450 __PACKAGE__->register_method(
1451 api_name => "open-ils.worm.wormize.biblio.nomap",
1452 method => "wormize_biblio_record",
1456 __PACKAGE__->register_method(
1457 api_name => "open-ils.worm.wormize.biblio.noscrub",
1458 method => "wormize_biblio_record",
1462 __PACKAGE__->register_method(
1463 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1464 method => "wormize_biblio_record",
# Authority-record counterpart of the bib ingest: optionally scrubs, then
# flattens each authority record's MARC into full_rec rows and batch-creates
# them inside the 'wormize_authority_record' savepoint.
# Record-descriptor handling is commented out, so @rec_descriptor stays empty.
# $commit and $success, tested at the end, are set on lines not visible here.
1469 sub wormize_authority_record {
# Open a transaction only when one is not already in progress.
1475 if (!OpenILS::Application::Ingest->in_transaction) {
1476 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1482 # clean up the cruft
1483 unless ($self->api_name =~ /noscrub/o) {
1484 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the variable name, these are authority.record_entry rows.
1488 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1491 my @rec_descriptor = ();
1492 for my $r (@$bibs) {
1493 my $xml = $parser->parse_string($r->marc);
1495 # the full_rec stuff
1496 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1497 $fr->record( $r->id );
1498 push @full_rec, $fr;
1501 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1502 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1503 #$rd->record( $r->id );
1504 #push @rec_descriptor, $rd;
1508 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1510 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1511 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1513 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log and roll the savepoint back.
1516 $log->debug('Wormization failed : '.shift(), ERROR);
1517 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# The transaction is finished here only when $commit is true.
1521 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1522 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names served by wormize_authority_record; the ".noscrub" name makes
# the sub's /noscrub/ test on $self->api_name skip the scrub step.
1525 __PACKAGE__->register_method(
1526 api_name => "open-ils.worm.wormize.authority",
1527 method => "wormize_authority_record",
1531 __PACKAGE__->register_method(
1532 api_name => "open-ils.worm.wormize.authority.noscrub",
1533 method => "wormize_authority_record",
1539 # --------------------------------------------------------------------------------
1540 # MARC index extraction
1542 package OpenILS::Application::Ingest::XPATH;
1543 use base qw/OpenILS::Application::Ingest/;
1544 use Unicode::Normalize;
1546 # give this a MODS documentElement and an XPATH expression
# Evaluates $xpath against $xml and returns the collected text, each piece
# followed by a single space, normalised with NFD.
# When both $ns_uri and $ns_prefix are given the namespace is set on $xml first.
# With $unique set, a child's text is skipped when it already occurs anywhere
# in the accumulated string (a substring test, not a whole-value comparison).
1547 sub _xpath_to_string {
1551 my $ns_prefix = shift;
1554 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1558 # grab the set of matching nodes
1559 my @nodes = $xml->findnodes( $xpath );
1560 for my $value (@nodes) {
1562 # grab all children of the node
1563 my @children = $value->childNodes();
1564 for my $child (@children) {
1566 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern in the match below.
1567 my $content = quotemeta($child->textContent);
1568 next if ($unique && $string =~ /$content/); # uniquify the values
1569 $string .= $child->textContent . " ";
# Appends the matched node's own text; the condition guarding this line is not
# visible here -- presumably the no-children case.
1572 $string .= $value->textContent . " ";
1575 return NFD($string);
# For one index class, extracts a value for every field type configured in
# $xpathset->{$class}, normalises it, and responds with one
# Fieldmapper::metabib::<class>_field_entry object per type.
# $xml may be a string (parsed here) or an already-parsed document.
1578 sub class_all_index_string_xml {
1584 OpenILS::Application::Ingest->post_init();
1585 $xml = $parser->parse_string($xml) unless (ref $xml);
1587 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1588 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MARC->MODS transform does not depend on $type but is
# re-run on every iteration -- confirm whether it can be hoisted above the loop.
1589 my $value = _xpath_to_string(
1590 $mods_sheet->transform($xml)->documentElement,
1591 $xpathset->{$class}->{$type}->{xpath},
1592 "http://www.loc.gov/mods/",
# Normalisation: decompose, drop mark (\pM) and "other" (\pC) characters, strip
# trailing non-word characters, remove a period that follows a word character,
# then lowercase.
1599 $value = NFD($value);
1600 $value =~ s/\pM+//sgo;
1601 $value =~ s/\pC+//sgo;
1602 $value =~ s/\W+$//sgo;
1604 $value =~ s/(\w)\./$1/sgo;
1605 $value = lc($value);
1607 my $fm = $class_constructor->new;
1608 $fm->value( $value );
1609 $fm->field( $xpathset->{$class}->{$type}->{id} );
1610 $client->respond($fm);
# Registers the API name below, served by class_all_index_string_xml.
1614 __PACKAGE__->register_method(
1615 api_name => "open-ils.worm.field_entry.class.xml",
1616 method => "class_all_index_string_xml",
# Record-id front end for the XML variant: retrieves bib record $rec from
# storage, runs open-ils.worm.field_entry.class.xml on its MARC for $class, and
# relays every resulting field entry to the client.
1622 sub class_all_index_string_record {
1628 OpenILS::Application::Ingest->post_init();
1629 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1631 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1633 $client->respond($fm);
# Registers the API name below, served by class_all_index_string_record.
1637 __PACKAGE__->register_method(
1638 api_name => "open-ils.worm.field_entry.class.record",
1639 method => "class_all_index_string_record",
# Returns the index string for a single ($class, $type) pair: transforms the
# MARC document through $mods_sheet and evaluates the configured XPath with the
# "mods" prefix bound and uniquifying enabled (last argument 1).
# Unlike the class-wide variant, no further normalisation is applied here.
1646 sub class_index_string_xml {
1653 OpenILS::Application::Ingest->post_init();
1654 $xml = $parser->parse_string($xml) unless (ref $xml);
1655 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
# Registers the API name below, served by class_index_string_xml.
1657 __PACKAGE__->register_method(
1658 api_name => "open-ils.worm.class.type.xml",
1659 method => "class_index_string_xml",
# Record-id front end for open-ils.worm.class.type.xml: retrieves bib record
# $rec, runs the XML variant on its MARC for ($class, $type), and logs the
# first returned value. The return statement is not visible here.
1664 sub class_index_string_record {
1671 OpenILS::Application::Ingest->post_init();
1672 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1674 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1675 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
# Registers the API name below, served by class_index_string_record.
1678 __PACKAGE__->register_method(
1679 api_name => "open-ils.worm.class.type.record",
1680 method => "class_index_string_record",
# Body fragment; the enclosing `sub` line is not visible here (presumably
# xml_xpath, the method named in the registration that follows).
# Evaluates a caller-supplied XPath, with optional namespace URI/prefix and
# uniquify flag, directly against the document element -- no MODS transform.
1694 OpenILS::Application::Ingest->post_init();
1695 $xml = $parser->parse_string($xml) unless (ref $xml);
1696 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
# Registers the API name below, served by xml_xpath.
1698 __PACKAGE__->register_method(
1699 api_name => "open-ils.worm.xpath.xml",
1700 method => "xml_xpath",
# Body fragment; the enclosing `sub` line is not visible here (presumably
# record_xpath, the method named in the registration that follows).
# Retrieves bib record $rec, runs open-ils.worm.xpath.xml on its MARC with the
# caller's XPath arguments, and logs the first returned value.
1714 OpenILS::Application::Ingest->post_init();
1715 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1717 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1718 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
# Registers the API name below, served by record_xpath.
1721 __PACKAGE__->register_method(
1722 api_name => "open-ils.worm.xpath.record",
1723 method => "record_xpath",
1729 # --------------------------------------------------------------------------------
1732 package OpenILS::Application::Ingest::Biblio::Leader;
1733 use base qw/OpenILS::Application::Ingest/;
1734 use Unicode::Normalize;
# Named groups of record-type codes; each value is a regex fragment. The
# descriptor closures test leader position 6 against these groups.
1736 our %marc_type_groups = (
1739 VIS => q/[gkro]{1}/,
# Builds the anchored pattern for the requested group names (this line appears
# to belong to _type_re; its `sub` line is not visible here).
# NOTE(review): `$marc_type_groups{@_}` is a single-element lookup whose key is
# @_ evaluated in scalar context, not a slice over the group names; the slice
# form would be `@marc_type_groups{@_}` -- confirm intent.
# NOTE(review): once fragments are joined with '|', the '^' applies only to the
# first alternative and the '$' only to the last -- confirm whether grouping
# was intended.
1748 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# Maps each record_descriptor field name to a closure that computes its value.
# The closures read $ldr and $oo8, which _extract_biblio_descriptors sets with
# `local` before calling them. All offsets are 0-based substr positions.
1752 our %biblio_descriptor_code = (
1753 item_type => sub { substr($ldr,6,1); },
# The position read from $oo8 depends on the record type in leader position 6:
# offset 29 for the MAP/VIS groups, offset 23 for BKS/SER/MIX/SCO/REC.
# (The hash key for this closure is on a line not visible here.)
1756 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1757 return substr($oo8,29,1);
1758 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1759 return substr($oo8,23,1);
1763 bib_level => sub { substr($ldr,7,1); },
1764 control_type => sub { substr($ldr,8,1); },
1765 char_encoding => sub { substr($ldr,9,1); },
1766 enc_level => sub { substr($ldr,17,1); },
1767 cat_form => sub { substr($ldr,18,1); },
1768 pub_status => sub { substr($ldr,5,1); },
1769 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat read the same $oo8 offset (33) but only for the BKS and
# VIS groups respectively; otherwise they yield undef.
1770 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1771 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1772 audience => sub { substr($oo8,22,1); },
# Builds a metabib record_descriptor from a parsed MARC document: pulls the
# leader and the 008/007 control fields, then calls every closure in
# %biblio_descriptor_code and stores each result through the accessor of the
# same name.
1775 sub _extract_biblio_descriptors {
# `local` gives the closures dynamically scoped access to these values for the
# duration of this call; local-name() matches the elements whatever their
# namespace prefix.
1778 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1779 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# NOTE(review): none of the closures visible in %biblio_descriptor_code read $oo7.
1780 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1782 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1783 for my $rd_field ( keys %biblio_descriptor_code ) {
1784 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# API wrapper: accepts MARC XML as a string or parsed document and returns the
# record descriptor built by _extract_biblio_descriptors.
1790 sub extract_biblio_desc_xml {
1795 $xml = $parser->parse_string($xml) unless (ref $xml);
1797 return _extract_biblio_descriptors( $xml );
# Registers the API name below, served by extract_biblio_desc_xml.
1799 __PACKAGE__->register_method(
1800 api_name => "open-ils.worm.biblio_leader.xml",
1801 method => "extract_biblio_desc_xml",
# Record-id front end for open-ils.worm.biblio_leader.xml: retrieves bib record
# $rec, runs the XML variant on its MARC, and logs the descriptor as JSON.
# The return statement is not visible here.
1806 sub extract_biblio_desc_record {
1811 OpenILS::Application::Ingest->post_init();
1812 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1814 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1815 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
# Registers the API name below, served by extract_biblio_desc_record.
1818 __PACKAGE__->register_method(
1819 api_name => "open-ils.worm.biblio_leader.record",
1820 method => "extract_biblio_desc_record",
1825 # --------------------------------------------------------------------------------
1828 package OpenILS::Application::Ingest::FlatMARC;
1829 use base qw/OpenILS::Application::Ingest/;
1830 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper::<xmltype>::full_rec
# objects: one per leader, one per controlfield, and one per subfield of every
# datafield. $xmltype defaults to 'metabib'.
# In each section the text has mark (\pM) and "other" (\pC) characters removed
# and trailing non-word characters stripped; subfield values are also lowercased.
1833 sub _marcxml_to_full_rows {
1835 my $marcxml = shift;
1836 my $xmltype = shift || 'metabib';
1838 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first <record> element is used (list assignment keeps element 0).
# NOTE(review): if no record element matches, $root is undef and the method
# calls below would die -- confirm callers always pass a MARC record.
1842 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1844 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1845 next unless $tagline;
1847 my $ns = $type->new;
1850 my $val = $tagline->textContent;
1852 $val =~ s/\pM+//sgo;
1853 $val =~ s/\pC+//sgo;
1854 $val =~ s/\W+$//sgo;
1860 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1861 next unless $tagline;
1863 my $ns = $type->new;
1865 $ns->tag( $tagline->getAttribute( "tag" ) );
1866 my $val = $tagline->textContent;
1868 $val =~ s/\pM+//sgo;
1869 $val =~ s/\pC+//sgo;
1870 $val =~ s/\W+$//sgo;
1876 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1877 next unless $tagline;
# Tag and indicators are read once per datafield and shared by its subfields.
1879 my $tag = $tagline->getAttribute( "tag" );
1880 my $ind1 = $tagline->getAttribute( "ind1" );
1881 my $ind2 = $tagline->getAttribute( "ind2" );
1883 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1886 my $ns = $type->new;
1891 $ns->subfield( $data->getAttribute( "code" ) );
1892 my $val = $data->textContent;
1894 $val =~ s/\pM+//sgo;
1895 $val =~ s/\pC+//sgo;
1896 $val =~ s/\W+$//sgo;
1897 $ns->value( lc($val) );
1903 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body fragment; the enclosing `sub` line is not visible here (presumably
# flat_marc_xml, the method named in the registrations that follow).
# Flattens MARC XML into full_rec rows and sends each one to the client. The
# row type is 'metabib' unless the invoking API name contains "authority".
1912 $xml = $parser->parse_string($xml) unless (ref $xml);
1914 my $type = 'metabib';
1915 $type = 'authority' if ($self->api_name =~ /authority/o);
1917 OpenILS::Application::Ingest->post_init();
1919 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Two API names served by flat_marc_xml; the method checks its own api_name for
# "authority" to choose between authority and metabib full_rec rows.
1922 __PACKAGE__->register_method(
1923 api_name => "open-ils.worm.flat_marc.authority.xml",
1924 method => "flat_marc_xml",
1929 __PACKAGE__->register_method(
1930 api_name => "open-ils.worm.flat_marc.biblio.xml",
1931 method => "flat_marc_xml",
# Record-id front end for the flat_marc XML methods: retrieves record $rec from
# the biblio or authority record_entry store (chosen by the invoking API name)
# and relays the rows produced by open-ils.worm.flat_marc.<type>.xml.
1937 sub flat_marc_record {
# $type is interpolated into both the storage method name and the worm method
# name below, so it must be 'biblio' or 'authority'.
1942 my $type = 'biblio';
1943 $type = 'authority' if ($self->api_name =~ /authority/o);
1945 OpenILS::Application::Ingest->post_init();
1946 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1948 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
# Two API names served by flat_marc_record; the method checks its own api_name
# for "authority" to pick the record store and the downstream XML method.
1951 __PACKAGE__->register_method(
1952 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1953 method => "flat_marc_record",
1958 __PACKAGE__->register_method(
1959 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1960 method => "flat_marc_record",
1967 # --------------------------------------------------------------------------------
1970 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1971 use base qw/OpenILS::Application::Ingest/;
1972 use Unicode::Normalize;
1973 use OpenSRF::EX qw/:try/;
# Fingerprint extraction rules, stored as a flat list of pairs:
#   match XPath => block
# The walker that follows this list reads a block as (name, part) pairs, where
# each part carries an {xpath} list of candidate expressions. The fixup lines
# below clean the extracted $text and log it after every step.
# In the visible lines every block has a title part followed by a creator-name part.
1975 my @fp_mods_xpath = (
# Block 1: records whose typeOfResource is "text".
1976 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates, in order: uniform, translated, alternative, untyped.
1979 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1980 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1981 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1982 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup (some steps are on lines not visible here): drop marks, collapse
# whitespace, trim, remove the words the/a/an, remove [bracketed] text, and
# strip trailing whitespace followed by ; / . characters.
1985 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1987 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1988 $text =~ s/\pM+//gso;
1989 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1991 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1992 $text =~ s/\s+/ /sgo;
1993 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1994 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1995 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1996 $text =~ s/\b(?:the|an?)\b//sgo;
1997 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1998 $text =~ s/\[.[^\]]+\]//sgo;
1999 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2000 $text =~ s/\s*[;\/\.]*$//sgo;
2001 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Creator-name candidates: personal creators first, then any creator.
2006 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2007 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Name fixup: same mark/whitespace clean-up, then everything from the first
# whitespace run (and an optional preceding comma) to the end is cut off.
2010 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2012 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2013 $text =~ s/\pM+//gso;
2014 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2016 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2017 $text =~ s/\s+/ /sgo;
2018 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2019 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2020 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2021 $text =~ s/,?\s+.*$//sgo;
2022 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Block 2: records with a relatedItem whose type is neither "host" nor "series".
2027 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the relatedItem's titles first, then the record's own.
2030 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2031 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2032 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2033 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2034 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2035 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2036 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2037 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2040 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2042 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2043 $text =~ s/\pM+//gso;
2044 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2046 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2047 $text =~ s/\s+/ /sgo;
2048 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2050 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051 $text =~ s/\b(?:the|an?)\b//sgo;
2052 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053 $text =~ s/\[.[^\]]+\]//sgo;
2054 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055 $text =~ s/\s*[;\/\.]*$//sgo;
2056 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Creator-name candidates: the relatedItem's creators first, then the record's own.
2061 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2062 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2063 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2064 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2067 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2069 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2070 $text =~ s/\pM+//gso;
2071 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2073 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2074 $text =~ s/\s+/ /sgo;
2075 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2077 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2078 $text =~ s/,?\s+.*$//sgo;
2079 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Extra pair appended after the list: any record with a titleInfo reuses the
# first block (element 1 of the list, i.e. the "text" rules) by reference.
2086 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Body fragment; the enclosing `sub` line is not visible here (presumably
# _fp_mods, which the fingerprint API methods call with a MODS element).
# Walks @fp_mods_xpath as (match XPath, block) pairs. When a match XPath finds
# nodes, the block's parts are visited in order and the text found for each is
# appended to $fp_string. Finally every non-word character is removed.
2090 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Even slots of @fp_mods_xpath hold match XPaths, odd slots hold their blocks.
2094 my $match_index = 0;
2095 my $block_index = 1;
2096 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2097 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Inside a block, even slots are part names and odd slots are the parts.
2099 my $block_name_index = 0;
2100 my $block_value_index = 1;
2101 my $block = $fp_mods_xpath[$block_index];
2102 while ( my $part = $$block[$block_value_index] ) {
# Candidate XPaths are tried in list order; how the loop stops on a hit is on
# lines not visible here.
2104 for my $xpath ( @{ $part->{xpath} } ) {
2105 $text = $mods->findvalue( $xpath );
2109 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2113 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2114 $fp_string .= $text;
# Step to the next (name, part) pair of the block.
2117 $block_name_index += 2;
2118 $block_value_index += 2;
2122 $fp_string =~ s/\W+//gso;
2123 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recomputes the fingerprint and quality of the given bib record(s). When
# either value changed, the record is updated and, unless the API name contains
# "nomap", its metarecord mapping is rebuilt: old source-map rows are deleted,
# a metarecord left without maps is deleted, and the record is mapped to the
# metarecord for the new fingerprint. Each processed record id is sent to the client.
# $commit and $success, tested at the end, are set on lines not visible here.
2133 sub refingerprint_bibrec {
# Open a transaction only when one is not already in progress.
2139 if (!OpenILS::Application::Ingest->in_transaction) {
2140 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2146 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): a lexical $b shadows the $b that sort blocks use; any sort
# comparator added inside this loop would not see its second operand.
2147 for my $b (@$bibs) {
2148 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
2150 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2152 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2154 OpenILS::Application::Ingest->storage_req(
2155 'open-ils.storage.direct.biblio.record_entry.remote_update',
2157 { fingerprint => $fp->{fingerprint},
2158 quality => $fp->{quality} }
2161 if ($self->api_name !~ /nomap/o) {
2162 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2163 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2168 if (ref($old_source_map) and @$old_source_map) {
# Remember the metarecord the record used to belong to while deleting its maps.
2169 for my $m (@$old_source_map) {
2170 $old_mrid = $m->metarecord;
2171 OpenILS::Application::Ingest->storage_req(
2172 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no source maps left, delete it as well.
2178 my $old_sm = OpenILS::Application::Ingest->storage_req(
2179 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2180 { metarecord => $old_mrid }
2183 if (ref($old_sm) and @$old_sm == 0) {
2184 OpenILS::Application::Ingest->storage_req(
2185 'open-ils.storage.direct.metabib.metarecord.delete',
# NOTE(review): this fingerprint search is given a hash ref, whereas
# wormize_biblio_record passes the bare fingerprint value to the same storage
# method -- confirm which argument form the method expects.
2190 my $mr = OpenILS::Application::Ingest->storage_req(
2191 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2192 { fingerprint => $fp->{fingerprint} }
# The next lines build and store a new metarecord with this bib as master;
# the guard around them is not visible here -- presumably "none was found".
2196 $mr = Fieldmapper::metabib::metarecord->new;
2197 $mr->fingerprint( $fp->{fingerprint} );
2198 $mr->master_record( $b->id );
2199 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2202 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2203 $mr_map->metarecord( $mr->id );
2204 $mr_map->source( $b->id );
2205 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2209 $client->respond($b->id);
2213 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# The transaction is finished here only when $commit is true.
2217 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2218 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names served by refingerprint_bibrec; under the ".nomap" name the
# sub's `api_name !~ /nomap/` test skips the metarecord re-mapping.
2221 __PACKAGE__->register_method(
2222 api_name => "open-ils.worm.fingerprint.record.update",
2223 method => "refingerprint_bibrec",
2229 __PACKAGE__->register_method(
2230 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2231 method => "refingerprint_bibrec",
# Retrieves bib record $rec and computes its fingerprint by running
# open-ils.worm.fingerprint.marc on the record's MARC.
# The return statement is not visible here.
2238 sub fingerprint_bibrec {
2243 OpenILS::Application::Ingest->post_init();
2244 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2246 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
# NOTE(review): other callers treat this result as a hash ref
# ($fp->{fingerprint}); if so, "[$fp]" logs a reference address -- confirm.
2247 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Registers the API name below, served by fingerprint_bibrec.
# NOTE(review): the same api_name is registered again further down this file
# for biblio_fingerprint_record -- confirm which registration should win.
2251 __PACKAGE__->register_method(
2252 api_name => "open-ils.worm.fingerprint.record",
2253 method => "fingerprint_bibrec",
# Parses a MODS XML string and returns the fingerprint that _fp_mods computes
# from its document element.
2259 sub fingerprint_mods {
2264 OpenILS::Application::Ingest->post_init();
2265 my $mods = $parser->parse_string($xml)->documentElement;
2267 return _fp_mods( $mods );
# Registers the API name below, served by fingerprint_mods.
2269 __PACKAGE__->register_method(
2270 api_name => "open-ils.worm.fingerprint.mods",
2271 method => "fingerprint_mods",
# Computes a fingerprint from MARC XML (string or parsed document): transforms
# it through $mods_sheet and hands the resulting document element to _fp_mods.
# The return statement is not visible here.
2276 sub fingerprint_marc {
2281 $xml = $parser->parse_string($xml) unless (ref $xml);
2283 OpenILS::Application::Ingest->post_init();
2284 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
# NOTE(review): if _fp_mods returns a hash ref (callers read $fp->{fingerprint}),
# "[$fp]" logs a reference address rather than the fingerprint -- confirm.
2285 $log->debug("Returning [$fp] as fingerprint", INFO);
# Registers the API name below, served by fingerprint_marc.
# NOTE(review): the same api_name is registered again further down this file
# for biblio_fingerprint -- confirm which registration should win.
2288 __PACKAGE__->register_method(
2289 api_name => "open-ils.worm.fingerprint.marc",
2290 method => "fingerprint_marc",
# Retrieves bib record $rec from storage and fingerprints it through
# open-ils.worm.fingerprint.marc. The end of the retrieval chain (what is taken
# from the record) and the return statement are on lines not visible here.
2298 sub biblio_fingerprint_record {
2303 OpenILS::Application::Ingest->post_init();
2305 my $marc = OpenILS::Application::Ingest
2306 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2309 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
# NOTE(review): if $fp is a hash ref, "[$fp]" logs a reference address -- confirm.
2310 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Registers the API name below, served by biblio_fingerprint_record.
# NOTE(review): this api_name is also registered earlier in this file for
# fingerprint_bibrec -- confirm which registration should win.
2313 __PACKAGE__->register_method(
2314 api_name => "open-ils.worm.fingerprint.record",
2315 method => "biblio_fingerprint_record",
# Script-driven fingerprinting: serialises the MARC document and its MODS
# transform (both passed through entityize), exposes them to a ScriptRunner
# script as the 'environment' object { marc, mods }, and runs that script.
# The script path and library paths come from the open-ils.storage
# app_settings configuration. The final return is not visible here.
2321 sub biblio_fingerprint {
2326 OpenILS::Application::Ingest->post_init();
2328 $marc = $parser->parse_string($marc) unless (ref $marc);
2330 my $mods = OpenILS::Application::Ingest::entityize(
2332 ->transform( $marc )
# From here on $marc holds the serialised string, not the parsed document.
2338 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2341 $log->internal("Got MARC [$marc]");
2342 $log->internal("Created MODS [$mods]");
2345 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2346 my $conf = OpenSRF::Utils::SettingsClient->new;
2348 my $libs = $conf->config_value(@pfx, 'script_path');
2349 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise it to an array ref.
2350 my $script_libs = (ref($libs)) ? $libs : [$libs];
2352 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared in the visible lines -- presumably a file-level
# variable so the runner can be reused across calls; confirm.
2354 $fp_script = new OpenILS::Utils::ScriptRunner
2355 ( file => $script_file,
2356 paths => $script_libs,
2357 reset_count => 1000 );
2360 $log->debug("Applying environment for biblio fingerprinting...");
2362 my $env = {marc => $marc, mods => $mods};
2363 #my $res = {fingerprint => '', quality => '0'};
2365 $fp_script->insert('environment' => $env);
2366 #$fp_script->insert('result' => $res);
2368 $log->debug("Running script for biblio fingerprinting...");
# NOTE(review): on a false result the `return 0` executes only if $log->error
# itself returns a true value -- confirm that it does.
2370 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2372 $log->debug("Script for biblio fingerprinting completed successfully...");
# Registers the API name below, served by biblio_fingerprint.
# NOTE(review): this api_name is also registered earlier in this file for
# fingerprint_marc -- confirm which registration should win.
2376 __PACKAGE__->register_method(
2377 api_name => "open-ils.worm.fingerprint.marc",
2378 method => "biblio_fingerprint",
2383 # --------------------------------------------------------------------------------
2397 my $create_source_map;
2412 my %descriptor_code = (
2413 item_type => 'substr($ldr,6,1)',
2414 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2415 bib_level => 'substr($ldr,7,1)',
2416 control_type => 'substr($ldr,8,1)',
2417 char_encoding => 'substr($ldr,9,1)',
2418 enc_level => 'substr($ldr,17,1)',
2419 cat_form => 'substr($ldr,18,1)',
2420 pub_status => 'substr($ldr,5,1)',
2421 item_lang => 'substr($oo8,35,3)',
2422 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2423 audience => 'substr($oo8,22,1)',
2433 if ($self->api_name =~ /no_map/o) {
2437 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2439 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2441 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2443 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2445 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2446 unless ($sm_lookup);
2447 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2448 unless ($mr_lookup);
2449 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2450 unless ($mr_update);
2451 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2453 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2454 unless ($update_entry);
2455 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2456 unless ($rm_old_sm);
2457 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2458 unless ($rm_old_rd);
2459 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2460 unless ($rm_old_fr);
2461 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2462 unless ($rm_old_tr);
2463 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2464 unless ($rm_old_ar);
2465 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2466 unless ($rm_old_sr);
2467 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2468 unless ($rm_old_kr);
2469 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2470 unless ($rm_old_ser);
2471 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2472 unless ($mr_create);
2473 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2474 unless ($create_source_map);
2475 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2476 unless ($rd_create);
2477 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2478 unless ($fr_create);
2479 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2480 unless ($$create{title});
2481 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2482 unless ($$create{author});
2483 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2484 unless ($$create{subject});
2485 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2486 unless ($$create{keyword});
2487 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2488 unless ($$create{series});
2491 my ($outer_xact) = $in_xact->run;
2493 unless ($outer_xact) {
2494 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2495 my ($r) = $begin->run($client);
2496 unless (defined $r and $r) {
2498 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2501 } catch Error with {
2502 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2512 for my $entry ( $lookup->run(@docids) ) {
2513 # step -1: grab the doc from storage
2514 next unless ($entry);
2517 my $xslt_doc = $parser->parse_file(
2518 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2519 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2522 my $xml = $entry->marc;
2523 my $docid = $entry->id;
2524 my $marcdoc = $parser->parse_string($xml);
2525 my $modsdoc = $mods_sheet->transform($marcdoc);
2527 my $mods = $modsdoc->documentElement;
2528 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2530 $entry->fingerprint( fingerprint_mods( $mods ) );
2531 push @entry_list, $entry;
2533 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2536 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2537 if (!$mr || !@$mr) {
2538 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2539 $mr = new Fieldmapper::metabib::metarecord;
2540 $mr->fingerprint( $entry->fingerprint );
2541 $mr->master_record( $entry->id );
2542 my ($new_mr) = $mr_create->run($mr);
2544 unless (defined $mr) {
2545 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2548 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2553 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2554 $sm->metarecord( $mr->id );
2555 $sm->source( $entry->id );
2556 push @source_maps, $sm;
2559 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2560 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2562 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2563 for my $rd_field ( keys %descriptor_code ) {
2564 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2566 $rd_obj->record( $docid );
2567 push @rd_list, $rd_obj;
2569 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2571 # step 2: build the KOHA rows
2572 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2573 $_->record( $docid ) for (@tmp_list);
2574 push @ns_list, @tmp_list;
2578 last unless ($self->api_name =~ /batch$/o);
2581 $rm_old_rd->run( { record => \@docids } );
2582 $rm_old_fr->run( { record => \@docids } );
2583 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2584 $rm_old_tr->run( { source => \@docids } );
2585 $rm_old_ar->run( { source => \@docids } );
2586 $rm_old_sr->run( { source => \@docids } );
2587 $rm_old_kr->run( { source => \@docids } );
2588 $rm_old_ser->run( { source => \@docids } );
2591 my ($sm) = $create_source_map->run(@source_maps);
2592 unless (defined $sm) {
2593 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2595 my ($mr) = $mr_update->run(@mr_list);
2596 unless (defined $mr) {
2597 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2601 my ($re) = $update_entry->run(@entry_list);
2602 unless (defined $re) {
2603 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2606 my ($rd) = $rd_create->run(@rd_list);
2607 unless (defined $rd) {
2608 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2611 my ($fr) = $fr_create->run(@ns_list);
2612 unless (defined $fr) {
2613 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2616 # step 5: insert the new metadata
2617 for my $class ( qw/title author subject keyword series/ ) {
2619 for my $doc ( @mods_data ) {
2620 my ($did) = keys %$doc;
2621 my ($data) = values %$doc;
2623 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2624 for my $row ( keys %{ $$data{$class} } ) {
2625 next unless (exists $$data{$class}{$row});
2626 next unless ($$data{$class}{$row}{value});
2627 my $fm_obj = $fm_constructor->new;
2628 $fm_obj->value( $$data{$class}{$row}{value} );
2629 $fm_obj->field( $$data{$class}{$row}{field_id} );
2630 $fm_obj->source( $did );
2631 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2633 push @md_list, $fm_obj;
2637 my ($cr) = $$create{$class}->run(@md_list);
2638 unless (defined $cr) {
2639 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2643 unless ($outer_xact) {
2644 $log->debug("Commiting transaction started by the Ingest.", INFO);
2645 my ($c) = $commit->run;
2646 unless (defined $c and $c) {
2648 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# API registrations for the bibliographic ingest. All four names dispatch to
# wormize(). wormize() stops after the first record unless the API name ends
# in "batch", and it skips deleting old metarecord source maps when its
# $no_map flag is set (presumably driven by "no_map" in the API name --
# TODO confirm against the top of wormize()).
2654 __PACKAGE__->register_method(
2655 api_name => "open-ils.worm.wormize",
2656 method => "wormize",
2660 __PACKAGE__->register_method(
2661 api_name => "open-ils.worm.wormize.no_map",
2662 method => "wormize",
2666 __PACKAGE__->register_method(
2667 api_name => "open-ils.worm.wormize.batch",
2668 method => "wormize",
2672 __PACKAGE__->register_method(
2673 api_name => "open-ils.worm.wormize.no_map.batch",
2674 method => "wormize",
2689 my $acreate_source_map;
# authority_wormize: ingest handler for authority records.
#
# For each authority record entry retrieved for the requested ids it parses
# the record's MARCXML, builds a Fieldmapper::authority::record_descriptor
# and the flattened authority full_rec rows, then mass-deletes the old
# descriptor / full_rec rows for those ids and batch-creates the new ones.
#
# Storage method handles are resolved through method_lookup only while the
# corresponding variable is still unset.
#
# If open-ils.storage.transaction.current reports no active transaction, the
# sub begins one itself and commits it at the end.
#
# Throws OpenSRF::EX::PANIC when BEGIN, either batch create, or COMMIT fails.
2704 sub authority_wormize {
2711 if ($self->api_name =~ /no_map/o) {
2715 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2717 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2719 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2721 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2723 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2725 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2726 unless ($aupdate_entry);
2727 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2728 unless ($arm_old_rd);
2729 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2730 unless ($arm_old_fr);
2731 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2732 unless ($ard_create);
2733 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2734 unless ($afr_create);
# Only open (and later commit) a transaction when the caller has not already
# started one.
2737 my ($outer_xact) = $in_xact->run;
2739 unless ($outer_xact) {
2740 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2741 my ($r) = $begin->run($client);
2742 unless (defined $r and $r) {
2744 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2747 } catch Error with {
2748 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Fetch the entries through the authority retrieve handle ($alookup). The
# bibliographic $lookup handle would return biblio records for these ids.
2758 for my $entry ( $alookup->run(@docids) ) {
2759 # step -1: grab the doc from storage
2760 next unless ($entry);
2763 # my $xslt_doc = $parser->parse_file(
2764 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2765 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2768 my $xml = $entry->marc;
2769 my $docid = $entry->id;
2770 my $marcdoc = $parser->parse_string($xml);
2771 #my $madsdoc = $mads_sheet->transform($marcdoc);
2773 #my $mads = $madsdoc->documentElement;
2774 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2776 push @entry_list, $entry;
# Leader and 008 text; presumably referenced by the %descriptor_code
# snippets evaluated below -- TODO confirm.
2778 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2779 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2781 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
# Each %descriptor_code value is Perl source run through string eval; its
# result is stored in the descriptor field of the same name.
2782 for my $rd_field ( keys %descriptor_code ) {
2783 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2785 $rd_obj->record( $docid );
2786 push @rd_list, $rd_obj;
2788 # step 2: build the flattened full_rec rows as authority objects
2789 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2790 $_->record( $docid ) for (@tmp_list);
2791 push @ns_list, @tmp_list;
# Non-batch API names process only the first entry.
2795 last unless ($self->api_name =~ /batch$/o);
# Remove the previous descriptor and full_rec rows for the requested ids
# before inserting the freshly built ones.
2798 $arm_old_rd->run( { record => \@docids } );
2799 $arm_old_fr->run( { record => \@docids } );
2801 my ($rd) = $ard_create->run(@rd_list);
2802 unless (defined $rd) {
2803 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Use the authority full_rec create handle; the metabib $fr_create handle
# would write these rows through the bibliographic method.
2806 my ($fr) = $afr_create->run(@ns_list);
2807 unless (defined $fr) {
2808 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
2811 unless ($outer_xact) {
2812 $log->debug("Commiting transaction started by Ingest.", INFO);
2813 my ($c) = $commit->run;
2814 unless (defined $c and $c) {
2816 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# API registrations for the authority ingest. Both names dispatch to
# authority_wormize(), which continues past the first record only when the
# API name ends in "batch". They previously pointed at the bibliographic
# wormize(), leaving authority_wormize() unreachable.
# NOTE(review): "authortiy" in the first api_name looks like a misspelling of
# "authority". It is left unchanged here because renaming a registered API
# name would break existing callers -- confirm, then add a correctly spelled
# registration.
2822 __PACKAGE__->register_method(
2823 api_name => "open-ils.worm.authortiy.wormize",
2824 method => "authority_wormize",
2828 __PACKAGE__->register_method(
2829 api_name => "open-ils.worm.authority.wormize.batch",
2830 method => "authority_wormize",
2836 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Walks the leader, controlfield and datafield children of a parsed MARCXML
# document and builds one row object per leader, per controlfield, and per
# child node of each datafield.
#
#   $marcxml - parsed document; only ->documentElement is used
#   $type    - class name of the row objects to build; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Every row type (leader, controlfield, datafield child) is instantiated from
# $type, so a caller passing 'Fieldmapper::authority::full_rec' gets authority
# objects throughout rather than a mix with metabib rows.
#
# Text values are NFD-normalized and have combining marks (\pM) stripped;
# datafield child values are additionally lower-cased.
2839 sub _marcxml_to_full_rows {
2841 my $marcxml = shift;
2842 my $type = shift || 'Fieldmapper::metabib::full_rec';
2846 my $root = $marcxml->documentElement;
2848 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2849 next unless $tagline;
2851 my $ns = $type->new;
2854 my $val = NFD($tagline->textContent);
2855 $val =~ s/(\pM+)//gso;
2861 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2862 next unless $tagline;
2864 my $ns = $type->new;
2866 $ns->tag( $tagline->getAttribute( "tag" ) );
2867 my $val = NFD($tagline->textContent);
2868 $val =~ s/(\pM+)//gso;
2874 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2875 next unless $tagline;
2877 my $tag = $tagline->getAttribute( "tag" );
2878 my $ind1 = $tagline->getAttribute( "ind1" );
2879 my $ind2 = $tagline->getAttribute( "ind2" );
# One row per child node of the datafield; the child's "code" attribute
# becomes the row's subfield.
2881 for my $data ( $tagline->childNodes ) {
2884 my $ns = $type->new;
2889 $ns->subfield( $data->getAttribute( "code" ) );
2890 my $val = NFD($data->textContent);
2891 $val =~ s/(\pM+)//gso;
2892 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Gathers the text of the nodes matched by $xpath under $root into a single
# space-separated buffer, then NFD-normalizes the buffer and strips combining
# marks (\pM) from it.
#
#   $root  - node on which findnodes() is called
#   $xpath - XPath expression selecting the nodes of interest
2900 sub _get_field_value {
2902 my( $root, $xpath ) = @_;
2906 # grab the set of matching nodes
2907 my @nodes = $root->findnodes( $xpath );
2908 for my $value (@nodes) {
2910 # grab all children of the node
2911 my @children = $value->childNodes();
2912 for my $child (@children) {
2914 # add the child's content to the growing buffer
2915 my $content = quotemeta($child->textContent);
# The duplicate check is a substring match against the whole buffer, so text
# already contained inside an earlier, longer value is skipped as well.
2916 next if ($string =~ /$content/); # uniquify the values
2917 $string .= $child->textContent . " ";
# Appends the matched node's own text (presumably the fallback for a node
# without children -- TODO confirm).
2920 $string .= $value->textContent . " ";
# Decompose to NFD, then drop the combining marks.
2923 $string = NFD($string);
2924 $string =~ s/(\pM)//gso;
2929 sub modsdoc_to_values {
2930 my( $self, $mods ) = @_;
2932 for my $class (keys %$xpathset) {
2933 $data->{$class} = {};
2934 for my $type (keys %{$xpathset->{$class}}) {
2935 $data->{$class}->{$type} = {};
2936 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};