1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use OpenSRF::Utils::JSON;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Registry of metadata formats known to the ingest code, keyed by format
# name. Every entry visible here supplies the format's XML namespace URI
# under `ns`; elsewhere in this file a compiled stylesheet is cached under
# `xslt` for the `mods` and `mods3` entries.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Class name used as the invocant for the ->debug / ->error / ->internal
# logging calls throughout this file.
39 my $log = 'OpenSRF::Utils::Logger';
# File-scoped XML parser and XSLT processor, created once and reused by the
# packages below.
# NOTE(review): no `use XML::LibXML` / `use XML::LibXSLT` line is visible
# here -- confirm both modules are loaded before these constructors run.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# Lazy one-time setup, skipped once $xpathset has keys: caches the MODS and
# MODS v3 stylesheets and loads the indexing XPath definitions from
# config.metabib_field through open-ils.cstore.
# NOTE(review): presumably the body of post_init (callers invoke
# OpenILS::Application::Ingest->post_init()); its `sub` line is not visible
# here -- confirm.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
# Stylesheet directory, read from the settings client (dirs => xsl).
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile each stylesheet only when it is not cached yet.
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Fetch the field definitions whose search_field column is 't'.
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
73 # XXX testing new metabib field use for faceting
74 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
75 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Index every definition by field class, then by field name, keeping its
# XPath expression, database id and source format.
79 if (ref $req and @$req) {
81 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
82 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
83 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
84 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Rewrite every character in the range U+0080..U+FFFD as a hexadecimal
# numeric character reference (&#xHHHH;). Characters above U+FFFD fall
# outside the character class and are left untouched.
100 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
104 # --------------------------------------------------------------------------------
107 package OpenILS::Application::Ingest::Biblio;
108 use base qw/OpenILS::Application::Ingest/;
109 use Unicode::Normalize;
# Read/write ingest of one bib object. Runs the read-only ingest to derive
# the index rows, then replaces the stored rows through open-ils.cstore
# inside a transaction and re-maps the bib onto a metarecord.
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value.
111 sub rw_biblio_ingest_single_object {
# Derive full_rec rows, field entries, fingerprint and descriptor.
116 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
117 return undef unless ($blob);
# Copy the freshly computed fingerprint and quality onto the bib object.
119 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
120 $bib->quality( $blob->{fingerprint}->{quality} );
122 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
124 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
126 # update full_rec stuff ...
127 my $tmp = $cstore->request(
128 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
129 { record => $bib->id }
# Delete the stored rows by id, then create the newly derived ones.
132 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
133 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
135 # update rec_descriptor stuff ...
136 $tmp = $cstore->request(
137 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
138 { record => $bib->id }
141 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
142 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
144 # deal with classed fields...
# First clear the stored entries of each field class for this bib.
145 for my $class ( qw/title author subject keyword series/ ) {
146 $tmp = $cstore->request(
147 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
148 { source => $bib->id }
151 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Then create each new entry; the cstore method name is built from the
# object's class name with the leading "Fieldmapper::" removed.
153 for my $obj ( @{ $blob->{field_entries} } ) {
154 my $class = $obj->class_name;
155 $class =~ s/^Fieldmapper:://o;
157 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Remove the bib's existing metarecord source maps.
162 $tmp = $cstore->request(
163 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
164 { source => $bib->id }
167 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Load the metarecords those maps pointed at (only when there were maps).
# NOTE(review): `my $x = ... if COND` is documented in perlsyn as having
# undefined behaviour; the following line normalises a non-reference value to [].
170 my $old_mrs = $cstore->request(
171 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
172 )->gather(1) if (@$tmp);
174 $old_mrs = [] if (!ref($old_mrs));
# Examine each previously mapped metarecord: its fingerprint is compared
# with the bib's new fingerprint, its remaining source maps are looked up,
# and a metarecord delete is issued. The branch structure tying these
# steps together is not visible here.
177 for my $m (@$old_mrs) {
178 if ($m->fingerprint eq $bib->fingerprint) {
181 my $others = $cstore->request(
182 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
187 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
197 # Get the matching MR, if any.
198 $mr = $cstore->request(
199 'open-ils.cstore.direct.metabib.metarecord.search',
200 { fingerprint => $bib->fingerprint }
# Find metarecord-level holds (hold_type 'M') that target old metarecords
# flagged isdeleted.
203 $holds = $cstore->request(
204 'open-ils.cstore.direct.action.hold_request.search.atomic',
205 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
206 )->gather(1) if (@$old_mrs);
# Persist each of those holds (the change applied to $h is not visible here).
209 for my $h (@$holds) {
211 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with the bib as its master.
218 $mr = new Fieldmapper::metabib::metarecord;
219 $mr->fingerprint( $bib->fingerprint );
220 $mr->master_record( $bib->id );
223 "open-ils.cstore.direct.metabib.metarecord.create",
224 $mr => { quiet => 'true' }
# Update the holds that are not yet flagged as changed.
228 for my $h (grep { !$_->ischanged } @$holds) {
230 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Pick the master record: look up the bibs mapped to the metarecord, ordered
# by quality descending, and compare the best one with this bib.
233 my $mrm = $cstore->request(
234 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
235 { metarecord => $mr->id }
239 my $best = $cstore->request(
240 "open-ils.cstore.direct.biblio.record_entry.search",
241 { id => [ map { $_->source } @$mrm ] },
242 { 'select' => { bre => [ qw/id quality/ ] },
243 order_by => { bre => "quality desc" },
# The existing best record stays master only when its quality is strictly higher.
248 if ($best->quality > $bib->quality) {
249 $mr->master_record($best->id);
251 $mr->master_record($bib->id);
254 $mr->master_record($bib->id);
259 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map the bib onto the chosen metarecord and save the bib itself.
262 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
263 $mrm->source($bib->id);
264 $mrm->metarecord($mr->id);
266 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
267 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# Commit; a false commit result makes the sub return undef.
269 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Register the sub above under its API name.
273 __PACKAGE__->register_method(
274 api_name => "open-ils.ingest.full.biblio.object",
275 method => "rw_biblio_ingest_single_object",
# Read/write ingest of one bib record identified by $rec: loads the record
# through open-ils.cstore and hands it to open-ils.ingest.full.biblio.object.
# NOTE(review): $rec is presumably the record id argument; its declaration
# is not visible here -- confirm.
280 sub rw_biblio_ingest_single_record {
285 OpenILS::Application::Ingest->post_init();
286 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# The transaction only brackets the read; it is rolled back right after it.
287 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
289 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
291 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# Nothing to ingest when no record came back. `@$r` assumes the retrieved
# object can be dereferenced as an array -- confirm.
294 return undef unless ($r and @$r);
# run() returns a list; only its first element is passed back.
296 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Register the sub above under its API name.
298 __PACKAGE__->register_method(
299 api_name => "open-ils.ingest.full.biblio.record",
300 method => "rw_biblio_ingest_single_record",
# Read-only ingest of a bib object: derives the flattened MARC rows, the
# indexed field entries, the fingerprint and the record descriptor from
# $bib->marc and returns them in one hash.
305 sub ro_biblio_ingest_single_object {
# Entityize the MARC XML, then parse it once for the DOM-based extractors.
309 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
311 my $document = $parser->parse_string($xml);
# flat_marc and field_entry receive the parsed document; fingerprint and
# descriptor receive the entityized XML string.
313 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
314 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
315 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
316 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp every derived object with the owning bib id; the descriptor is only
# stamped when one was produced.
318 $_->source($bib->id) for (@mXfe);
319 $_->record($bib->id) for (@mfr);
320 $rd->record($bib->id) if ($rd);
322 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
324 __PACKAGE__->register_method(
325 api_name => "open-ils.ingest.full.biblio.object.readonly",
326 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a raw MARC XML string. Produces the same four result
# parts as the object variant, but stamps no record id on them (no id is
# available in the visible lines).
331 sub ro_biblio_ingest_single_xml {
# The next argument is the XML text; entityize it before parsing.
334 my $xml = OpenILS::Application::Ingest::entityize(shift);
336 my $document = $parser->parse_string($xml);
# flat_marc and field_entry receive the parsed document; fingerprint and
# descriptor receive the entityized XML string.
338 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
339 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
340 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
341 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
343 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
345 __PACKAGE__->register_method(
346 api_name => "open-ils.ingest.full.biblio.xml.readonly",
347 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bib record identified by $rec: retrieves the
# record through open-ils.cstore, runs the XML ingest on its MARC and stamps
# $rec on the derived objects.
352 sub ro_biblio_ingest_single_record {
357 OpenILS::Application::Ingest->post_init();
358 my $r = OpenSRF::AppSession
359 ->create('open-ils.cstore')
360 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Nothing to ingest when no record came back.
363 return undef unless ($r and @$r);
365 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
367 $_->source($rec) for (@{$res->{field_entries}});
368 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike the object variant, this call is not guarded; it
# would die if the descriptor extraction returned undef -- confirm.
369 $res->{descriptor}->record($rec);
# Register the sub above under its API name.
373 __PACKAGE__->register_method(
374 api_name => "open-ils.ingest.full.biblio.record.readonly",
375 method => "ro_biblio_ingest_single_record",
# Streaming variant of the read-only record ingest: reads record ids from
# the client one message at a time and responds with one ingest result per id.
380 sub ro_biblio_ingest_stream_record {
384 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
386 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
388 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
390 my $rec = $resp->content;
391 last unless (defined $rec);
393 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
394 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id on the derived objects before responding.
396 $_->source($rec) for (@{$res->{field_entries}});
397 $_->record($rec) for (@{$res->{full_rec}});
399 $client->respond( $res );
# Register the sub above under its API name.
404 __PACKAGE__->register_method(
405 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
406 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the read-only XML ingest: reads MARC XML strings
# from the client one message at a time and responds with one result each.
411 sub ro_biblio_ingest_stream_xml {
415 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
417 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
419 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
421 my $xml = $resp->content;
422 last unless (defined $xml);
424 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
425 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
427 $client->respond( $res );
# Register the sub above under its API name.
432 __PACKAGE__->register_method(
433 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
434 method => "ro_biblio_ingest_stream_xml",
# Streaming import helper: reads bib objects from the client, runs the
# read-only XML ingest on each object's MARC, stamps the bib id on the
# derived rows and responds with the result.
# NOTE(review): despite the rw_/import naming, no write request is visible
# in these lines -- confirm whether the caller persists the result.
439 sub rw_biblio_ingest_stream_import {
443 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
445 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
447 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
449 my $bib = $resp->content;
450 last unless (defined $bib);
452 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
453 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
455 $_->source($bib->id) for (@{$res->{field_entries}});
456 $_->record($bib->id) for (@{$res->{full_rec}});
458 $client->respond( $res );
# Register the sub above under its API name.
463 __PACKAGE__->register_method(
464 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
465 method => "rw_biblio_ingest_stream_import",
471 # --------------------------------------------------------------------------------
474 package OpenILS::Application::Ingest::Authority;
475 use base qw/OpenILS::Application::Ingest/;
476 use Unicode::Normalize;
# Read-only ingest of an authority record object: flattens its MARC into
# full_rec rows stamped with the record id. Only full_rec is returned.
478 sub ro_authority_ingest_single_object {
# Entityize the MARC XML, then parse it for the flattening step.
482 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
484 my $document = $parser->parse_string($xml);
486 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
488 $_->record($bib->id) for (@mfr);
490 return { full_rec => \@mfr };
# Register the sub above under its API name.
492 __PACKAGE__->register_method(
493 api_name => "open-ils.ingest.full.authority.object.readonly",
494 method => "ro_authority_ingest_single_object",
# Read-only ingest of raw authority MARC XML: returns the flattened
# full_rec rows without stamping a record id on them.
499 sub ro_authority_ingest_single_xml {
# The next argument is the XML text; entityize it before parsing.
502 my $xml = OpenILS::Application::Ingest::entityize(shift);
504 my $document = $parser->parse_string($xml);
506 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
508 return { full_rec => \@mfr };
# Register the sub above under its API name.
510 __PACKAGE__->register_method(
511 api_name => "open-ils.ingest.full.authority.xml.readonly",
512 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record identified by $rec: retrieves
# the record through open-ils.cstore, runs the authority XML ingest on its
# MARC and stamps $rec on the resulting rows.
517 sub ro_authority_ingest_single_record {
522 OpenILS::Application::Ingest->post_init();
523 my $r = OpenSRF::AppSession
524 ->create('open-ils.cstore')
525 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
# Nothing to ingest when no record came back.
528 return undef unless ($r and @$r);
530 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
532 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the sub registered as open-ils.ingest.full.authority.xml.readonly
# visibly returns only { full_rec => ... }, so $res->{descriptor} looks
# undefined here and this method call would die -- confirm.
533 $res->{descriptor}->record($rec);
# Register the sub above under its API name.
537 __PACKAGE__->register_method(
538 api_name => "open-ils.ingest.full.authority.record.readonly",
539 method => "ro_authority_ingest_single_record",
# Streaming variant of the read-only authority record ingest: reads record
# ids from the client one message at a time and responds with one result
# per id.
544 sub ro_authority_ingest_stream_record {
548 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
550 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
552 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
554 my $rec = $resp->content;
555 last unless (defined $rec);
557 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
558 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
560 $_->record($rec) for (@{$res->{full_rec}});
562 $client->respond( $res );
# Register the sub above under its API name.
567 __PACKAGE__->register_method(
568 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
569 method => "ro_authority_ingest_stream_record",
# Streaming variant of the read-only authority XML ingest: reads MARC XML
# strings from the client one message at a time and responds with one
# result each.
574 sub ro_authority_ingest_stream_xml {
578 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
580 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
582 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
584 my $xml = $resp->content;
585 last unless (defined $xml);
587 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
588 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
590 $client->respond( $res );
# Register the sub above under its API name.
595 __PACKAGE__->register_method(
596 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
597 method => "ro_authority_ingest_stream_xml",
# Streaming authority import helper: reads record objects from the client,
# runs the read-only authority XML ingest on each object's MARC, stamps the
# record id on the rows and responds with the result.
# NOTE(review): despite the rw_/import naming, no write request is visible
# in these lines -- confirm whether the caller persists the result.
602 sub rw_authority_ingest_stream_import {
606 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the lines visible here.
608 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Keep looping while recv hands back a message (5 second timeout per wait).
610 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# Undefined content ends the stream.
612 my $bib = $resp->content;
613 last unless (defined $bib);
615 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
616 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
618 $_->record($bib->id) for (@{$res->{full_rec}});
620 $client->respond( $res );
# Register the sub above under its API name.
625 __PACKAGE__->register_method(
626 api_name => "open-ils.ingest.full.authority.bib_stream.import",
627 method => "rw_authority_ingest_stream_import",
633 # --------------------------------------------------------------------------------
634 # MARC index extraction
636 package OpenILS::Application::Ingest::XPATH;
637 use base qw/OpenILS::Application::Ingest/;
638 use Unicode::Normalize;
640 # give this an XML documentElement and an XPATH expression
# Collects the text content of the nodes matched by the XPath expression
# into one buffer, appending a single space after each piece.
641 sub xpath_to_string {
645 my $ns_prefix = shift;
# Bind the prefix to the namespace URI on the element, but only when both
# were supplied.
648 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
652 # grab the set of matching nodes
653 my @nodes = $xml->findnodes( $xpath );
654 for my $value (@nodes) {
656 # grab all children of the node
657 my @children = $value->childNodes();
658 for my $child (@children) {
660 # add the child's content to the growing buffer
# quotemeta lets the text be matched literally by the regex below. The
# uniqueness test is a substring match, so text already contained anywhere
# in the buffer is skipped when $unique is set.
661 my $content = quotemeta($child->textContent);
662 next if ($unique && $string =~ /$content/); # uniquify the values
663 $string .= $child->textContent . " ";
# NOTE(review): presumably the fallback for a node without children; the
# enclosing condition is not visible here -- confirm.
666 $string .= $value->textContent . " ";
# Extracts index strings from MARC XML for the requested field classes and
# responds with one <class>_field_entry Fieldmapper object per definition
# in $xpathset.
672 sub class_index_string_xml {
678 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document or an XML string (entityized, then parsed).
679 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
683 for my $class (@classes) {
684 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
685 for my $type ( keys %{ $xpathset->{$class} } ) {
# Look up the field definition and the settings of its source format.
687 my $def = $xpathset->{$class}->{$type};
688 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format.
693 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
694 $transform_cache{$def->{format}} = $document;
# The format's namespace URI and the format name are passed after the
# XPath; presumably the format name serves as the namespace prefix -- confirm.
697 my $value = xpath_to_string(
698 $document->documentElement => $def->{xpath},
699 $sf->{ns} => $def->{format},
# Normalise the extracted text: decompose (NFD), drop mark characters (\pM)
# and \pC characters, trim trailing non-word characters, and remove runs of
# periods that sit directly between two word characters.
705 $value = NFD($value);
706 $value =~ s/\pM+//sgo;
707 $value =~ s/\pC+//sgo;
708 $value =~ s/\W+$//sgo;
710 $value =~ s/\b\.+\b//sgo;
# Wrap the value in a field-entry object tagged with the definition's id.
713 my $fm = $class_constructor->new;
714 $fm->value( $value );
715 $fm->field( $xpathset->{$class}->{$type}->{id} );
716 $client->respond($fm);
# Register the sub above under its API name.
721 __PACKAGE__->register_method(
722 api_name => "open-ils.ingest.field_entry.class.xml",
723 method => "class_index_string_xml",
# Record-id front end for open-ils.ingest.field_entry.class.xml: retrieves
# the record, extracts field entries for the requested classes from its
# MARC and responds with each entry.
729 sub class_index_string_record {
735 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, whereas the
# sibling all_index_string_record retrieves from biblio.record_entry --
# confirm which table is intended here.
736 my $r = OpenSRF::AppSession
737 ->create('open-ils.cstore')
738 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
# Nothing to extract when no record came back.
741 return undef unless ($r and @$r);
743 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
745 $client->respond($fm);
# Register the sub above under its API name.
749 __PACKAGE__->register_method(
750 api_name => "open-ils.ingest.field_entry.class.record",
751 method => "class_index_string_record",
# Extracts field entries for every class present in $xpathset from the
# given XML and responds with each entry.
757 sub all_index_string_xml {
762 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
763 $client->respond($fm);
# Register the sub above under its API name.
767 __PACKAGE__->register_method(
768 api_name => "open-ils.ingest.extract.field_entry.all.xml",
769 method => "all_index_string_xml",
# Record-id front end for the all-classes extraction: retrieves the bib
# record through open-ils.cstore and responds with a field entry for every
# class present in $xpathset.
775 sub all_index_string_record {
780 OpenILS::Application::Ingest->post_init();
781 my $r = OpenSRF::AppSession
782 ->create('open-ils.cstore')
783 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Nothing to extract when no record came back.
786 return undef unless ($r and @$r);
788 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
790 $client->respond($fm);
# Register the sub above under its API name.
794 __PACKAGE__->register_method(
795 api_name => "open-ils.ingest.extract.field_entry.all.record",
796 method => "all_index_string_record",
802 # --------------------------------------------------------------------------------
805 package OpenILS::Application::Ingest::FlatMARC;
806 use base qw/OpenILS::Application::Ingest/;
807 use Unicode::Normalize;
# Flattens a parsed MARCXML document into full_rec rows, walking the
# leader, the control fields and the subfields of each data field.
# The Fieldmapper class is chosen by $xmltype, which defaults to 'metabib'.
810 sub _marcxml_to_full_rows {
813 my $xmltype = shift || 'metabib';
815 my $type = "Fieldmapper::${xmltype}::full_rec";
# Take the first element whose local name is "record", whatever its namespace.
819 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
821 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
822 next unless $tagline;
827 my $val = $tagline->textContent;
# Control fields: the row's tag comes from the element's "tag" attribute.
837 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
838 next unless $tagline;
842 $ns->tag( $tagline->getAttribute( "tag" ) );
843 my $val = $tagline->textContent;
# Data fields: tag and both indicators are read once per field, then each
# subfield is visited.
853 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
854 next unless $tagline;
856 my $tag = $tagline->getAttribute( "tag" );
857 my $ind1 = $tagline->getAttribute( "ind1" );
858 my $ind2 = $tagline->getAttribute( "ind2" );
860 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
868 $ns->subfield( $data->getAttribute( "code" ) );
869 my $val = $data->textContent;
# Subfield values are stored lower-cased.
874 $ns->value( lc($val) );
880 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Flattens MARC XML into full_rec rows and responds with each row.
# NOTE(review): presumably the body of flat_marc_xml (the name used by the
# registrations below); its `sub` line is not visible here -- confirm.
889 $log->debug("processing [$xml]");
# Accept either a parsed document or an XML string (entityized, then parsed).
891 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name the method was invoked under.
893 my $type = 'metabib';
894 $type = 'authority' if ($self->api_name =~ /authority/o);
896 OpenILS::Application::Ingest->post_init();
898 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same implementation is registered for authority and for biblio records.
901 __PACKAGE__->register_method(
902 api_name => "open-ils.ingest.flat_marc.authority.xml",
903 method => "flat_marc_xml",
908 __PACKAGE__->register_method(
909 api_name => "open-ils.ingest.flat_marc.biblio.xml",
910 method => "flat_marc_xml",
# Record-id front end for the flat MARC extraction: retrieves the record
# of the matching type, flattens its MARC and responds with each row.
916 sub flat_marc_record {
# Switch to authority when invoked under an authority API name.
922 $type = 'authority' if ($self->api_name =~ /authority/o);
924 OpenILS::Application::Ingest->post_init();
925 my $r = OpenSRF::AppSession
926 ->create('open-ils.cstore')
927 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
# Nothing to flatten without a record that carries MARC.
931 return undef unless ($r and $r->marc);
933 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Each row is sent to the client and also logged as JSON.
934 for my $row (@rows) {
935 $client->respond($row);
936 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# The same implementation is registered for biblio and for authority records.
940 __PACKAGE__->register_method(
941 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
942 method => "flat_marc_record",
947 __PACKAGE__->register_method(
948 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
949 method => "flat_marc_record",
955 # --------------------------------------------------------------------------------
958 package OpenILS::Application::Ingest::Biblio::Fingerprint;
959 use base qw/OpenILS::Application::Ingest/;
960 use Unicode::Normalize;
961 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of the bib record identified by $rec by
# retrieving the record and running open-ils.ingest.fingerprint.xml on its MARC.
963 sub biblio_fingerprint_record {
968 OpenILS::Application::Ingest->post_init();
970 my $r = OpenSRF::AppSession
971 ->create('open-ils.cstore')
972 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
# Nothing to fingerprint without a record that carries MARC.
975 return undef unless ($r and $r->marc);
977 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the line after this one,
# so interpolating it here would log a HASH(0x...) address rather than the
# fingerprint text -- confirm.
978 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Truncate the quality score to an integer.
979 $fp->{quality} = int($fp->{quality});
# Register the sub above under its API name.
982 __PACKAGE__->register_method(
983 api_name => "open-ils.ingest.fingerprint.record",
984 method => "biblio_fingerprint_record",
# Runs the configured biblio_fingerprint script against entityized MARC XML
# through OpenILS::Utils::ScriptRunner.
990 sub biblio_fingerprint {
# The next argument is the XML text.
993 my $xml = OpenILS::Application::Ingest::entityize(shift);
995 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# app_settings; a single path value is wrapped into an array reference.
998 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
999 my $conf = OpenSRF::Utils::SettingsClient->new;
1001 my $libs = $conf->config_value(@pfx, 'script_path');
1002 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
1003 my $script_libs = (ref($libs)) ? $libs : [$libs];
1005 $log->debug("Loading script $script_file for biblio fingerprinting...");
# Build the script runner (any guard around this construction is not
# visible here).
1007 $fp_script = new OpenILS::Utils::ScriptRunner
1008 ( file => $script_file,
1009 paths => $script_libs,
1010 reset_count => 100 );
# Hand the MARC XML to the script under environment => { marc => ... }.
1013 $fp_script->insert('environment' => {marc => $xml} => 1);
# On a false run result the error is logged; the `return undef` only
# executes if $log->error itself returns a true value.
1015 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1016 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the sub above under its API name.
1020 __PACKAGE__->register_method(
1021 api_name => "open-ils.ingest.fingerprint.xml",
1022 method => "biblio_fingerprint",
# Runs the configured biblio_descriptor script against entityized MARC XML
# through OpenILS::Utils::ScriptRunner.
1028 sub biblio_descriptor {
# The next argument is the XML text.
1031 my $xml = OpenILS::Application::Ingest::entityize(shift);
1033 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# app_settings; a single path value is wrapped into an array reference.
1036 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1037 my $conf = OpenSRF::Utils::SettingsClient->new;
1039 my $libs = $conf->config_value(@pfx, 'script_path');
1040 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
1041 my $script_libs = (ref($libs)) ? $libs : [$libs];
1043 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# Build the script runner (any guard around this construction is not
# visible here).
1045 $rd_script = new OpenILS::Utils::ScriptRunner
1046 ( file => $script_file,
1047 paths => $script_libs,
1048 reset_count => 100 );
1051 $log->debug("Setting up environment for descriptor extraction script...");
# Hand the MARC XML to the script under the key 'environment.marc'.
1052 $rd_script->insert('environment.marc' => $xml => 1);
1053 $log->debug("Environment building complete...");
# On a false run result the error is logged; the `return undef` only
# executes if $log->error itself returns a true value.
1055 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1056 $log->debug("Script for biblio descriptor extraction completed successfully");
# Register the sub above under its API name.
1060 __PACKAGE__->register_method(
1061 api_name => "open-ils.ingest.descriptor.xml",
1062 method => "biblio_descriptor",
# Returns whatever open-ils.storage.transaction.current reports, which
# callers use as a truth value for "a storage transaction is open".
1072 sub in_transaction {
1073 OpenILS::Application::Ingest->post_init();
1074 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction and returns the current-transaction value.
# The branch and try/catch structure around these statements is not
# visible here.
1077 sub begin_transaction {
1081 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is already open.
1082 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1086 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1087 #__PACKAGE__->st_sess->connect;
1088 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A false or undefined begin result triggers a rollback and a PANIC exception.
1089 unless (defined $r and $r) {
1090 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1091 #__PACKAGE__->st_sess->disconnect;
1092 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1096 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1099 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the storage transaction; an error caught by the handler is
# re-thrown as an OpenSRF::EX::PANIC. The branch structure around these
# statements is not visible here.
1102 sub rollback_transaction {
1106 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is open.
1107 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1111 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1113 $log->debug("Ingest isn't inside a transaction.", INFO);
1115 } catch Error with {
1116 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the storage transaction; a failed commit is rolled back and
# reported as an OpenSRF::EX::PANIC. The branch structure around these
# statements is not visible here.
1122 sub commit_transaction {
1126 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is open.
1127 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1130 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1132 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
# A false or undefined commit result triggers a rollback and a PANIC exception.
1133 unless (defined $r and $r) {
1134 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1135 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1137 #__PACKAGE__->st_sess->disconnect;
1139 $log->debug("Ingest isn't inside a transaction.", INFO);
1141 } catch Error with {
1142 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Looks up the named method, runs it with the remaining arguments and
# returns only the first element of the result list.
# NOTE(review): presumably the body of storage_req (called throughout this
# file as ->storage_req(...)); its `sub` line is not visible here -- confirm.
1151 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1152 return shift( @res );
# Removes the derived rows (full_rec and record_descriptor) of authority
# record $rec, working inside a savepoint named scrub_authority_record.
1155 sub scrub_authority_record {
# Open a transaction when none is active yet.
1161 if (!OpenILS::Application::Ingest->in_transaction) {
1162 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1168 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1170 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1171 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1173 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and rewind to the savepoint (presumably
# inside a catch handler whose opening line is not visible here).
1175 $log->debug('Scrubbing failed : '.shift(), ERROR);
1176 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
1180 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1181 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
1184 __PACKAGE__->register_method(
1185 api_name => "open-ils.worm.scrub.authority",
1186 method => "scrub_authority_record",
# Removes the derived metabib rows of bib record(s) $rec and repairs or
# deletes metarecords that had $rec as their master, working inside a
# savepoint named scrub_metabib_record.
1192 sub scrub_metabib_record {
# A hash reference is treated as search criteria and resolved through
# record_entry.search_where.
1197 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1198 $rec = OpenILS::Application::Ingest->storage_req(
1199 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction when none is active yet.
1204 if (!OpenILS::Application::Ingest->in_transaction) {
1205 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1211 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Drop every derived row that references the record.
1213 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1214 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1215 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1216 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1217 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1218 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1219 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1220 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1222 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1223 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
# For each such metarecord, look at the source maps that remain: the master
# is reassigned to the first remaining source (clearing mods), or the
# metarecord is deleted. The condition choosing between the two is not
# visible here.
1225 for my $mr (@$masters) {
1226 $log->debug( "Found metarecord whose master is $rec", DEBUG);
1227 my $others = OpenILS::Application::Ingest->storage_req(
1228 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
1231 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1232 $mr->master_record($others->[0]->source);
1233 OpenILS::Application::Ingest->storage_req(
1234 'open-ils.storage.direct.metabib.metarecord.remote_update',
1236 { master_record => $others->[0]->source, mods => undef }
1239 warn "Removing metarecord whose master is $rec";
1240 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1241 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1242 warn "Metarecord removed";
1243 $log->debug( "Metarecord removed", DEBUG);
1247 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and rewind to the savepoint (presumably
# inside a catch handler whose opening line is not visible here).
1250 $log->debug('Scrubbing failed : '.shift(), ERROR);
1251 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# Finish the transaction only when $commit is set: commit on success,
# roll back otherwise.
1255 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1256 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
1259 __PACKAGE__->register_method(
1260 api_name => "open-ils.worm.scrub.biblio",
1261 method => "scrub_metabib_record",
# Re-ingests every bib record mapped to metarecord $mrec by calling
# wormize_biblio_record on each source, building a per-record result hash
# (record, metarecord, success) on both the normal and the error path.
1266 sub wormize_biblio_metarecord {
# All source maps that belong to the metarecord.
1271 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1274 for my $r (@$recs) {
# Called as a plain function, passing $self and $client through explicitly.
1277 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): `$rec` is not declared in the lines visible here (only
# $mrec and $r are); `$rec->metarecord` may have been meant as
# `$r->metarecord` -- confirm.
1279 { record => $r->source,
1280 metarecord => $rec->metarecord,
1281 success => $success,
1284 } catch Error with {
1287 { record => $r->source,
1288 metarecord => $rec->metarecord,
1289 success => $success,
# The same implementation is registered under four API names; the
# .nomap / .noscrub suffixes presumably alter behaviour through api_name
# checks -- confirm.
1297 __PACKAGE__->register_method(
1298 api_name => "open-ils.worm.wormize.metarecord",
1299 method => "wormize_biblio_metarecord",
1304 __PACKAGE__->register_method(
1305 api_name => "open-ils.worm.wormize.metarecord.nomap",
1306 method => "wormize_biblio_metarecord",
1311 __PACKAGE__->register_method(
1312 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1313 method => "wormize_biblio_metarecord",
1318 __PACKAGE__->register_method(
1319 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1320 method => "wormize_biblio_metarecord",
# wormize_biblio_record: rebuilds the derived metabib rows (fingerprint,
# full_rec, record_descriptor, per-class field entries and, optionally, the
# metarecord mapping) for the bib record(s) identified by $rec.
# "noscrub" in the API name skips the scrub step; "nomap" skips the
# metarecord mapping.
# NOTE(review): this listing omits some short source lines (argument
# unpacking, try/catch openers, closing braces, a few declarations); the
# comments below describe only what the visible lines show.
1327 sub wormize_biblio_record {
# A HASH ref is treated as a search clause and replaced by the result of the
# id_list search.
1332 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1333 $rec = OpenILS::Application::Ingest->storage_req(
1334 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a storage transaction only when none is in progress; failing to do so
# is fatal (PANIC).
1340 if (!OpenILS::Application::Ingest->in_transaction) {
1341 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1347 # clean up the cruft
1348 unless ($self->api_name =~ /noscrub/o) {
1349 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Fetch the record_entry rows to process.
1353 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# Accumulators filled by the extraction loop and written out in bulk later.
1356 my @rec_descriptor = ();
1364 my %metarecord = ();
1365 my @source_map = ();
1366 for my $r (@$bibs) {
# Per-record savepoint, named after the record id; the failure handler
# further down rolls back to it.
1368 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1370 my $xml = $parser->parse_string($r->marc);
1372 #update the fingerprint
1373 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
# The record_entry is only written back when fingerprint or quality changed.
1374 OpenILS::Application::Ingest->storage_req(
1375 'open-ils.storage.direct.biblio.record_entry.remote_update',
1377 { fingerprint => $fp->{fingerprint},
1378 quality => int($fp->{quality}) }
1379 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1381 # the full_rec stuff
1382 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1383 $fr->record( $r->id );
1384 push @full_rec, $fr;
1387 # the rec_descriptor stuff
1388 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1389 $rd->record( $r->id );
1390 push @rec_descriptor, $rd;
1392 # the indexing field entry stuff
1393 for my $class ( qw/title author subject keyword series/ ) {
1394 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1395 $fe->source( $r->id );
1396 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: look up the metarecord that carries this fingerprint.
# The lines that build a new metarecord (this record as master) presumably
# run only when the lookup found none -- the guard is not visible in this
# listing.
1400 unless ($self->api_name =~ /nomap/o) {
1401 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1404 $mr = Fieldmapper::metabib::metarecord->new;
1405 $mr->fingerprint( $fp->{fingerprint} );
1406 $mr->master_record( $r->id );
1407 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Queue a source-map row linking this record to the metarecord.
1410 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1411 $mr_map->metarecord( $mr->id );
1412 $mr_map->source( $r->id );
1413 push @source_map, $mr_map;
# Remember every metarecord touched so its master can be re-elected below.
1415 $metarecord{$mr->id} = $mr;
1417 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Extraction failure: log it and undo this record's work only.
1419 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1420 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Bulk-insert phase, wrapped in the 'wormize_record' savepoint.
1425 if (@rec_descriptor) {
1426 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1428 OpenILS::Application::Ingest->storage_req(
1429 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For each touched metarecord, load the bibs it maps to and make the one
# with the highest quality the master; mods is reset to undef.
1433 for my $mr ( values %metarecord ) {
1434 my $sources = OpenILS::Application::Ingest->storage_req(
1435 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1439 my $bibs = OpenILS::Application::Ingest->storage_req(
1440 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1441 [ map { $_->source } @$sources ]
1444 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1446 OpenILS::Application::Ingest->storage_req(
1447 'open-ils.storage.direct.metabib.metarecord.remote_update',
1449 { master_record => $master->id, mods => undef }
1453 OpenILS::Application::Ingest->storage_req(
1454 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1456 ) if (@rec_descriptor);
1458 OpenILS::Application::Ingest->storage_req(
1459 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch.create per index class, each skipped when nothing was extracted
# for that class.
1463 OpenILS::Application::Ingest->storage_req(
1464 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1465 @{ $field_entry{title} }
1466 ) if (@{ $field_entry{title} });
1468 OpenILS::Application::Ingest->storage_req(
1469 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1470 @{ $field_entry{author} }
1471 ) if (@{ $field_entry{author} });
1473 OpenILS::Application::Ingest->storage_req(
1474 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1475 @{ $field_entry{subject} }
1476 ) if (@{ $field_entry{subject} });
1478 OpenILS::Application::Ingest->storage_req(
1479 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1480 @{ $field_entry{keyword} }
1481 ) if (@{ $field_entry{keyword} });
1483 OpenILS::Application::Ingest->storage_req(
1484 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1485 @{ $field_entry{series} }
1486 ) if (@{ $field_entry{series} });
1488 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Insert failure: log it and roll back to the 'wormize_record' savepoint.
1494 $log->debug('Wormization failed : '.shift(), ERROR);
1495 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# Finish the transaction only when $commit is set.
# NOTE(review): presumably $commit marks a transaction opened by this call;
# the assignment is not visible in this listing -- confirm.
1499 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1500 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API registrations: every variant dispatches to wormize_biblio_record, which
# inspects its own api_name for "nomap" / "noscrub".
1503 __PACKAGE__->register_method(
1504 api_name => "open-ils.worm.wormize.biblio",
1505 method => "wormize_biblio_record",
1509 __PACKAGE__->register_method(
1510 api_name => "open-ils.worm.wormize.biblio.nomap",
1511 method => "wormize_biblio_record",
1515 __PACKAGE__->register_method(
1516 api_name => "open-ils.worm.wormize.biblio.noscrub",
1517 method => "wormize_biblio_record",
1521 __PACKAGE__->register_method(
1522 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1523 method => "wormize_biblio_record",
# wormize_authority_record: authority-record counterpart of the bib ingest.
# It optionally scrubs the record, flattens each record's MARC into full_rec
# rows and batch-creates them inside the 'wormize_authority_record'
# savepoint.  Record-descriptor handling is commented out in the source.
# NOTE(review): this listing omits some short source lines (argument
# unpacking, try/catch openers, closing braces).
1528 sub wormize_authority_record {
# Open a storage transaction only when none is in progress.
1534 if (!OpenILS::Application::Ingest->in_transaction) {
1535 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1541 # clean up the cruft
1542 unless ($self->api_name =~ /noscrub/o) {
1543 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the name, $bibs holds authority.record_entry rows here.
1547 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1550 my @rec_descriptor = ();
1551 for my $r (@$bibs) {
1552 my $xml = $parser->parse_string($r->marc);
1554 # the full_rec stuff
1555 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1556 $fr->record( $r->id );
1557 push @full_rec, $fr;
1560 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1561 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1562 #$rd->record( $r->id );
1563 #push @rec_descriptor, $rd;
# Write phase: everything between set and release can be undone by the
# rollback in the failure handler below.
1567 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1569 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1570 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1572 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1575 $log->debug('Wormization failed : '.shift(), ERROR);
1576 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Finish the transaction only when $commit is set (its assignment is not
# visible in this listing).
1580 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1581 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API registrations; the ".noscrub" variant skips the scrub call above.
1584 __PACKAGE__->register_method(
1585 api_name => "open-ils.worm.wormize.authority",
1586 method => "wormize_authority_record",
1590 __PACKAGE__->register_method(
1591 api_name => "open-ils.worm.wormize.authority.noscrub",
1592 method => "wormize_authority_record",
1598 # --------------------------------------------------------------------------------
1599 # MARC index extraction
1601 package OpenILS::Application::Ingest::XPATH;
1602 use base qw/OpenILS::Application::Ingest/;
1603 use Unicode::Normalize;
1605 # give this a MODS documentElement and an XPATH expression
# _xpath_to_string: evaluates $xpath against $xml and returns the text of the
# matching nodes as one string, each piece followed by a single space, in
# Unicode NFD form.  When both a namespace URI and a prefix are supplied the
# prefix is registered on $xml before the query runs.
# NOTE(review): the lines unpacking the other arguments and initialising
# $string are not visible in this listing.
1606 sub _xpath_to_string {
1610 my $ns_prefix = shift;
1613 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1617 # grab the set of matching nodes
1618 my @nodes = $xml->findnodes( $xpath );
1619 for my $value (@nodes) {
1621 # grab all children of the node
1622 my @children = $value->childNodes();
1623 for my $child (@children) {
1625 # add the childs content to the growing buffer
# With $unique set, a child is skipped when its (regex-quoted) text already
# occurs anywhere in the accumulated string: a substring test, not an
# exact-value comparison.
1626 my $content = quotemeta($child->textContent);
1627 next if ($unique && $string =~ /$content/); # uniquify the values
1628 $string .= $child->textContent . " ";
# Appends the node's own text; presumably guarded for nodes without
# children -- the guard line is not visible here.
1631 $string .= $value->textContent . " ";
1634 return NFD($string);
# class_all_index_string_xml: for one index class, responds with one
# Fieldmapper::metabib::<class>_field_entry per index type configured in
# $xpathset->{$class}.  $xml may be a MARCXML string or an already parsed
# document.
# NOTE(review): argument unpacking and some closing lines are not visible in
# this listing.
1637 sub class_all_index_string_xml {
1643 OpenILS::Application::Ingest->post_init();
1644 $xml = $parser->parse_string($xml) unless (ref $xml);
1646 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1647 for my $type ( keys %{ $xpathset->{$class} } ) {
# The record is run through $mods_sheet and the type's XPath is evaluated on
# the result with the "http://www.loc.gov/mods/" namespace.
# NOTE(review): the transform is repeated for every type although $xml does
# not change inside the loop.
1648 my $value = _xpath_to_string(
1649 $mods_sheet->transform($xml)->documentElement,
1650 $xpathset->{$class}->{$type}->{xpath},
1651 "http://www.loc.gov/mods/",
# Normalisation: decompose (NFD), drop combining marks (\pM) and category-C
# characters (\pC), trim trailing non-word characters, remove a period that
# directly follows a word character, then lowercase.
1658 $value = NFD($value);
1659 $value =~ s/\pM+//sgo;
1660 $value =~ s/\pC+//sgo;
1661 $value =~ s/\W+$//sgo;
1663 $value =~ s/(\w)\./$1/sgo;
1664 $value = lc($value);
# Build the field-entry object: normalised value plus the id of the index
# definition it came from.
1666 my $fm = $class_constructor->new;
1667 $fm->value( $value );
1668 $fm->field( $xpathset->{$class}->{$type}->{id} );
1669 $client->respond($fm);
1673 __PACKAGE__->register_method(
1674 api_name => "open-ils.worm.field_entry.class.xml",
1675 method => "class_all_index_string_xml",
# class_all_index_string_record: record-id front end for
# open-ils.worm.field_entry.class.xml.  It retrieves bib record $rec from
# storage, runs the XML variant on its MARC for $class and relays every
# field-entry object to the client.
1681 sub class_all_index_string_record {
1687 OpenILS::Application::Ingest->post_init();
1688 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1690 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1692 $client->respond($fm);
1696 __PACKAGE__->register_method(
1697 api_name => "open-ils.worm.field_entry.class.record",
1698 method => "class_all_index_string_record",
# class_index_string_xml: returns the index string for a single
# ($class, $type) pair.  The MARCXML is parsed if given as a string and
# transformed with $mods_sheet; the configured XPath is evaluated with the
# "mods" prefix bound to http://www.loc.gov/mods/ and the unique flag set.
1705 sub class_index_string_xml {
1712 OpenILS::Application::Ingest->post_init();
1713 $xml = $parser->parse_string($xml) unless (ref $xml);
1714 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1716 __PACKAGE__->register_method(
1717 api_name => "open-ils.worm.class.type.xml",
1718 method => "class_index_string_xml",
# class_index_string_record: record-id front end for
# open-ils.worm.class.type.xml.  It retrieves bib record $rec, extracts the
# ($class, $type) index string from its MARC and logs the result.
# NOTE(review): the statement that hands $d back to the caller is not
# visible in this listing.
1723 sub class_index_string_record {
1730 OpenILS::Application::Ingest->post_init();
1731 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1733 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1734 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1737 __PACKAGE__->register_method(
1738 api_name => "open-ils.worm.class.type.record",
1739 method => "class_index_string_record",
# Body of xml_xpath (the sub header and argument unpacking are not visible in
# this listing): evaluates a caller-supplied XPath directly against the
# document element of $xml, with no MODS transform, passing the optional
# namespace URI, prefix and unique flag through to _xpath_to_string.
1753 OpenILS::Application::Ingest->post_init();
1754 $xml = $parser->parse_string($xml) unless (ref $xml);
1755 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1757 __PACKAGE__->register_method(
1758 api_name => "open-ils.worm.xpath.xml",
1759 method => "xml_xpath",
# Body of record_xpath (the sub header and argument unpacking are not visible
# in this listing): retrieves bib record $rec and runs
# open-ils.worm.xpath.xml on its MARC with the caller's XPath, namespace URI,
# prefix and unique flag, then logs the result.
1773 OpenILS::Application::Ingest->post_init();
1774 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1776 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1777 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1780 __PACKAGE__->register_method(
1781 api_name => "open-ils.worm.xpath.record",
1782 method => "record_xpath",
1788 # --------------------------------------------------------------------------------
1791 package OpenILS::Application::Ingest::Biblio::Leader;
1792 use base qw/OpenILS::Application::Ingest/;
1793 use Unicode::Normalize;
# Regex fragments keyed by type-group name (VIS here; callers below also use
# MAP, BKS, SER, MIX, SCO and REC).  The fragments are matched against the
# single character taken from leader position 6.
# NOTE(review): only one entry of the hash is visible in this listing.
1795 our %marc_type_groups = (
1798 VIS => q/[gkro]{1}/,
# _type_re(@group_names): builds one anchored pattern that matches any of the
# named groups.  A hash slice (@hash{...}) is needed to fetch one fragment
# per name; the former $marc_type_groups{@_} was a single-element lookup, so
# multi-group calls such as _type_re(qw/MAP VIS/) never picked up the
# group patterns.  The alternation is wrapped in (?:...) so that ^ and $
# apply to every alternative rather than only the first and last.
1807 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
# Dispatch table: record_descriptor field name => code ref that computes the
# field.  The code refs read the package variables $ldr (leader) and $oo8
# (008 control field), which _extract_biblio_descriptors localises before
# calling them.
1811 our %biblio_descriptor_code = (
1812 item_type => sub { substr($ldr,6,1); },
# The key of the next entry is not visible in this listing (presumably
# item_form -- confirm).  It reads 008 position 29 for MAP/VIS record types
# and position 23 for BKS/SER/MIX/SCO/REC.
1815 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1816 return substr($oo8,29,1);
1817 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1818 return substr($oo8,23,1);
1822 bib_level => sub { substr($ldr,7,1); },
1823 control_type => sub { substr($ldr,8,1); },
1824 char_encoding => sub { substr($ldr,9,1); },
1825 enc_level => sub { substr($ldr,17,1); },
1826 cat_form => sub { substr($ldr,18,1); },
1827 pub_status => sub { substr($ldr,5,1); },
1828 item_lang => sub { substr($oo8,35,3); },
# lit_form / type_mat share 008 position 33 and yield undef for record types
# outside their group.
1829 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1830 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1831 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: builds a
# Fieldmapper::metabib::record_descriptor from a parsed MARCXML document.
# The leader and the 008 / 007 control fields are found by local-name (so
# any namespace prefix works) and placed in the dynamically scoped $ldr,
# $oo8 and $oo7 for the code refs in %biblio_descriptor_code to read.
1834 sub _extract_biblio_descriptors {
1837 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1838 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1839 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1841 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each key doubles as the accessor name on the descriptor object.
1842 for my $rd_field ( keys %biblio_descriptor_code ) {
1843 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API wrapper around _extract_biblio_descriptors.
# It accepts a MARCXML string or an already parsed document.
1849 sub extract_biblio_desc_xml {
1854 $xml = $parser->parse_string($xml) unless (ref $xml);
1856 return _extract_biblio_descriptors( $xml );
1858 __PACKAGE__->register_method(
1859 api_name => "open-ils.worm.biblio_leader.xml",
1860 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id front end for
# open-ils.worm.biblio_leader.xml.  It retrieves bib record $rec, builds its
# record descriptor from the MARC and logs it as JSON.
# NOTE(review): the statement that hands $d back to the caller is not
# visible in this listing.
1865 sub extract_biblio_desc_record {
1870 OpenILS::Application::Ingest->post_init();
1871 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1873 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1874 $log->debug("Record descriptor for bib rec $rec is ".OpenSRF::Utils::JSON->perl2JSON($d), DEBUG);
1877 __PACKAGE__->register_method(
1878 api_name => "open-ils.worm.biblio_leader.record",
1879 method => "extract_biblio_desc_record",
1884 # --------------------------------------------------------------------------------
1887 package OpenILS::Application::Ingest::FlatMARC;
1888 use base qw/OpenILS::Application::Ingest/;
1889 use Unicode::Normalize;
# _marcxml_to_full_rows: flattens a parsed MARCXML document into
# Fieldmapper::<xmltype>::full_rec objects ($xmltype defaults to 'metabib').
# It walks, in order, the leader, the controlfields, and every subfield of
# every datafield of the first element whose local name is "record".
# NOTE(review): this listing omits several short lines (setting tag,
# indicators and value on the leader/controlfield rows, pushing onto
# @ns_list, the return); the comments cover only the visible lines.
1892 sub _marcxml_to_full_rows {
1894 my $marcxml = shift;
1895 my $xmltype = shift || 'metabib';
1897 my $type = "Fieldmapper::${xmltype}::full_rec";
1901 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
1903 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1904 next unless $tagline;
1906 my $ns = $type->new;
1909 my $val = $tagline->textContent;
# Value clean-up used for every row type: drop combining marks (\pM) and
# category-C characters (\pC), then trim trailing non-word characters.
1911 $val =~ s/\pM+//sgo;
1912 $val =~ s/\pC+//sgo;
1913 $val =~ s/\W+$//sgo;
# Control fields: the tag is copied from the element's "tag" attribute.
1919 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1920 next unless $tagline;
1922 my $ns = $type->new;
1924 $ns->tag( $tagline->getAttribute( "tag" ) );
1925 my $val = $tagline->textContent;
1927 $val =~ s/\pM+//sgo;
1928 $val =~ s/\pC+//sgo;
1929 $val =~ s/\W+$//sgo;
# Data fields: one row per subfield; tag and indicators are read once from
# the parent datafield.
1935 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1936 next unless $tagline;
1938 my $tag = $tagline->getAttribute( "tag" );
1939 my $ind1 = $tagline->getAttribute( "ind1" );
1940 my $ind2 = $tagline->getAttribute( "ind2" );
1942 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1945 my $ns = $type->new;
1950 $ns->subfield( $data->getAttribute( "code" ) );
1951 my $val = $data->textContent;
1953 $val =~ s/\pM+//sgo;
1954 $val =~ s/\pC+//sgo;
1955 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1956 $ns->value( lc($val) );
1962 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of flat_marc_xml (the sub header and argument unpacking are not
# visible in this listing): parses $xml if it is a string, picks the
# Fieldmapper family from the API name ('authority' when the name contains
# "authority", otherwise 'metabib') and streams every flattened row to the
# client.
1971 $xml = $parser->parse_string($xml) unless (ref $xml);
1973 my $type = 'metabib';
1974 $type = 'authority' if ($self->api_name =~ /authority/o);
1976 OpenILS::Application::Ingest->post_init();
1978 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Both API names share this handler; the name itself selects the row type.
1981 __PACKAGE__->register_method(
1982 api_name => "open-ils.worm.flat_marc.authority.xml",
1983 method => "flat_marc_xml",
1988 __PACKAGE__->register_method(
1989 api_name => "open-ils.worm.flat_marc.biblio.xml",
1990 method => "flat_marc_xml",
# flat_marc_record: record-id front end for the flat_marc XML methods.
# $type ('biblio', or 'authority' when the API name contains "authority")
# selects both the storage retrieve call and the
# open-ils.worm.flat_marc.<type>.xml method run on the record's MARC; every
# resulting row is relayed to the client.
1996 sub flat_marc_record {
2001 my $type = 'biblio';
2002 $type = 'authority' if ($self->api_name =~ /authority/o);
2004 OpenILS::Application::Ingest->post_init();
2005 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
2007 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
2010 __PACKAGE__->register_method(
2011 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
2012 method => "flat_marc_record",
2017 __PACKAGE__->register_method(
2018 api_name => "open-ils.worm.flat_marc.authority.record_entry",
2019 method => "flat_marc_record",
2026 # --------------------------------------------------------------------------------
2029 package OpenILS::Application::Ingest::Biblio::Fingerprint;
2030 use base qw/OpenILS::Application::Ingest/;
2031 use Unicode::Normalize;
2032 use OpenSRF::EX qw/:try/;
# Fingerprint rule table, a flat list of "match XPath => block" pairs.  The
# fingerprint routine below walks it two elements at a time: when the match
# XPath finds nodes in the MODS document, the paired block is used.  Each
# block is itself a list of name => part pairs; a part carries an {xpath}
# list of candidate expressions, and the regex/log lines shown here belong
# to the text fix-up code attached to each part.
# NOTE(review): this listing omits many short lines (part names, hash/sub
# openers, some substitutions); the comments cover only the visible lines.
2034 my @fp_mods_xpath = (
# Rule 1: records whose typeOfResource is "text".
2035 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates, in listed order: uniform, translated, alternative,
# untyped.
2038 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2039 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2040 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2041 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fix-up (visible steps): strip combining marks, collapse whitespace,
# capture from the first non-space character, drop the words "the", "a" and
# "an", drop bracketed text, trim trailing ";", "/" and "." characters.
2044 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2046 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2047 $text =~ s/\pM+//gso;
2048 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2050 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051 $text =~ s/\s+/ /sgo;
2052 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2054 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055 $text =~ s/\b(?:the|an?)\b//sgo;
2056 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2057 $text =~ s/\[.[^\]]+\]//sgo;
2058 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2059 $text =~ s/\s*[;\/\.]*$//sgo;
2060 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal creator names first, then any creator name.
2065 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2066 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fix-up (visible steps): strip combining marks, collapse whitespace,
# capture from the first non-space character, then cut everything from the
# first whitespace (and a preceding comma) onwards.
2069 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2071 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2072 $text =~ s/\pM+//gso;
2073 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2075 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076 $text =~ s/\s+/ /sgo;
2077 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2078 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2079 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2080 $text =~ s/,?\s+.*$//sgo;
2081 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Rule 2: records with a relatedItem whose type is neither "host" nor
# "series".  relatedItem titles are listed before the record's own titles.
2086 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
2089 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2090 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2091 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2092 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2093 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2094 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2095 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2096 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fix-up: same visible steps as in rule 1.
2099 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2101 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2102 $text =~ s/\pM+//gso;
2103 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2105 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2106 $text =~ s/\s+/ /sgo;
2107 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2108 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2109 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2110 $text =~ s/\b(?:the|an?)\b//sgo;
2111 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2112 $text =~ s/\[.[^\]]+\]//sgo;
2113 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2114 $text =~ s/\s*[;\/\.]*$//sgo;
2115 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: relatedItem creators first, then the record's own.
2120 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2121 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2122 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2123 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fix-up: same visible steps as in rule 1.
2126 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2128 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2129 $text =~ s/\pM+//gso;
2130 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2132 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2133 $text =~ s/\s+/ /sgo;
2134 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2135 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2136 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2137 $text =~ s/,?\s+.*$//sgo;
2138 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Appended rule: any record with a titleInfo reuses the block of rule 1
# (element 1 of the list).
2145 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Body of the MODS fingerprint routine (called as _fp_mods elsewhere in this
# package; its sub header is not visible in this listing).  It binds the
# "mods" prefix, walks @fp_mods_xpath as match/block pairs and, for a rule
# whose match XPath finds nodes, concatenates the text found by each part of
# the block into $fp_string.
# NOTE(review): short lines are missing here -- index advancement for the
# outer loop, the per-part fix-up call, loop exits and the return.
2149 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Even indices of @fp_mods_xpath hold match XPaths, odd indices the blocks.
2153 my $match_index = 0;
2154 my $block_index = 1;
2155 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2156 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Inside a block the layout repeats: part name at even, part data at odd
# positions.
2158 my $block_name_index = 0;
2159 my $block_value_index = 1;
2160 my $block = $fp_mods_xpath[$block_index];
2161 while ( my $part = $$block[$block_value_index] ) {
# Try the part's candidate XPaths in listed order.
2163 for my $xpath ( @{ $part->{xpath} } ) {
2164 $text = $mods->findvalue( $xpath );
2168 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2172 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2173 $fp_string .= $text;
# Step to the next name/part pair.
2176 $block_name_index += 2;
2177 $block_value_index += 2;
# Final squeeze: remove every non-word character from the fingerprint.
2181 $fp_string =~ s/\W+//gso;
2182 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recomputes fingerprint and quality for the bib
# record(s) in $rec.  When either value differs from what is stored, the
# record_entry is updated and -- unless the API name contains "nomap" -- the
# record is re-mapped to the metarecord carrying the new fingerprint.  The
# id of each processed record is sent to the client.
# NOTE(review): this listing omits some short source lines (argument
# unpacking, try/catch openers, closing braces, a few call arguments).
2192 sub refingerprint_bibrec {
# Open a storage transaction only when none is in progress.
2198 if (!OpenILS::Application::Ingest->in_transaction) {
2199 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2205 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2206 for my $b (@$bibs) {
2207 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Skip all writes when nothing changed.
2209 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2211 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2213 OpenILS::Application::Ingest->storage_req(
2214 'open-ils.storage.direct.biblio.record_entry.remote_update',
2216 { fingerprint => $fp->{fingerprint},
2217 quality => $fp->{quality} }
2220 if ($self->api_name !~ /nomap/o) {
# Delete the record's existing source-map rows, remembering the metarecord
# they pointed at.
2221 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2222 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2227 if (ref($old_source_map) and @$old_source_map) {
2228 for my $m (@$old_source_map) {
2229 $old_mrid = $m->metarecord;
2230 OpenILS::Application::Ingest->storage_req(
2231 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no sources left, delete it as well.
2237 my $old_sm = OpenILS::Application::Ingest->storage_req(
2238 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2239 { metarecord => $old_mrid }
2242 if (ref($old_sm) and @$old_sm == 0) {
2243 OpenILS::Application::Ingest->storage_req(
2244 'open-ils.storage.direct.metabib.metarecord.delete',
# Look up the metarecord for the new fingerprint.  The lines that create one
# (this record as master) presumably run only when none exists -- the guard
# is not visible in this listing.
2249 my $mr = OpenILS::Application::Ingest->storage_req(
2250 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2251 { fingerprint => $fp->{fingerprint} }
2255 $mr = Fieldmapper::metabib::metarecord->new;
2256 $mr->fingerprint( $fp->{fingerprint} );
2257 $mr->master_record( $b->id );
2258 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map the record to the metarecord.
2261 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2262 $mr_map->metarecord( $mr->id );
2263 $mr_map->source( $b->id );
2264 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2268 $client->respond($b->id);
2272 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Finish the transaction only when $commit is set (its assignment is not
# visible in this listing).
2276 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2277 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API registrations; the ".nomap" variant updates the fingerprint without
# touching metarecord mappings.
2280 __PACKAGE__->register_method(
2281 api_name => "open-ils.worm.fingerprint.record.update",
2282 method => "refingerprint_bibrec",
2288 __PACKAGE__->register_method(
2289 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2290 method => "refingerprint_bibrec",
# fingerprint_bibrec: retrieves bib record $rec and runs
# open-ils.worm.fingerprint.marc on its MARC, logging the result.
# NOTE(review): the api_name registered below,
# "open-ils.worm.fingerprint.record", is registered again further down for
# biblio_fingerprint_record; which handler ends up serving the name depends
# on register_method's handling of duplicates -- confirm.
2297 sub fingerprint_bibrec {
2302 OpenILS::Application::Ingest->post_init();
2303 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2305 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2306 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2310 __PACKAGE__->register_method(
2311 api_name => "open-ils.worm.fingerprint.record",
2312 method => "fingerprint_bibrec",
# fingerprint_mods: parses a MODS XML string and returns the result of
# _fp_mods on its document element.
2318 sub fingerprint_mods {
2323 OpenILS::Application::Ingest->post_init();
2324 my $mods = $parser->parse_string($xml)->documentElement;
2326 return _fp_mods( $mods );
2328 __PACKAGE__->register_method(
2329 api_name => "open-ils.worm.fingerprint.mods",
2330 method => "fingerprint_mods",
# fingerprint_marc: fingerprints MARCXML (string or parsed document) by
# transforming it with $mods_sheet and passing the resulting document
# element to _fp_mods.
# NOTE(review): "open-ils.worm.fingerprint.marc" is registered again further
# down for biblio_fingerprint; which handler ends up serving the name
# depends on register_method's handling of duplicates -- confirm.
2335 sub fingerprint_marc {
2340 $xml = $parser->parse_string($xml) unless (ref $xml);
2342 OpenILS::Application::Ingest->post_init();
2343 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2344 $log->debug("Returning [$fp] as fingerprint", INFO);
2347 __PACKAGE__->register_method(
2348 api_name => "open-ils.worm.fingerprint.marc",
2349 method => "fingerprint_marc",
# biblio_fingerprint_record: retrieves bib record $rec from storage and runs
# open-ils.worm.fingerprint.marc on it, logging the result.
# NOTE(review): the continuation of the retrieve expression (what is taken
# from the fetched record) is not visible in this listing.  The api_name
# below duplicates the one registered earlier for fingerprint_bibrec.
2357 sub biblio_fingerprint_record {
2362 OpenILS::Application::Ingest->post_init();
2364 my $marc = OpenILS::Application::Ingest
2365 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2368 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2369 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2372 __PACKAGE__->register_method(
2373 api_name => "open-ils.worm.fingerprint.record",
2374 method => "biblio_fingerprint_record",
# biblio_fingerprint: script-driven fingerprinting.  The MARC is parsed if
# needed, a MODS rendering is produced, both are passed through entityize,
# and a ScriptRunner script (configured under
# apps/open-ils.storage/app_settings) is run with { marc, mods } as its
# environment.
# NOTE(review): this listing omits some short source lines (argument
# unpacking, the stylesheet used for the MODS transform, the guard around
# script creation, the return of $res).
2380 sub biblio_fingerprint {
2385 OpenILS::Application::Ingest->post_init();
2387 $marc = $parser->parse_string($marc) unless (ref $marc);
2389 my $mods = OpenILS::Application::Ingest::entityize(
2391 ->transform( $marc )
# From here on $marc is the entityized string form of the record.
2397 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2400 $log->internal("Got MARC [$marc]");
2401 $log->internal("Created MODS [$mods]");
# Script location and library paths come from the settings server;
# script_path may be a single value or a list.
2404 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2405 my $conf = OpenSRF::Utils::SettingsClient->new;
2407 my $libs = $conf->config_value(@pfx, 'script_path');
2408 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
2409 my $script_libs = (ref($libs)) ? $libs : [$libs];
2411 $log->debug("Loading script $script_file for biblio fingerprinting...");
2413 $fp_script = new OpenILS::Utils::ScriptRunner
2414 ( file => $script_file,
2415 paths => $script_libs,
2416 reset_count => 1000 );
2419 $log->debug("Applying environment for biblio fingerprinting...");
2421 my $env = {marc => $marc, mods => $mods};
2422 #my $res = {fingerprint => '', quality => '0'};
2424 $fp_script->insert('environment' => $env);
2425 #$fp_script->insert('result' => $res);
2427 $log->debug("Running script for biblio fingerprinting...");
# A false result from the script is logged as an error and the sub returns 0.
2429 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2431 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): this api_name duplicates the registration made earlier for
# fingerprint_marc.
2435 __PACKAGE__->register_method(
2436 api_name => "open-ils.worm.fingerprint.marc",
2437 method => "biblio_fingerprint",
2442 # --------------------------------------------------------------------------------
2456 my $create_source_map;
# Record-descriptor field name => Perl expression held as a string.  Each
# snippet reads $ldr (leader) and/or $oo8 (008 field) by name.
# NOTE(review): presumably evaluated with string eval by the code that
# follows; the consumer is not visible in this listing -- confirm.
2471 my %descriptor_code = (
2472 item_type => 'substr($ldr,6,1)',
# Unlike the code-ref table in the Biblio::Leader package, the record types
# are spelled out here as a literal alternation.
2473 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2474 bib_level => 'substr($ldr,7,1)',
2475 control_type => 'substr($ldr,8,1)',
2476 char_encoding => 'substr($ldr,9,1)',
2477 enc_level => 'substr($ldr,17,1)',
2478 cat_form => 'substr($ldr,18,1)',
2479 pub_status => 'substr($ldr,5,1)',
2480 item_lang => 'substr($oo8,35,3)',
2481 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2482 audience => 'substr($oo8,22,1)',
# Bibliographic ingest body (presumably sub wormize, the method name used by
# the register_method calls further down; the sub header is not shown here).
#
# Overall flow, as far as the visible lines show:
#   1. resolve the storage methods that are needed,
#   2. open a storage transaction unless one is already active,
#   3. for each record: MARC -> MODS, fingerprint, metarecord lookup/creation,
#      source map, record descriptor, MODS field values, full_rec rows,
#   4. delete the rows previously derived from these records,
#   5. batch-create the new rows, raising OpenSRF::EX::PANIC on failure,
#   6. commit, but only if the transaction was started here.
#
# API names containing "no_map" take the branch below; $no_map is consulted
# later to decide whether the existing metarecord source maps are deleted.
2492 if ($self->api_name =~ /no_map/o) {
# Resolve the storage-layer methods via method_lookup.  The trailing
# "unless ($var)" guards leave an already-populated variable untouched.
2496 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2498 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2500 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2502 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2504 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2505 unless ($sm_lookup);
2506 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2507 unless ($mr_lookup);
2508 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2509 unless ($mr_update);
2510 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2512 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2513 unless ($update_entry);
# mass_delete methods used to clear rows derived from an earlier ingest.
2514 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2515 unless ($rm_old_sm);
2516 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2517 unless ($rm_old_rd);
2518 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2519 unless ($rm_old_fr);
2520 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2521 unless ($rm_old_tr);
2522 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2523 unless ($rm_old_ar);
2524 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2525 unless ($rm_old_sr);
2526 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2527 unless ($rm_old_kr);
2528 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2529 unless ($rm_old_ser);
# create / batch.create methods used to write the freshly derived rows.
2530 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2531 unless ($mr_create);
2532 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2533 unless ($create_source_map);
2534 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2535 unless ($rd_create);
2536 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2537 unless ($fr_create);
# Field-entry creators are kept in the $create hash, keyed by search class,
# so the insert loop near the end can dispatch on the class name.
2538 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2539 unless ($$create{title});
2540 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2541 unless ($$create{author});
2542 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2543 unless ($$create{subject});
2544 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2545 unless ($$create{keyword});
2546 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2547 unless ($$create{series});
# Ask storage whether a transaction is already active.  If not, begin one
# here; $outer_xact is checked again at the end so that only a transaction
# started by this routine gets committed by it.
2550 my ($outer_xact) = $in_xact->run;
2552 unless ($outer_xact) {
2553 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2554 my ($r) = $begin->run($client);
2555 unless (defined $r and $r) {
2557 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2560 } catch Error with {
2561 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Per-record pass over the record entries retrieved for @docids.
2571 for my $entry ( $lookup->run(@docids) ) {
2572 # step -1: grab the doc from storage
2573 next unless ($entry);
# Parse and compile the MARC21slim2MODS stylesheet from the configured xsl
# directory into $mods_sheet.
2576 my $xslt_doc = $parser->parse_file(
2577 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2578 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
# Parse the record's MARCXML and transform it to MODS.
2581 my $xml = $entry->marc;
2582 my $docid = $entry->id;
2583 my $marcdoc = $parser->parse_string($xml);
2584 my $modsdoc = $mods_sheet->transform($marcdoc);
2586 my $mods = $modsdoc->documentElement;
2587 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Store the MODS-derived fingerprint on the entry and queue the entry for the
# batch update further down.
2589 $entry->fingerprint( fingerprint_mods( $mods ) );
2590 push @entry_list, $entry;
2592 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
# Find the metarecord with this fingerprint, or create one with this record
# as its master record.
2595 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2596 if (!$mr || !@$mr) {
2597 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2598 $mr = new Fieldmapper::metabib::metarecord;
2599 $mr->fingerprint( $entry->fingerprint );
2600 $mr->master_record( $entry->id );
2601 my ($new_mr) = $mr_create->run($mr);
# NOTE(review): in the lines shown here $mr was just assigned a new object, so
# this defined-check cannot fail; $new_mr (the create result) looks like the
# value that was meant to be tested -- confirm against the full source.
2603 unless (defined $mr) {
2604 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2607 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
# Queue a source-map row linking this record to its metarecord.
2612 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2613 $sm->metarecord( $mr->id );
2614 $sm->source( $entry->id );
2615 push @source_maps, $sm;
# Leader text and 008 control field value; the %descriptor_code snippets
# evaluated below refer to these two lexicals by name ($ldr and $oo8).
2618 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2619 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# Build the record descriptor: each %descriptor_code value is a string of Perl
# that is eval'ed here, and its result is stored via the accessor named by the
# hash key.
2621 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2622 for my $rd_field ( keys %descriptor_code ) {
2623 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2625 $rd_obj->record( $docid );
2626 push @rd_list, $rd_obj;
# Queue the MODS field values as a single-key hash: { record id => values }.
2628 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2630 # step 2: build the KOHA rows
2631 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2632 $_->record( $docid ) for (@tmp_list);
2633 push @ns_list, @tmp_list;
# Leave the record loop here unless the API name ends in "batch".
2637 last unless ($self->api_name =~ /batch$/o);
# Remove rows derived from a previous ingest of these records.  Descriptors
# and full_rec rows are matched on "record", the others on "source".  The
# source-map delete is skipped when $no_map is set.
2640 $rm_old_rd->run( { record => \@docids } );
2641 $rm_old_fr->run( { record => \@docids } );
2642 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2643 $rm_old_tr->run( { source => \@docids } );
2644 $rm_old_ar->run( { source => \@docids } );
2645 $rm_old_sr->run( { source => \@docids } );
2646 $rm_old_kr->run( { source => \@docids } );
2647 $rm_old_ser->run( { source => \@docids } );
# Write the queued rows.  Every call is followed by a defined-check on its
# first return value, and an undefined result raises OpenSRF::EX::PANIC.
2650 my ($sm) = $create_source_map->run(@source_maps);
2651 unless (defined $sm) {
2652 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2654 my ($mr) = $mr_update->run(@mr_list);
2655 unless (defined $mr) {
2656 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2660 my ($re) = $update_entry->run(@entry_list);
2661 unless (defined $re) {
2662 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2665 my ($rd) = $rd_create->run(@rd_list);
2666 unless (defined $rd) {
2667 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2670 my ($fr) = $fr_create->run(@ns_list);
2671 unless (defined $fr) {
2672 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2675 # step 5: insert the new metadata
2676 for my $class ( qw/title author subject keyword series/ ) {
2678 for my $doc ( @mods_data ) {
# Each @mods_data element holds exactly one pair: record id => extracted data.
2679 my ($did) = keys %$doc;
2680 my ($data) = values %$doc;
# The field-entry class name is derived from the search class; rows without a
# true value are skipped.
2682 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2683 for my $row ( keys %{ $$data{$class} } ) {
2684 next unless (exists $$data{$class}{$row});
2685 next unless ($$data{$class}{$row}{value});
2686 my $fm_obj = $fm_constructor->new;
2687 $fm_obj->value( $$data{$class}{$row}{value} );
2688 $fm_obj->field( $$data{$class}{$row}{field_id} );
2689 $fm_obj->source( $did );
2690 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2692 push @md_list, $fm_obj;
# Hand the collected entries to the creator registered for this class.
2696 my ($cr) = $$create{$class}->run(@md_list);
2697 unless (defined $cr) {
2698 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
# Commit only when the transaction was opened by this routine.
2702 unless ($outer_xact) {
2703 $log->debug("Commiting transaction started by the Ingest.", INFO);
2704 my ($c) = $commit->run;
2705 unless (defined $c and $c) {
2707 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the bibliographic ingest under four API names, all bound to the
# method named "wormize".  The ingest code reads its own api_name at run time:
# a name matching /no_map/ selects the variant that skips deleting existing
# metarecord source maps, and a name not ending in "batch" leaves the record
# loop after its first full pass.
2713 __PACKAGE__->register_method(
2714 api_name => "open-ils.worm.wormize",
2715 method => "wormize",
2719 __PACKAGE__->register_method(
2720 api_name => "open-ils.worm.wormize.no_map",
2721 method => "wormize",
2725 __PACKAGE__->register_method(
2726 api_name => "open-ils.worm.wormize.batch",
2727 method => "wormize",
2731 __PACKAGE__->register_method(
2732 api_name => "open-ils.worm.wormize.no_map.batch",
2733 method => "wormize",
# File-scoped variable for the authority ingest.  NOTE(review): none of the
# authority ingest lines visible in this listing assign or read it -- confirm
# whether it is still needed.
2748 my $acreate_source_map;
# authority_wormize: ingest authority records.
#
# For each authority record entry retrieved for @docids this parses the
# record's MARCXML, builds a Fieldmapper::authority::record_descriptor from the
# leader and 008 field, and builds authority full_rec rows.  It then deletes
# the previously stored descriptor/full_rec rows for those records and
# batch-creates the new ones.  A storage transaction is begun here when none
# is active, and is committed here only in that case.
#
# Raises OpenSRF::EX::PANIC when the transaction cannot be begun or committed,
# or when a batch create returns an undefined result.
2763 sub authority_wormize {
2770 if ($self->api_name =~ /no_map/o) {
# Resolve the storage-layer methods via method_lookup.  The trailing
# "unless ($var)" guards leave an already-populated variable untouched.
2774 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2776 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2778 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2780 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2782 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2784 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2785 unless ($aupdate_entry);
2786 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2787 unless ($arm_old_rd);
2788 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2789 unless ($arm_old_fr);
2790 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2791 unless ($ard_create);
2792 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2793 unless ($afr_create);
# Begin a transaction only if storage reports none is active; $outer_xact is
# checked again before the commit at the end.
2796 my ($outer_xact) = $in_xact->run;
2798 unless ($outer_xact) {
2799 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2800 my ($r) = $begin->run($client);
2801 unless (defined $r and $r) {
2803 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2806 } catch Error with {
2807 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Retrieve through the authority record_entry method resolved above ($alookup)
# rather than the bibliographic $lookup handle.
2817 for my $entry ( $alookup->run(@docids) ) {
2818 # step -1: grab the doc from storage
2819 next unless ($entry);
2822 # my $xslt_doc = $parser->parse_file(
2823 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2824 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2827 my $xml = $entry->marc;
2828 my $docid = $entry->id;
2829 my $marcdoc = $parser->parse_string($xml);
2830 #my $madsdoc = $mads_sheet->transform($marcdoc);
2832 #my $mads = $madsdoc->documentElement;
2833 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2835 push @entry_list, $entry;
# Leader text and 008 control field value; the %descriptor_code snippets
# evaluated below refer to these two lexicals by name ($ldr and $oo8).
2837 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2838 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# Each %descriptor_code value is a string of Perl that is eval'ed here; its
# result is stored via the accessor named by the hash key.
2840 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2841 for my $rd_field ( keys %descriptor_code ) {
2842 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2844 $rd_obj->record( $docid );
2845 push @rd_list, $rd_obj;
2847 # step 2: build the KOHA rows
2848 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2849 $_->record( $docid ) for (@tmp_list);
2850 push @ns_list, @tmp_list;
# Leave the record loop here unless the API name ends in "batch".
2854 last unless ($self->api_name =~ /batch$/o);
# Remove the descriptor and full_rec rows stored for these records earlier.
2857 $arm_old_rd->run( { record => \@docids } );
2858 $arm_old_fr->run( { record => \@docids } );
2860 my ($rd) = $ard_create->run(@rd_list);
2861 unless (defined $rd) {
2862 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Create through the authority full_rec method ($afr_create), matching both
# the row class built above and the error message below.
2865 my ($fr) = $afr_create->run(@ns_list);
2866 unless (defined $fr) {
2867 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only when the transaction was opened by this routine.
2870 unless ($outer_xact) {
2871 $log->debug("Commiting transaction started by Ingest.", INFO);
2872 my ($c) = $commit->run;
2873 unless (defined $c and $c) {
2875 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# API registrations for authority ingest.
# NOTE(review): the first api_name spells "authortiy", while the batch variant
# spells "authority" -- this looks like a typo, but correcting it changes the
# published method name, so check for callers first.
# NOTE(review): both entries bind to "wormize" (the bibliographic ingest), not
# to authority_wormize defined in this file -- confirm which is intended.
2881 __PACKAGE__->register_method(
2882 api_name => "open-ils.worm.authortiy.wormize",
2883 method => "wormize",
2887 __PACKAGE__->register_method(
2888 api_name => "open-ils.worm.authority.wormize.batch",
2889 method => "wormize",
2895 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml, $type )
#
# Builds one row object per leader, per controlfield and per datafield child
# node of a parsed MARCXML document.
#
#   $marcxml - parsed document; only documentElement is called on it here
#   $type    - class used to construct the rows; defaults to
#              'Fieldmapper::metabib::full_rec'
#
# Text values are decomposed with NFD and stripped of combining marks (\pM)
# before use.  Callers in this file use the result in list context.
2898 sub _marcxml_to_full_rows {
2900 my $marcxml = shift;
2901 my $type = shift || 'Fieldmapper::metabib::full_rec';
2905 my $root = $marcxml->documentElement;
# Leader rows.
2907 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2908 next unless $tagline;
# Construct via $type so a caller-supplied row class is honoured for leader
# rows as well as for datafield rows.
2910 my $ns = $type->new;
2913 my $val = NFD($tagline->textContent);
2914 $val =~ s/(\pM+)//gso;
# Control field rows, tagged with the element's "tag" attribute.
2920 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2921 next unless $tagline;
# Same as above: use the requested row class.
2923 my $ns = $type->new;
2925 $ns->tag( $tagline->getAttribute( "tag" ) );
2926 my $val = NFD($tagline->textContent);
2927 $val =~ s/(\pM+)//gso;
# Data field rows: one per child node, carrying the child's "code" attribute
# as the subfield.
2933 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2934 next unless $tagline;
2936 my $tag = $tagline->getAttribute( "tag" );
2937 my $ind1 = $tagline->getAttribute( "ind1" );
2938 my $ind2 = $tagline->getAttribute( "ind2" );
2940 for my $data ( $tagline->childNodes ) {
2943 my $ns = $type->new;
2948 $ns->subfield( $data->getAttribute( "code" ) );
2949 my $val = NFD($data->textContent);
2950 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
2951 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Collects text from the nodes under $root that match $xpath into a single
# space-separated string, then decomposes it with NFD and strips combining
# marks.  (The declaration of $string and the return statement are not part of
# this listing.)
2959 sub _get_field_value {
2961 my( $root, $xpath ) = @_;
2965 # grab the set of matching nodes
2966 my @nodes = $root->findnodes( $xpath );
2967 for my $value (@nodes) {
2969 # grab all children of the node
2970 my @children = $value->childNodes();
2971 for my $child (@children) {
2973 # add the childs content to the growing buffer
# The child's text is escaped with quotemeta so it matches literally.  The
# test is an unanchored match: a child is skipped when its text already occurs
# anywhere in the buffer, not only when it equals an earlier value.
2974 my $content = quotemeta($child->textContent);
2975 next if ($string =~ /$content/); # uniquify the values
2976 $string .= $child->textContent . " ";
# Append the matched node's own text content.
2979 $string .= $value->textContent . " ";
# Decompose to NFD and remove every combining mark.
2982 $string = NFD($string);
2983 $string =~ s/(\pM)//gso;
2988 sub modsdoc_to_values {
2989 my( $self, $mods ) = @_;
2991 for my $class (keys %$xpathset) {
2992 $data->{$class} = {};
2993 for my $type (keys %{$xpathset->{$class}}) {
2994 $data->{$class}->{$type} = {};
2995 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};