1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
16 use OpenILS::Utils::Fieldmapper;
20 use Time::HiRes qw(time);
# Registry of the XML formats this module knows about, keyed by format name.
# Every entry carries the format's namespace URI under `ns`; the three MODS
# entries later gain a compiled stylesheet under `xslt` (see the stylesheet
# loading code that follows this table).
22 our %supported_formats = (
23 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
24 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods => {ns => 'http://www.loc.gov/mods/'},
26 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
27 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
28 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
29 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
30 atom => {ns => 'http://www.w3.org/2005/Atom'},
31 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
35 rss10 => {ns => 'http://purl.org/rss/1.0/'},
36 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; it is used as a class-method invocant ($log->debug(...)).
41 my $log = 'OpenSRF::Utils::Logger';
# File-wide XML parser and XSLT processor, shared by the packages below.
43 my $parser = XML::LibXML->new();
44 my $xslt = XML::LibXSLT->new();
# Lazy one-time setup (presumably the body of post_init, which is what the
# debug message and the callers in this file name -- confirm against the sub
# header).  Everything is skipped once $xpathset has at least one key.
54 unless (keys %$xpathset) {
55 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL files, read from the `dirs => xsl` config value.
57 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Each MARC21slim -> MODS stylesheet is parsed and compiled only if it has
# not already been stored in %supported_formats.
59 unless ($supported_formats{mods}{xslt}) {
60 $log->debug("Loading MODS XSLT", DEBUG);
61 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
62 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
65 unless ($supported_formats{mods3}{xslt}) {
66 $log->debug("Loading MODS v3 XSLT", DEBUG);
67 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
68 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
71 unless ($supported_formats{mods32}{xslt}) {
72 $log->debug("Loading MODS v32 XSLT", DEBUG);
73 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
74 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows whose search_field is 't'.
77 my $req = OpenSRF::AppSession
78 ->create('open-ils.cstore')
80 # XXX testing new metabib field use for faceting
81 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
82 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Index every returned definition as $xpathset->{field_class}{name}, keeping
# its xpath, id and format.
86 if (ref $req and @$req) {
88 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
89 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
90 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
91 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Text clean-up applied to $stuff (presumably the body of entityize, which is
# what the other packages in this file call -- confirm against the sub header).
# NOTE(review): NFD and NFC are applied on consecutive statements; presumably
# they sit in alternative branches of a condition -- confirm.
102 $stuff = NFD($stuff);
104 $stuff = NFC($stuff);
# Replace each character in U+0080..U+FFFD with a hexadecimal numeric
# character reference of the form &#xHHHH;.
# NOTE(review): characters above U+FFFD are outside the class and are left
# as-is -- confirm that is intended.
107 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
111 # --------------------------------------------------------------------------------
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
# rw_biblio_ingest_single_object: re-ingest one bibliographic record object
# ($bib) and write the derived data through open-ils.cstore.
#
# The derived data comes from open-ils.ingest.full.biblio.object.readonly.
# Between the transaction begin and commit calls this sub replaces the
# record's URI call numbers and maps, full_rec rows, record descriptor and
# classed field entries, re-maps the record to a metarecord chosen by
# fingerprint, and updates the bib record itself.
#
# Returns undef when the read-only ingest yields nothing, or when the commit
# request returns a false value.
118 sub rw_biblio_ingest_single_object {
123 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
124 return undef unless ($blob);
# Copy the freshly computed fingerprint and quality onto the record object.
126 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
127 $bib->quality( $blob->{fingerprint}->{quality} );
129 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
131 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
134 # update uri stuff ...
136 # gather URI call numbers for this record
# NOTE(review): $u is only declared by the `for my $u` loop further down, so
# the two `$u->{call_number} = ...` assignments below target a different
# variable; the second also overwrites the first -- confirm they are intended.
137 my $uri_cns = $u->{call_number} = $cstore->request(
138 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
141 # gather the maps for those call numbers
142 my $uri_maps = $u->{call_number} = $cstore->request(
143 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
146 # delete the old maps
147 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
149 # and delete the call numbers if there are no more URIs
150 if (!@{ $blob->{uri} }) {
151 $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
154 # now, add CNs, URIs and maps
# Objects created during this run are remembered per owner so that a second
# entry for the same owner reuses them instead of creating another row.
# NOTE(review): new URIs are cached by owner only, so two different new URIs
# for one owner would both resolve to the first one created -- confirm.
155 my %new_cns_by_owner;
156 my %new_uris_by_owner;
157 for my $u ( @{ $blob->{uri} } ) {
# NOTE(review): the call number is read via owning_lib here, while the 856
# extraction code in this file sets and searches it via `owner` -- confirm
# which accessor is correct.
159 my $owner = $u->{call_number}->owning_lib;
161 if ($u->{call_number}->isnew) {
162 if ($new_cns_by_owner{$owner}) {
163 $u->{call_number} = $new_cns_by_owner{$owner};
165 $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
166 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
171 if ($u->{uri}->isnew) {
172 if ($new_uris_by_owner{$owner}) {
173 $u->{uri} = $new_uris_by_owner{$owner};
175 $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
176 'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
# Link the URI to its call number.
181 my $umap = Fieldmapper::asset::uri_call_number_map->new;
182 $umap->uri($u->{uri}->id);
183 $umap->call_number($u->{call_number}->id);
# NOTE(review): the statement that sets $tmp for this check is not visible
# here; presumably a lookup for an existing map -- confirm.
185 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1) if (!$tmp);
188 # update full_rec stuff ...
# Same pattern for each data set below: list the existing row ids for this
# record, delete them one by one, then create the new rows from $blob.
189 $tmp = $cstore->request(
190 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
191 { record => $bib->id }
194 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
195 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
197 # update rec_descriptor stuff ...
198 $tmp = $cstore->request(
199 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
200 { record => $bib->id }
203 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
204 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
206 # deal with classed fields...
207 for my $class ( qw/title author subject keyword series/ ) {
208 $tmp = $cstore->request(
209 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
210 { source => $bib->id }
213 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# The cstore method name for each new entry is derived from the object's
# class name with the leading "Fieldmapper::" removed.
215 for my $obj ( @{ $blob->{field_entries} } ) {
216 my $class = $obj->class_name;
217 $class =~ s/^Fieldmapper:://o;
219 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Metarecord handling: fetch and delete this record's existing source maps.
224 $tmp = $cstore->request(
225 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
226 { source => $bib->id }
229 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Load the metarecords those maps pointed at.
# NOTE(review): `my ... if (...)` is a conditional declaration, which perlsyn
# documents as having undefined behaviour; the `[]` fallback just below covers
# the case where nothing was fetched.
232 my $old_mrs = $cstore->request(
233 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
234 )->gather(1) if (@$tmp);
236 $old_mrs = [] if (!ref($old_mrs));
# Examine each previously mapped metarecord: compare fingerprints, look up
# its remaining source maps, and issue a metarecord delete (the conditions
# around these calls are not fully visible here).
239 for my $m (@$old_mrs) {
240 if ($m->fingerprint eq $bib->fingerprint) {
243 my $others = $cstore->request(
244 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
249 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
259 # Get the matching MR, if any.
260 $mr = $cstore->request(
261 'open-ils.cstore.direct.metabib.metarecord.search',
262 { fingerprint => $bib->fingerprint }
# Metarecord-level ('M') holds that target old metarecords flagged isdeleted.
265 $holds = $cstore->request(
266 'open-ils.cstore.direct.action.hold_request.search.atomic',
267 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
268 )->gather(1) if (@$old_mrs);
# Each such hold is written back (the field change made to $h is not visible
# here; presumably a retarget to the matching metarecord -- confirm).
271 for my $h (@$holds) {
273 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with this bib as master record.
280 $mr = new Fieldmapper::metabib::metarecord;
281 $mr->fingerprint( $bib->fingerprint );
282 $mr->master_record( $bib->id );
285 "open-ils.cstore.direct.metabib.metarecord.create",
286 $mr => { quiet => 'true' }
# Holds not already marked changed are updated as well.
290 for my $h (grep { !$_->ischanged } @$holds) {
292 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Choose the master record: fetch the records mapped to this metarecord
# ordered by quality descending, and keep $best only when its quality is
# strictly higher than this bib's; otherwise this bib becomes the master.
295 my $mrm = $cstore->request(
296 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
297 { metarecord => $mr->id }
301 my $best = $cstore->request(
302 "open-ils.cstore.direct.biblio.record_entry.search",
303 { id => [ map { $_->source } @$mrm ] },
304 { 'select' => { bre => [ qw/id quality/ ] },
305 order_by => { bre => "quality desc" },
310 if ($best->quality > $bib->quality) {
311 $mr->master_record($best->id);
313 $mr->master_record($bib->id);
316 $mr->master_record($bib->id);
321 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the chosen metarecord and save the bib itself.
324 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
325 $mrm->source($bib->id);
326 $mrm->metarecord($mr->id);
328 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
329 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false result from the commit request makes the sub return undef.
331 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Publish the sub under its API name.
336 __PACKAGE__->register_method(
337 api_name => "open-ils.ingest.full.biblio.object",
338 method => "rw_biblio_ingest_single_object",
# rw_biblio_ingest_single_record: retrieve one biblio.record_entry ($rec)
# from cstore and pass it to open-ils.ingest.full.biblio.object.
# Returns undef when the retrieve yields nothing.
343 sub rw_biblio_ingest_single_record {
348 OpenILS::Application::Ingest->post_init();
349 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# The transaction here only brackets the read: a rollback request follows
# the retrieve.
350 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
352 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
354 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
357 return undef unless ($r and @$r);
# run() is called in list context; only its first element is returned.
359 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Publish the sub under its API name.
361 __PACKAGE__->register_method(
362 api_name => "open-ils.ingest.full.biblio.record",
363 method => "rw_biblio_ingest_single_record",
# rw_biblio_ingest_record_list: ingest several bib records; the first
# remaining argument may be an array ref or the ids may be passed as a list.
368 sub rw_biblio_ingest_record_list {
371 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
373 OpenILS::Application::Ingest->post_init();
374 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# As in the single-record variant, the transaction only brackets the read.
375 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
# NOTE(review): the ids were collected into @rec above, but the search below
# is given the scalar $rec -- confirm whether \@rec was intended.
377 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
379 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
382 return undef unless ($r and @$r);
# Run the read/write object ingest per record and add up the first element
# of each result.
385 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
# Publish the sub under its API name.
389 __PACKAGE__->register_method(
390 api_name => "open-ils.ingest.full.biblio.record_list",
391 method => "rw_biblio_ingest_record_list",
# ro_biblio_ingest_single_object: compute the derived ingest data for a bib
# record object ($bib) by calling the other ingest methods.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint,
# descriptor and uri.
396 sub ro_biblio_ingest_single_object {
400 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
402 my $document = $parser->parse_string($xml);
# The URI extractor gets the record object, the flat-MARC and field-entry
# extractors get the parsed document, and the fingerprint and descriptor
# methods get the XML string.
404 my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib);
405 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
406 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
407 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
408 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the record id on every row; the descriptor may be absent.
410 $_->source($bib->id) for (@mXfe);
411 $_->record($bib->id) for (@mfr);
412 $rd->record($bib->id) if ($rd);
414 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
# Publish the sub under its API name.
416 __PACKAGE__->register_method(
417 api_name => "open-ils.ingest.full.biblio.object.readonly",
418 method => "ro_biblio_ingest_single_object",
# ro_biblio_ingest_single_xml: like the object variant, but starts from a
# MARC XML string and sets no record ids on the results.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor (no uri key).
423 sub ro_biblio_ingest_single_xml {
426 my $xml = OpenILS::Application::Ingest::entityize(shift);
428 my $document = $parser->parse_string($xml);
430 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
431 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
432 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
433 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
435 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
437 __PACKAGE__->register_method(
438 api_name => "open-ils.ingest.full.biblio.xml.readonly",
439 method => "ro_biblio_ingest_single_xml",
# ro_biblio_ingest_single_record: retrieve a biblio.record_entry by id
# ($rec), run the XML read-only ingest on its MARC, and stamp $rec onto the
# resulting rows.  Returns undef when the retrieve yields nothing.
444 sub ro_biblio_ingest_single_record {
449 OpenILS::Application::Ingest->post_init();
450 my $r = OpenSRF::AppSession
451 ->create('open-ils.cstore')
452 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
455 return undef unless ($r and @$r);
457 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
459 $_->source($rec) for (@{$res->{field_entries}});
460 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the object variant of this ingest only touches the descriptor
# when one exists; here the call is unconditional -- confirm the descriptor
# can never be undef at this point.
461 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
465 __PACKAGE__->register_method(
466 api_name => "open-ils.ingest.full.biblio.record.readonly",
467 method => "ro_biblio_ingest_single_record",
# ro_biblio_ingest_stream_record: streaming variant.  Reads record ids from
# the client one at a time, runs the single-record read-only ingest for each,
# and sends each result back.
472 sub ro_biblio_ingest_stream_record {
476 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
478 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# recv() is asked for one message with a 5 second timeout; the loop ends when
# nothing arrives or when a message with undefined content is received.
480 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
482 my $rec = $resp->content;
483 last unless (defined $rec);
485 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
486 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# The single-record method already sets these ids; they are set again here.
488 $_->source($rec) for (@{$res->{field_entries}});
489 $_->record($rec) for (@{$res->{full_rec}});
491 $client->respond( $res );
# Publish the sub under its API name.
496 __PACKAGE__->register_method(
497 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
498 method => "ro_biblio_ingest_stream_record",
# ro_biblio_ingest_stream_xml: streaming variant that reads MARC XML strings
# from the client, runs the XML read-only ingest on each, and responds with
# each result.
503 sub ro_biblio_ingest_stream_xml {
507 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
509 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv() with a 5 second timeout; undefined content ends
# the loop.
511 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
513 my $xml = $resp->content;
514 last unless (defined $xml);
516 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
517 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
519 $client->respond( $res );
# Publish the sub under its API name.
524 __PACKAGE__->register_method(
525 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
526 method => "ro_biblio_ingest_stream_xml",
# rw_biblio_ingest_stream_import: streaming variant that reads bib record
# objects from the client, runs the XML read-only ingest on each object's
# MARC, stamps the object's id on the rows, and responds with the result.
# NOTE(review): despite the rw_ prefix and the ".import" API name, no database
# write is visible in this sub -- confirm.
531 sub rw_biblio_ingest_stream_import {
535 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
537 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv() with a 5 second timeout; undefined content ends
# the loop.
539 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
541 my $bib = $resp->content;
542 last unless (defined $bib);
544 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
545 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
547 $_->source($bib->id) for (@{$res->{field_entries}});
548 $_->record($bib->id) for (@{$res->{full_rec}});
550 $client->respond( $res );
# Publish the sub under its API name.
555 __PACKAGE__->register_method(
556 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
557 method => "rw_biblio_ingest_stream_import",
563 # --------------------------------------------------------------------------------
566 package OpenILS::Application::Ingest::Authority;
567 use base qw/OpenILS::Application::Ingest/;
568 use Unicode::Normalize;
# ro_authority_ingest_single_object: flatten an authority record object's
# MARC into full_rec rows and stamp the object's id on each row.
# Returns a hash ref with the single key full_rec.
570 sub ro_authority_ingest_single_object {
# The record object is held in a variable named $bib here as well.
574 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
576 my $document = $parser->parse_string($xml);
578 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
580 $_->record($bib->id) for (@mfr);
582 return { full_rec => \@mfr };
# Publish the sub under its API name.
584 __PACKAGE__->register_method(
585 api_name => "open-ils.ingest.full.authority.object.readonly",
586 method => "ro_authority_ingest_single_object",
# ro_authority_ingest_single_xml: flatten an authority MARC XML string into
# full_rec rows.  Returns a hash ref with the single key full_rec; no record
# id is set on the rows here.
591 sub ro_authority_ingest_single_xml {
594 my $xml = OpenILS::Application::Ingest::entityize(shift);
596 my $document = $parser->parse_string($xml);
598 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
600 return { full_rec => \@mfr };
# Publish the sub under its API name.
602 __PACKAGE__->register_method(
603 api_name => "open-ils.ingest.full.authority.xml.readonly",
604 method => "ro_authority_ingest_single_xml",
# ro_authority_ingest_single_record: retrieve an authority.record_entry by
# id ($rec), run the authority XML read-only ingest on its MARC, and stamp
# $rec on the resulting rows.  Returns undef when the retrieve yields nothing.
609 sub ro_authority_ingest_single_record {
614 OpenILS::Application::Ingest->post_init();
615 my $r = OpenSRF::AppSession
616 ->create('open-ils.cstore')
617 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
620 return undef unless ($r and @$r);
622 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
624 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest in this file returns only a full_rec
# key, so $res->{descriptor} looks undefined here and this call would then
# die -- confirm.
625 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
629 __PACKAGE__->register_method(
630 api_name => "open-ils.ingest.full.authority.record.readonly",
631 method => "ro_authority_ingest_single_record",
# ro_authority_ingest_stream_record: streaming variant.  Reads authority
# record ids from the client, runs the single-record read-only ingest for
# each, and responds with each result.
636 sub ro_authority_ingest_stream_record {
640 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
642 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv() with a 5 second timeout; undefined content ends
# the loop.
644 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
646 my $rec = $resp->content;
647 last unless (defined $rec);
649 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
650 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
# The single-record method already sets the record id; it is set again here.
652 $_->record($rec) for (@{$res->{full_rec}});
654 $client->respond( $res );
# Publish the sub under its API name.
659 __PACKAGE__->register_method(
660 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
661 method => "ro_authority_ingest_stream_record",
# ro_authority_ingest_stream_xml: streaming variant that reads authority MARC
# XML strings from the client, runs the authority XML read-only ingest on
# each, and responds with each result.
666 sub ro_authority_ingest_stream_xml {
670 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
672 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv() with a 5 second timeout; undefined content ends
# the loop.
674 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
676 my $xml = $resp->content;
677 last unless (defined $xml);
679 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
680 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
682 $client->respond( $res );
# Publish the sub under its API name.
687 __PACKAGE__->register_method(
688 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
689 method => "ro_authority_ingest_stream_xml",
# rw_authority_ingest_stream_import: streaming variant that reads authority
# record objects from the client, runs the authority XML read-only ingest on
# each object's MARC, stamps the object's id on the rows, and responds.
# NOTE(review): despite the rw_ prefix and the ".import" API name, no database
# write is visible in this sub -- confirm.
694 sub rw_authority_ingest_stream_import {
698 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the visible statements.
700 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# One message per recv() with a 5 second timeout; undefined content ends
# the loop.
702 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
704 my $bib = $resp->content;
705 last unless (defined $bib);
707 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
708 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
710 $_->record($bib->id) for (@{$res->{full_rec}});
712 $client->respond( $res );
# Publish the sub under its API name.
717 __PACKAGE__->register_method(
718 api_name => "open-ils.ingest.full.authority.bib_stream.import",
719 method => "rw_authority_ingest_stream_import",
725 # --------------------------------------------------------------------------------
726 # MARC index extraction
728 package OpenILS::Application::Ingest::XPATH;
729 use base qw/OpenILS::Application::Ingest/;
730 use Unicode::Normalize;
732 # give this an XML documentElement and an XPATH expression
# xpath_to_string: collect the text matched by $xpath under $xml into one
# space-separated string ($string), optionally skipping repeated values.
733 sub xpath_to_string {
737 my $ns_prefix = shift;
# Declare the namespace on the element when both the URI and the prefix are
# given (presumably so that $xpath can use that prefix -- confirm).
740 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
744 # grab the set of matching nodes
745 my @nodes = $xml->findnodes( $xpath );
746 for my $value (@nodes) {
748 # grab all children of the node
749 my @children = $value->childNodes();
750 for my $child (@children) {
752 # add the childs content to the growing buffer
# With $unique set, a child is skipped when its text already occurs anywhere
# in $string; this is a substring match, not a whole-value comparison.
753 my $content = quotemeta($child->textContent);
754 next if ($unique && $string =~ /$content/); # uniquify the values
755 $string .= $child->textContent . " ";
# NOTE(review): presumably the alternative path for a node without children;
# the condition choosing between the two appends is not visible -- confirm.
758 $string .= $value->textContent . " ";
# Turn "word/word" and "NNNN-NNNN" into two space-separated tokens.
762 $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
763 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
# class_index_string_xml: for each requested field class, evaluate every
# configured XPath of that class against the record and respond with one
# field-entry object per definition.  $xml may be a string (parsed here
# after entityize) or an already parsed document.
768 sub class_index_string_xml {
774 OpenILS::Application::Ingest->post_init();
775 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
779 for my $class (@classes) {
780 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
781 for my $type ( keys %{ $xpathset->{$class} } ) {
783 my $def = $xpathset->{$class}->{$type};
784 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record once per format and reuse the result for every other
# definition that shares that format.
789 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
790 $transform_cache{$def->{format}} = $document;
# The format's namespace URI is passed along with the format name as prefix.
793 my $value = xpath_to_string(
794 $document->documentElement => $def->{xpath},
795 $sf->{ns} => $def->{format},
# Normalize the extracted text: decompose (NFD), remove combining marks (\pM)
# and \pC characters, trim trailing non-word characters, and remove runs of
# dots that sit between word boundaries.
801 $value = NFD($value);
802 $value =~ s/\pM+//sgo;
803 $value =~ s/\pC+//sgo;
804 $value =~ s/\W+$//sgo;
806 $value =~ s/\b\.+\b//sgo;
# Build the field-entry object and stream it to the caller.
809 my $fm = $class_constructor->new;
810 $fm->value( $value );
811 $fm->field( $xpathset->{$class}->{$type}->{id} );
812 $client->respond($fm);
# Publish the sub under its API name.
817 __PACKAGE__->register_method(
818 api_name => "open-ils.ingest.field_entry.class.xml",
819 method => "class_index_string_xml",
# class_index_string_record: retrieve a record by id ($rec) and stream the
# field entries of the requested classes for its MARC.
# Returns undef when the retrieve yields nothing.
825 sub class_index_string_record {
831 OpenILS::Application::Ingest->post_init();
# NOTE(review): this reads authority.record_entry, while the "all classes"
# record variant in this package reads biblio.record_entry -- confirm which
# table is intended.
832 my $r = OpenSRF::AppSession
833 ->create('open-ils.cstore')
834 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
837 return undef unless ($r and @$r);
839 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
841 $client->respond($fm);
# Publish the sub under its API name.
845 __PACKAGE__->register_method(
846 api_name => "open-ils.ingest.field_entry.class.record",
847 method => "class_index_string_record",
# all_index_string_xml: stream the field entries for every class currently
# present in $xpathset, delegating to the per-class XML method.
853 sub all_index_string_xml {
858 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
859 $client->respond($fm);
# Publish the sub under its API name.
863 __PACKAGE__->register_method(
864 api_name => "open-ils.ingest.extract.field_entry.all.xml",
865 method => "all_index_string_xml",
# all_index_string_record: retrieve a biblio.record_entry by id ($rec) and
# stream the field entries of every class in $xpathset for its MARC.
# Returns undef when the retrieve yields nothing.
871 sub all_index_string_record {
876 OpenILS::Application::Ingest->post_init();
877 my $r = OpenSRF::AppSession
878 ->create('open-ils.cstore')
879 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
882 return undef unless ($r and @$r);
884 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
886 $client->respond($fm);
# Publish the sub under its API name.
890 __PACKAGE__->register_method(
891 api_name => "open-ils.ingest.extract.field_entry.all.record",
892 method => "all_index_string_record",
898 # --------------------------------------------------------------------------------
901 package OpenILS::Application::Ingest::FlatMARC;
902 use base qw/OpenILS::Application::Ingest/;
903 use Unicode::Normalize;
# _marcxml_to_full_rows: walk a parsed MARCXML document and build full_rec
# row objects from its leader, control fields and data-field subfields.
# $xmltype selects the Fieldmapper class (Fieldmapper::<xmltype>::full_rec)
# and defaults to 'metabib'.
906 sub _marcxml_to_full_rows {
909 my $xmltype = shift || 'metabib';
911 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first element whose local name is "record" is processed.
915 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
917 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
918 next unless $tagline;
923 my $val = $tagline->textContent;
# Control fields: the tag attribute and the text content are read.
933 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
934 next unless $tagline;
938 $ns->tag( $tagline->getAttribute( "tag" ) );
939 my $val = $tagline->textContent;
# Data fields: every subfield is visited.
949 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
950 next unless $tagline;
952 my $tag = $tagline->getAttribute( "tag" );
953 my $ind1 = $tagline->getAttribute( "ind1" );
954 my $ind2 = $tagline->getAttribute( "ind2" );
956 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
964 $ns->subfield( $data->getAttribute( "code" ) );
965 my $val = $data->textContent;
# "NNNN-NNNN" and "word/word" are split into two tokens and the value is
# stored lower-cased.
970 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
971 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
972 $ns->value( lc($val) );
# Extra pass for bibliographic 245 fields: subfield "a" is processed again
# with its first $ind2 characters removed (presumably the MARC nonfiling
# character count -- confirm).
977 if ($xmltype eq 'metabib' and $tag eq '245') {
980 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
981 next unless ($data and $data->getAttribute( "code" ) eq 'a');
988 $ns->subfield( $data->getAttribute( "code" ) );
# NOTE(review): $ind2 comes straight from the attribute; a blank or
# non-numeric indicator would be used as the substr offset -- confirm.
989 my $val = substr( $data->textContent, $ind2 );
994 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
995 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
996 $ns->value( lc($val) );
1003 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the flat-MARC XML method (registered below as flat_marc_xml under
# both an authority and a biblio API name).  It parses $xml when given a
# string and streams one full_rec row per call to respond().
1012 $log->debug("processing [$xml]");
1014 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name used for the call: 'authority' when the
# name contains "authority", otherwise 'metabib'.
1016 my $type = 'metabib';
1017 $type = 'authority' if ($self->api_name =~ /authority/o);
1019 OpenILS::Application::Ingest->post_init();
1021 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# One implementation, published under two API names.
1024 __PACKAGE__->register_method(
1025 api_name => "open-ils.ingest.flat_marc.authority.xml",
1026 method => "flat_marc_xml",
1031 __PACKAGE__->register_method(
1032 api_name => "open-ils.ingest.flat_marc.biblio.xml",
1033 method => "flat_marc_xml",
# flat_marc_record: retrieve a record by id ($rec) and stream its flat-MARC
# rows.  Returns undef when no record with MARC content is found.
1039 sub flat_marc_record {
# $type is 'authority' when the API name contains "authority", otherwise
# 'biblio'; it selects both the cstore table and the flat_marc method below.
1044 my $type = 'biblio';
1045 $type = 'authority' if ($self->api_name =~ /authority/o);
1047 OpenILS::Application::Ingest->post_init();
1048 my $r = OpenSRF::AppSession
1049 ->create('open-ils.cstore')
1050 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
1054 return undef unless ($r and $r->marc);
1056 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Each row is sent to the client and also logged as JSON.
1057 for my $row (@rows) {
1058 $client->respond($row);
1059 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# One implementation, published under two API names.
1063 __PACKAGE__->register_method(
1064 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1065 method => "flat_marc_record",
1070 __PACKAGE__->register_method(
1071 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1072 method => "flat_marc_record",
1079 # --------------------------------------------------------------------------------
1082 package OpenILS::Application::Ingest::Biblio::URI;
1083 use base qw/OpenILS::Application::Ingest/;
1084 use Unicode::Normalize;
1085 use OpenSRF::EX qw/:try/;
# _extract_856_uris: find the 856 data fields of a MARC record and build, for
# each usable one, a { uri => ..., call_number => ... } pair of asset objects.
# Existing rows are looked up through cstore first; new objects are built
# when the lookups find nothing.
1088 sub _extract_856_uris {
1091 my $marcxml = shift;
# Only 856 fields with ind1 of 4 or 1 and ind2 of 0 or 1 are considered.
1094 my @nodes = $marcxml->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
1096 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
# NOTE(review): the findvalue() expressions below begin with a bare predicate
# ("[local-name()=...]") and have no node test in front of it; an XPath step
# normally needs one (e.g. "*[...]") -- confirm these evaluate as intended.
1098 for my $node (@nodes) {
1099 # first, is there a URI?
1100 my $href = $node->findvalue('[local-name()="subfield" and @code="u"]/text()');
1101 next unless ($href);
1103 # now, find the best possible label
1104 my $label = $node->findvalue('[local-name()="subfield" and @code="y"]/text()');
1105 $label ||= $node->findvalue('[local-name()="subfield" and @code="3"]/text()');
# Use text: subfield z, then 2, then n.
1109 my $use = $node->findvalue('[local-name()="subfield" and @code="z"]/text()');
1110 $use ||= $node->findvalue('[local-name()="subfield" and @code="2"]/text()');
1111 $use ||= $node->findvalue('[local-name()="subfield" and @code="n"]/text()');
1113 # moving on to the URI owner
1114 my $owner = $node->findvalue('[local-name()="subfield" and @code="w"]/text()');
1115 $owner ||= $node->findvalue('[local-name()="subfield" and @code="n"]/text()');
1116 $owner ||= $node->findvalue('[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
# When the text contains a parenthesized word, keep only that word; text
# without such a group is left unchanged.
1118 $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
1120 # no owner? skip it :(
1121 next unless ($owner);
# Resolve the owner text to an org unit by shortname.
1124 ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
1129 # now we can construct the uri object
# An active URI with the same label, href and use text is searched for first.
1131 ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use => $use, active => 't' } )
1135 $uri = Fieldmapper::asset::uri->new;
1137 $uri->label($label);
1142 # see if we need to create a call number
# URI call numbers carry the fixed label '##URI##' for this owner and record.
1144 ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owner => $org->id, record => $recid, label => '##URI##' } )
1148 $cn = Fieldmapper::asset::call_number->new;
1150 $cn->owner( $org->id );
1151 $cn->record( $recid );
1152 $cn->label( '##URI##' );
1155 push @objects, { uri => $uri, call_number => $cn };
1158 $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
# get_uris_record: retrieve a biblio.record_entry by id ($rec) and stream the
# URI/call-number pairs extracted from its 856 fields.
# Returns undef when no record with MARC content is found.
1162 sub get_uris_record {
1167 OpenILS::Application::Ingest->post_init();
1168 my $r = OpenSRF::AppSession
1169 ->create('open-ils.cstore')
1170 ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
1173 return undef unless ($r and $r->marc);
# NOTE(review): the record's marc value is passed as-is; the extractor calls
# findnodes() on that argument, so presumably it parses a string first --
# confirm.
1175 $client->respond($_) for (_extract_856_uris($r->id, $r->marc));
# Publish the sub under its API name.
1178 __PACKAGE__->register_method(
1179 api_name => "open-ils.ingest.856_uri.record",
1180 method => "get_uris_record",
# get_uris_object: stream the URI/call-number pairs extracted from the 856
# fields of an already loaded record object ($obj).
# Returns undef when the object is missing or has no MARC content.
1186 sub get_uris_object {
1191 return undef unless ($obj and $obj->marc);
1193 $client->respond($_) for (_extract_856_uris($obj->id, $obj->marc));
# Publish the sub under its API name.
1196 __PACKAGE__->register_method(
1197 api_name => "open-ils.ingest.856_uri.object",
1198 method => "get_uris_object",
1205 # --------------------------------------------------------------------------------
1208 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1209 use base qw/OpenILS::Application::Ingest/;
1210 use Unicode::Normalize;
1211 use OpenSRF::EX qw/:try/;
# biblio_fingerprint_record: retrieve a biblio.record_entry by id ($rec) and
# compute its fingerprint through open-ils.ingest.fingerprint.xml.
# Returns undef when no record with MARC content is found.
1213 sub biblio_fingerprint_record {
1218 OpenILS::Application::Ingest->post_init();
1220 my $r = OpenSRF::AppSession
1221 ->create('open-ils.cstore')
1222 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1225 return undef unless ($r and $r->marc);
1227 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash ref on the next statement, so
# interpolating it here logs a reference address rather than the fingerprint
# text -- confirm.
1228 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# The quality value is truncated to an integer.
1229 $fp->{quality} = int($fp->{quality});
# Publish the sub under its API name.
1232 __PACKAGE__->register_method(
1233 api_name => "open-ils.ingest.fingerprint.record",
1234 method => "biblio_fingerprint_record",
# biblio_fingerprint: run the configured fingerprint script over a MARC XML
# string and use its result.  Returns undef when the script run fails.
1240 sub biblio_fingerprint {
1243 my $xml = OpenILS::Application::Ingest::entityize(shift);
1245 $log->internal("Got MARC [$xml]");
# Script file and library paths come from the open-ils.ingest app_settings
# section of the configuration.
1248 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1249 my $conf = OpenSRF::Utils::SettingsClient->new;
1251 my $libs = $conf->config_value(@pfx, 'script_path');
1252 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalize it to an array ref.
1253 my $script_libs = (ref($libs)) ? $libs : [$libs];
1255 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible statements;
# presumably a runner cached between calls -- confirm.
1257 $fp_script = new OpenILS::Utils::ScriptRunner
1258 ( file => $script_file,
1259 paths => $script_libs,
1260 reset_count => 100 );
# Expose the record to the script as environment.marc.
1263 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): `return undef` is only reached when $log->error(...) returns
# a true value -- confirm that it always does.
1265 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1266 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub under its API name.
1270 __PACKAGE__->register_method(
1271 api_name => "open-ils.ingest.fingerprint.xml",
1272 method => "biblio_fingerprint",
# biblio_descriptor: run the configured descriptor script over a MARC XML
# string and post-process the date1/date2 values of its result.
# Returns undef when the script run fails.
1278 sub biblio_descriptor {
1281 my $xml = OpenILS::Application::Ingest::entityize(shift);
1283 $log->internal("Got MARC [$xml]");
# Script file and library paths come from the open-ils.ingest app_settings
# section of the configuration.
1286 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1287 my $conf = OpenSRF::Utils::SettingsClient->new;
1289 my $libs = $conf->config_value(@pfx, 'script_path');
1290 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalize it to an array ref.
1291 my $script_libs = (ref($libs)) ? $libs : [$libs];
1293 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is not declared in the visible statements;
# presumably a runner cached between calls -- confirm.
1295 $rd_script = new OpenILS::Utils::ScriptRunner
1296 ( file => $script_file,
1297 paths => $script_libs,
1298 reset_count => 100 );
1301 $log->debug("Setting up environment for descriptor extraction script...");
# The record is inserted directly at the 'environment.marc' path here,
# whereas the fingerprint code inserts a hash at 'environment'.
1302 $rd_script->insert('environment.marc' => $xml => 1);
1303 $log->debug("Environment building complete...");
# NOTE(review): `return undef` is only reached when $log->error(...) returns
# a true value -- confirm that it always does.
1305 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1306 $log->debug("Script for biblio descriptor extraction completed successfully");
# date1 and date2 get extra handling only when set and not equal to the
# blank placeholder string (the handling itself is not visible here).
1308 my $d1 = $res->date1;
1309 if ($d1 && $d1 ne ' ') {
1314 my $d2 = $res->date2;
1315 if ($d2 && $d2 ne ' ') {
# Publish the sub under its API name.
1322 __PACKAGE__->register_method(
1323 api_name => "open-ils.ingest.descriptor.xml",
1324 method => "biblio_descriptor",