1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
16 use OpenILS::Utils::Fieldmapper;
20 use Time::HiRes qw(time);
# Output formats known to the ingest code, keyed by format name. Each entry
# records the XML namespace URI of that format under the `ns` key; the
# initialisation code later in this file adds a compiled stylesheet under an
# `xslt` key for the MODS variants.
# NOTE(review): this listing skips source lines (the embedded numbers jump),
# so further entries and the closing of the hash are not visible here.
22 our %supported_formats = (
23 mods33 => {ns => 'http://www.loc.gov/mods/v3'},
24 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
26 mods => {ns => 'http://www.loc.gov/mods/'},
27 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
28 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
29 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
30 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
31 atom => {ns => 'http://www.w3.org/2005/Atom'},
32 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
36 rss10 => {ns => 'http://purl.org/rss/1.0/'},
37 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# File-scoped helpers shared by the packages below: the logger class name
# (used as a class for ->debug/->error calls), one XML::LibXML parser and one
# XML::LibXSLT processor.
42 my $log = 'OpenSRF::Utils::Logger';
44 my $parser = XML::LibXML->new();
45 my $xslt = XML::LibXSLT->new();
# Lazy initialisation body: runs only while $xpathset has no keys.
# It (1) parses and compiles the MARC21slim-to-MODS stylesheets found in the
# configured `xsl` directory into %supported_formats{...}{xslt}, skipping any
# that are already loaded, and (2) loads metabib field definitions from
# open-ils.cstore into $xpathset, keyed by field class and then field name.
# NOTE(review): the enclosing sub header is not visible in this listing; the
# log message and the ->post_init() calls elsewhere suggest it is post_init.
55 unless (keys %$xpathset) {
56 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL files, read from the settings server.
58 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
60 unless ($supported_formats{mods}{xslt}) {
61 $log->debug("Loading MODS XSLT", DEBUG);
62 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
63 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
66 unless ($supported_formats{mods3}{xslt}) {
67 $log->debug("Loading MODS v3 XSLT", DEBUG);
68 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
69 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
72 unless ($supported_formats{mods32}{xslt}) {
73 $log->debug("Loading MODS v32 XSLT", DEBUG);
74 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
75 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
78 unless ($supported_formats{mods33}{xslt}) {
79 $log->debug("Loading MODS v33 XSLT", DEBUG);
80 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS33.xsl");
81 $supported_formats{mods33}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Fetch the config.metabib_field rows flagged search_field => 't'.
84 my $req = OpenSRF::AppSession
85 ->create('open-ils.cstore')
87 # XXX testing new metabib field use for faceting
88 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
89 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Only populate $xpathset when a non-empty array reference came back.
93 if (ref $req and @$req) {
# Record the xpath expression, database id and source format of each field.
95 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
96 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
97 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
98 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Unicode-normalises $stuff, then replaces every character in the range
# U+0080..U+FFFD with an upper-case hexadecimal numeric character reference
# (&#x...;).
# NOTE(review): the sub header and the lines between the NFD and NFC
# assignments are missing from this listing; presumably a flag selects one
# normalisation form -- confirm. Callers in this file invoke it as
# OpenILS::Application::Ingest::entityize(...).
109 $stuff = NFD($stuff);
111 $stuff = NFC($stuff);
114 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
118 # --------------------------------------------------------------------------------
121 package OpenILS::Application::Ingest::Biblio;
122 use base qw/OpenILS::Application::Ingest/;
123 use Unicode::Normalize;
# Read/write ingest of one bibliographic record object ($bib).
# Runs the read-only ingest to obtain the extracted data ($blob), then, inside
# a single open-ils.cstore transaction, replaces the record's URI call numbers
# and maps, full_rec rows, record descriptor and classed field entries,
# re-maps the record to a metarecord matching its fingerprint (retargeting
# metarecord-level holds as needed), updates the record itself and commits.
# Returns undef when the read-only ingest yields nothing or the commit call
# returns a false value.
# NOTE(review): this listing skips source lines (argument unpacking, closing
# braces, several request calls), so some control flow is not visible.
125 sub rw_biblio_ingest_single_object {
130 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
131 return undef unless ($blob);
# Copy the computed fingerprint and quality onto the record object.
133 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
134 $bib->quality( $blob->{fingerprint}->{quality} );
136 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
138 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
141 # update uri stuff ...
143 # gather URI call numbers for this record
# NOTE(review): the result is also assigned to $u->{call_number}, but no $u is
# declared in the visible lines before the loop further down -- confirm intent.
144 my $uri_cns = $u->{call_number} = $cstore->request(
145 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
149 # gather the maps for those call numbers
# NOTE(review): this overwrites the same $u->{call_number} slot with the map
# id list; looks like a copy/paste leftover -- confirm.
150 my $uri_maps = $u->{call_number} = $cstore->request(
151 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
154 # delete the old maps
155 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
157 # and delete the call numbers if there are no more URIs
158 if (!@{ $blob->{uri} }) {
159 $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
163 # now, add CNs, URIs and maps
# Objects created during this run are cached by owning library so that a
# later entry for the same owner reuses the stored object.
164 my %new_cns_by_owner;
165 my %new_uris_by_owner;
166 for my $u ( @{ $blob->{uri} } ) {
168 my $owner = $u->{call_number}->owning_lib;
170 if ($u->{call_number}->isnew) {
171 if ($new_cns_by_owner{$owner}) {
172 $u->{call_number} = $new_cns_by_owner{$owner};
# Drop the temporary id before asking cstore to create the call number.
174 $u->{call_number}->clear_id;
175 $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
176 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
# NOTE(review): new URIs are cached by owner only, so a second new URI for the
# same owner would be replaced by the first one created -- confirm intended.
181 if ($u->{uri}->isnew) {
182 if ($new_uris_by_owner{$owner}) {
183 $u->{uri} = $new_uris_by_owner{$owner};
185 $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
186 'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
# Link the URI to its call number.
191 my $umap = Fieldmapper::asset::uri_call_number_map->new;
192 $umap->uri($u->{uri}->id);
193 $umap->call_number($u->{call_number}->id);
# NOTE(review): $tmp is set on a line missing from this listing; presumably a
# lookup for an existing identical map -- confirm.
195 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1) if (!$tmp);
198 # update full_rec stuff ...
199 $tmp = $cstore->request(
200 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
201 { record => $bib->id }
# Replace all existing full_rec rows with the freshly extracted ones.
204 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
205 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
207 # update rec_descriptor stuff ...
208 $tmp = $cstore->request(
209 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
210 { record => $bib->id }
213 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
214 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
216 # deal with classed fields...
# First remove the existing entries of every field class for this record.
217 for my $class ( qw/title author subject keyword series/ ) {
218 $tmp = $cstore->request(
219 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
220 { source => $bib->id }
223 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Then create the new entries; the cstore method name is derived from each
# object's class name with the leading "Fieldmapper::" removed.
225 for my $obj ( @{ $blob->{field_entries} } ) {
226 my $class = $obj->class_name;
227 $class =~ s/^Fieldmapper:://o;
229 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Remove the record's current metarecord mappings, remembering them in $tmp.
234 $tmp = $cstore->request(
235 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
236 { source => $bib->id }
239 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Fetch the metarecords this record used to belong to (only if there were any).
242 my $old_mrs = $cstore->request(
243 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
244 )->gather(1) if (@$tmp);
246 $old_mrs = [] if (!ref($old_mrs));
# Examine each old metarecord: compare fingerprints and look up its remaining
# source maps; a metarecord delete request is issued in this loop.
# NOTE(review): the branch structure is not visible in this listing.
249 for my $m (@$old_mrs) {
250 if ($m->fingerprint eq $bib->fingerprint) {
253 my $others = $cstore->request(
254 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
259 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
269 # Get the matching MR, if any.
270 $mr = $cstore->request(
271 'open-ils.cstore.direct.metabib.metarecord.search',
272 { fingerprint => $bib->fingerprint }
# Metarecord-level holds (hold_type 'M') that target old metarecords flagged
# as deleted.
275 $holds = $cstore->request(
276 'open-ils.cstore.direct.action.hold_request.search.atomic',
277 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
278 )->gather(1) if (@$old_mrs);
281 for my $h (@$holds) {
283 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with this record as master.
# NOTE(review): presumably only when no matching metarecord was found -- the
# guarding condition is not visible here.
290 $mr = new Fieldmapper::metabib::metarecord;
291 $mr->fingerprint( $bib->fingerprint );
292 $mr->master_record( $bib->id );
295 "open-ils.cstore.direct.metabib.metarecord.create",
296 $mr => { quiet => 'true' }
# Update the holds that were not already flagged as changed.
300 for my $h (grep { !$_->ischanged } @$holds) {
302 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Choose the master record: look up the mapped source records ordered by
# quality (descending) and compare the result with this record's quality.
305 my $mrm = $cstore->request(
306 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
307 { metarecord => $mr->id }
311 my $best = $cstore->request(
312 "open-ils.cstore.direct.biblio.record_entry.search",
313 { id => [ map { $_->source } @$mrm ] },
314 { 'select' => { bre => [ qw/id quality/ ] },
315 order_by => { bre => "quality desc" },
320 if ($best->quality > $bib->quality) {
321 $mr->master_record($best->id);
323 $mr->master_record($bib->id);
326 $mr->master_record($bib->id);
331 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the chosen metarecord and store the updated record.
334 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
335 $mrm->source($bib->id);
336 $mrm->metarecord($mr->id);
338 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
339 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false commit result makes the sub return undef.
341 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Register the sub above under its API name.
346 __PACKAGE__->register_method(
347 api_name => "open-ils.ingest.full.biblio.object",
348 method => "rw_biblio_ingest_single_object",
# Read/write ingest of one bibliographic record identified by $rec.
# Retrieves the record from open-ils.cstore inside a transaction that is
# rolled back afterwards, then hands the record object to
# open-ils.ingest.full.biblio.object and returns that method's first result.
# Returns undef when the retrieve yields nothing.
# NOTE(review): argument unpacking is not visible in this listing.
353 sub rw_biblio_ingest_single_record {
358 OpenILS::Application::Ingest->post_init();
359 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
360 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
362 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
# Nothing was written through this session, so the transaction is discarded.
364 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# NOTE(review): @$r dereferences the retrieved record as an array -- confirm
# that this is valid for the record object type.
367 return undef unless ($r and @$r);
369 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Register the sub above under its API name.
371 __PACKAGE__->register_method(
372 api_name => "open-ils.ingest.full.biblio.record",
373 method => "rw_biblio_ingest_single_record",
# Read/write ingest of several bibliographic records.
# Accepts the ids either as one array reference or as a flat argument list,
# fetches the matching records from open-ils.cstore, and runs
# open-ils.ingest.full.biblio.object on each, adding each first result to
# $count. Returns undef when no records are found.
378 sub rw_biblio_ingest_record_list {
# Accept both calling styles: an array reference or a plain list.
381 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
383 OpenILS::Application::Ingest->post_init();
384 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
385 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
# NOTE(review): the search uses scalar $rec while the ids were collected in
# @rec above; no $rec declaration is visible in this listing -- confirm.
387 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
389 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
392 return undef unless ($r and @$r);
395 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
# Register the sub above under its API name.
399 __PACKAGE__->register_method(
400 api_name => "open-ils.ingest.full.biblio.record_list",
401 method => "rw_biblio_ingest_record_list",
# Read-only ingest of one bibliographic record object ($bib).
# Parses the record's MARC XML and runs the URI, flat-MARC, field-entry,
# fingerprint and descriptor extractors on it, stamping the record id onto
# the extracted rows.
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint, descriptor and uri. Nothing is written to the database here.
406 sub ro_biblio_ingest_single_object {
410 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
414 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# Starting value for temporary call number ids: the highest existing
# call_number id plus 1000.
417 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
418 $max_cn = int($cn->id) + 1000;
# NOTE(review): $max_uri is also seeded from asset.call_number rather than
# asset.uri -- possibly a copy/paste leftover; confirm.
422 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
423 $max_uri = int($cn->id) + 1000;
428 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry extractors receive the parsed document; the
# fingerprint and descriptor extractors receive the XML string.
430 my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri);
431 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
432 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
433 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
434 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every extracted row back to the record.
436 $_->source($bib->id) for (@mXfe);
437 $_->record($bib->id) for (@mfr);
438 $rd->record($bib->id) if ($rd);
440 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
# Register the sub above under its API name.
442 __PACKAGE__->register_method(
443 api_name => "open-ils.ingest.full.biblio.object.readonly",
444 method => "ro_biblio_ingest_single_object",
# Read-only ingest of one bibliographic record given as a MARC XML string.
# Returns a hash reference with the keys full_rec, field_entries,
# fingerprint and descriptor. No record id is stamped on the rows here.
449 sub ro_biblio_ingest_single_xml {
# The next remaining argument is the XML; it is entityized before parsing.
452 my $xml = OpenILS::Application::Ingest::entityize(shift);
454 my $document = $parser->parse_string($xml);
456 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
457 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
458 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
459 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
461 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
463 __PACKAGE__->register_method(
464 api_name => "open-ils.ingest.full.biblio.xml.readonly",
465 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of one bibliographic record identified by $rec.
# Retrieves the record from open-ils.cstore, runs the XML read-only ingest on
# its MARC, and stamps $rec onto the field entries, full_rec rows and
# descriptor. Returns undef when the retrieve yields nothing.
# NOTE(review): the final return statement is not visible in this listing.
470 sub ro_biblio_ingest_single_record {
475 OpenILS::Application::Ingest->post_init();
476 my $r = OpenSRF::AppSession
477 ->create('open-ils.cstore')
478 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
481 return undef unless ($r and @$r);
483 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
485 $_->source($rec) for (@{$res->{field_entries}});
486 $_->record($rec) for (@{$res->{full_rec}});
487 $res->{descriptor}->record($rec);
# Register the sub above under its API name.
491 __PACKAGE__->register_method(
492 api_name => "open-ils.ingest.full.biblio.record.readonly",
493 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id.
# Receives one message at a time from $client (5 second timeout), treats its
# content as a record id, runs open-ils.ingest.full.biblio.record.readonly on
# it and responds with the result. The loop ends when recv returns nothing or
# the content is undefined.
498 sub ro_biblio_ingest_stream_record {
502 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
504 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
506 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
508 my $rec = $resp->content;
509 last unless (defined $rec);
511 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
512 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id onto the extracted rows before responding.
514 $_->source($rec) for (@{$res->{field_entries}});
515 $_->record($rec) for (@{$res->{full_rec}});
517 $client->respond( $res );
# Register the sub above under its API name.
522 __PACKAGE__->register_method(
523 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
524 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of raw MARC XML.
# Receives one message at a time from $client (5 second timeout), treats its
# content as MARC XML, runs open-ils.ingest.full.biblio.xml.readonly on it and
# responds with the result. The loop ends when recv returns nothing or the
# content is undefined.
529 sub ro_biblio_ingest_stream_xml {
533 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
535 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
537 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
539 my $xml = $resp->content;
540 last unless (defined $xml);
542 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
543 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
545 $client->respond( $res );
# Register the sub above under its API name.
550 __PACKAGE__->register_method(
551 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
552 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bibliographic record objects for import.
# Receives one record object at a time from $client (5 second timeout), runs
# the XML read-only ingest on its MARC, stamps the record id onto the
# extracted rows and responds with the result.
# NOTE(review): despite the rw_ prefix, no database write appears in the
# lines visible in this listing -- confirm.
557 sub rw_biblio_ingest_stream_import {
561 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
563 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
565 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
567 my $bib = $resp->content;
568 last unless (defined $bib);
570 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
571 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
573 $_->source($bib->id) for (@{$res->{field_entries}});
574 $_->record($bib->id) for (@{$res->{full_rec}});
576 $client->respond( $res );
# Register the sub above under its API name.
581 __PACKAGE__->register_method(
582 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
583 method => "rw_biblio_ingest_stream_import",
589 # --------------------------------------------------------------------------------
592 package OpenILS::Application::Ingest::Authority;
593 use base qw/OpenILS::Application::Ingest/;
594 use Unicode::Normalize;
# Read-only ingest of one authority record object (named $bib here).
# Parses the record's MARC XML, extracts the flat MARC rows and stamps the
# record id on each. Returns a hash reference with the single key full_rec.
596 sub ro_authority_ingest_single_object {
600 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
602 my $document = $parser->parse_string($xml);
604 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
606 $_->record($bib->id) for (@mfr);
608 return { full_rec => \@mfr };
# Register the sub above under its API name.
610 __PACKAGE__->register_method(
611 api_name => "open-ils.ingest.full.authority.object.readonly",
612 method => "ro_authority_ingest_single_object",
# Read-only ingest of one authority record given as a MARC XML string.
# Returns a hash reference with the single key full_rec holding the flat MARC
# rows; no record id is stamped on them here.
617 sub ro_authority_ingest_single_xml {
# The next remaining argument is the XML; it is entityized before parsing.
620 my $xml = OpenILS::Application::Ingest::entityize(shift);
622 my $document = $parser->parse_string($xml);
624 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
626 return { full_rec => \@mfr };
# Register the sub above under its API name.
628 __PACKAGE__->register_method(
629 api_name => "open-ils.ingest.full.authority.xml.readonly",
630 method => "ro_authority_ingest_single_xml",
# Read-only ingest of one authority record identified by $rec.
# Retrieves the record from open-ils.cstore, runs the authority XML read-only
# ingest on its MARC and stamps $rec onto the full_rec rows.
# Returns undef when the retrieve yields nothing.
# NOTE(review): the final return statement is not visible in this listing.
635 sub ro_authority_ingest_single_record {
640 OpenILS::Application::Ingest->post_init();
641 my $r = OpenSRF::AppSession
642 ->create('open-ils.cstore')
643 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
646 return undef unless ($r and @$r);
648 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
650 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest in this file returns only a full_rec
# key, so $res->{descriptor} looks undefined here and this call would fail --
# confirm.
651 $res->{descriptor}->record($rec);
# Register the sub above under its API name.
655 __PACKAGE__->register_method(
656 api_name => "open-ils.ingest.full.authority.record.readonly",
657 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id.
# Receives one message at a time from $client (5 second timeout), treats its
# content as a record id, runs open-ils.ingest.full.authority.record.readonly
# on it, stamps the id onto the full_rec rows and responds with the result.
662 sub ro_authority_ingest_stream_record {
666 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
668 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
670 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
672 my $rec = $resp->content;
673 last unless (defined $rec);
675 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
676 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
678 $_->record($rec) for (@{$res->{full_rec}});
680 $client->respond( $res );
# Register the sub above under its API name.
685 __PACKAGE__->register_method(
686 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
687 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of raw MARC XML.
# Receives one message at a time from $client (5 second timeout), treats its
# content as MARC XML, runs open-ils.ingest.full.authority.xml.readonly on it
# and responds with the result.
692 sub ro_authority_ingest_stream_xml {
696 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
698 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
700 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
702 my $xml = $resp->content;
703 last unless (defined $xml);
705 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
706 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
708 $client->respond( $res );
# Register the sub above under its API name.
713 __PACKAGE__->register_method(
714 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
715 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects for import.
# Receives one record object at a time from $client (5 second timeout), runs
# the authority XML read-only ingest on its MARC, stamps the record id onto
# the full_rec rows and responds with the result.
# NOTE(review): despite the rw_ prefix, no database write appears in the
# lines visible in this listing -- confirm.
720 sub rw_authority_ingest_stream_import {
724 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
726 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
728 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
730 my $bib = $resp->content;
731 last unless (defined $bib);
733 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
734 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
736 $_->record($bib->id) for (@{$res->{full_rec}});
738 $client->respond( $res );
# Register the sub above under its API name.
743 __PACKAGE__->register_method(
744 api_name => "open-ils.ingest.full.authority.bib_stream.import",
745 method => "rw_authority_ingest_stream_import",
751 # --------------------------------------------------------------------------------
752 # MARC index extraction
754 package OpenILS::Application::Ingest::XPATH;
755 use base qw/OpenILS::Application::Ingest/;
756 use Unicode::Normalize;
758 # give this an XML documentElement and an XPATH expression
# Collects the text content of the nodes matching $xpath into one
# space-separated string. When both a namespace URI and a prefix are supplied,
# the namespace is set on the element first so that prefixed XPath
# expressions resolve. With $unique set, a child's text is skipped if it
# already occurs in the accumulated string.
# NOTE(review): argument unpacking, the branch that chooses between the
# per-child loop and the whole-node append, and the return statement are not
# visible in this listing.
759 sub xpath_to_string {
763 my $ns_prefix = shift;
766 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
770 # grab the set of matching nodes
771 my @nodes = $xml->findnodes( $xpath );
772 for my $value (@nodes) {
774 # grab all children of the node
775 my @children = $value->childNodes();
776 for my $child (@children) {
778 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern below.
779 my $content = quotemeta($child->textContent);
780 next if ($unique && $string =~ /$content/); # uniquify the values
781 $string .= $child->textContent . " ";
784 $string .= $value->textContent . " ";
# Turn "word/word" and "dddd-dddd" into two space-separated tokens.
788 $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
789 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
# Extracts index field entries from one record for the requested classes.
# For every field definition in $xpathset under each class in @classes, the
# record is transformed with the stylesheet of the definition's format, the
# definition's XPath is evaluated, the resulting string is normalised, and a
# Fieldmapper::metabib::<class>_field_entry object carrying the value and the
# field id is sent to $client.
# NOTE(review): argument unpacking, any guard around the transform and any
# skip of empty values are not visible in this listing.
794 sub class_index_string_xml {
800 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document or an XML string.
801 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
805 for my $class (@classes) {
806 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
807 for my $type ( keys %{ $xpathset->{$class} } ) {
809 my $def = $xpathset->{$class}->{$type};
810 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse a transform cached for this format, otherwise run the stylesheet.
815 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
816 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix for the XPath expression.
819 my $value = xpath_to_string(
820 $document->documentElement => $def->{xpath},
821 $sf->{ns} => $def->{format},
# Decompose, then strip \pM (mark) and \pC (other/control) characters,
# trailing non-word characters, and runs of dots between word boundaries.
827 $value = NFD($value);
828 $value =~ s/\pM+//sgo;
829 $value =~ s/\pC+//sgo;
830 $value =~ s/\W+$//sgo;
832 $value =~ s/\b\.+\b//sgo;
835 my $fm = $class_constructor->new;
836 $fm->value( $value );
837 $fm->field( $xpathset->{$class}->{$type}->{id} );
838 $client->respond($fm);
# Register the sub above under its API name.
843 __PACKAGE__->register_method(
844 api_name => "open-ils.ingest.field_entry.class.xml",
845 method => "class_index_string_xml",
# Extracts index field entries for the given classes from a stored record
# identified by $rec: retrieves the record from open-ils.cstore and forwards
# every entry produced by open-ils.ingest.field_entry.class.xml to $client.
# Returns undef when the retrieve yields nothing.
851 sub class_index_string_record {
857 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry although the
# produced entries are metabib field entries and the sibling "all" method
# reads biblio.record_entry -- confirm which table is intended.
858 my $r = OpenSRF::AppSession
859 ->create('open-ils.cstore')
860 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
863 return undef unless ($r and @$r);
865 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
867 $client->respond($fm);
# Register the sub above under its API name.
871 __PACKAGE__->register_method(
872 api_name => "open-ils.ingest.field_entry.class.record",
873 method => "class_index_string_record",
# Extracts index field entries for every class present in $xpathset from the
# given XML and forwards each entry to $client.
# NOTE(review): argument unpacking is not visible in this listing.
879 sub all_index_string_xml {
884 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
885 $client->respond($fm);
# Register the sub above under its API name.
889 __PACKAGE__->register_method(
890 api_name => "open-ils.ingest.extract.field_entry.all.xml",
891 method => "all_index_string_xml",
# Extracts index field entries for every class present in $xpathset from a
# stored bibliographic record identified by $rec and forwards each entry to
# $client. Returns undef when the retrieve yields nothing.
897 sub all_index_string_record {
902 OpenILS::Application::Ingest->post_init();
903 my $r = OpenSRF::AppSession
904 ->create('open-ils.cstore')
905 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
908 return undef unless ($r and @$r);
910 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
912 $client->respond($fm);
# Register the sub above under its API name.
916 __PACKAGE__->register_method(
917 api_name => "open-ils.ingest.extract.field_entry.all.record",
918 method => "all_index_string_record",
924 # --------------------------------------------------------------------------------
927 package OpenILS::Application::Ingest::FlatMARC;
928 use base qw/OpenILS::Application::Ingest/;
929 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper::<xmltype>::full_rec
# rows, where <xmltype> defaults to 'metabib'. The leader, every controlfield
# and every datafield subfield of the first "record" element are visited.
# For metabib records, subfield "a" of tag 245 gets additional handling in
# which the first <ind2> characters of the text are skipped.
# NOTE(review): row construction ($ns), the pushes onto @ns_list, some value
# normalisation and the return statement are not visible in this listing.
932 sub _marcxml_to_full_rows {
935 my $xmltype = shift || 'metabib';
937 my $type = "Fieldmapper::${xmltype}::full_rec";
# Locate the record element regardless of namespace prefix.
941 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
943 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
944 next unless $tagline;
949 my $val = $tagline->textContent;
959 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
960 next unless $tagline;
964 $ns->tag( $tagline->getAttribute( "tag" ) );
965 my $val = $tagline->textContent;
975 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
976 next unless $tagline;
978 my $tag = $tagline->getAttribute( "tag" );
979 my $ind1 = $tagline->getAttribute( "ind1" );
980 my $ind2 = $tagline->getAttribute( "ind2" );
982 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
990 $ns->subfield( $data->getAttribute( "code" ) );
991 my $val = $data->textContent;
# Split "dddd-dddd" and "word/word" into separate tokens; store lower-cased.
996 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
997 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
998 $ns->value( lc($val) );
1003 if ($xmltype eq 'metabib' and $tag eq '245') {
1006 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1007 next unless ($data and $data->getAttribute( "code" ) eq 'a');
1014 $ns->subfield( $data->getAttribute( "code" ) );
# Skip the first $ind2 characters of the text.
# NOTE(review): presumably the MARC non-filing character count -- confirm.
1015 my $val = substr( $data->textContent, $ind2 );
# Strip marks, control/other characters and trailing non-word characters.
1017 $val =~ s/\pM+//sgo;
1018 $val =~ s/\pC+//sgo;
1019 $val =~ s/\W+$//sgo;
1020 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1021 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1022 $ns->value( lc($val) );
1029 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the flat-MARC-from-XML method: parses $xml when it is still a
# string, selects 'authority' or 'metabib' rows from the API name it was
# invoked under, and sends every row produced by _marcxml_to_full_rows to
# $client.
# NOTE(review): the sub header and argument unpacking are not visible in this
# listing; the registrations below name the method flat_marc_xml.
1038 $log->debug("processing [$xml]");
1040 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# One sub serves both API names; the name decides the row type.
1042 my $type = 'metabib';
1043 $type = 'authority' if ($self->api_name =~ /authority/o);
1045 OpenILS::Application::Ingest->post_init();
1047 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Register the method under the authority and biblio API names.
1050 __PACKAGE__->register_method(
1051 api_name => "open-ils.ingest.flat_marc.authority.xml",
1052 method => "flat_marc_xml",
1057 __PACKAGE__->register_method(
1058 api_name => "open-ils.ingest.flat_marc.biblio.xml",
1059 method => "flat_marc_xml",
# Produces flat MARC rows for a stored record identified by $rec.
# The record type ('biblio' or 'authority') is taken from the API name the
# method was invoked under; it selects both the cstore retrieve call and the
# flat_marc.<type>.xml method that generates the rows. Every row is sent to
# $client and logged as JSON at debug level.
# Returns undef when the record is missing or has no MARC.
1065 sub flat_marc_record {
1070 my $type = 'biblio';
1071 $type = 'authority' if ($self->api_name =~ /authority/o);
1073 OpenILS::Application::Ingest->post_init();
1074 my $r = OpenSRF::AppSession
1075 ->create('open-ils.cstore')
1076 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
1080 return undef unless ($r and $r->marc);
1082 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
1083 for my $row (@rows) {
1084 $client->respond($row);
1085 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# Register the sub above under the biblio and authority API names.
1089 __PACKAGE__->register_method(
1090 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1091 method => "flat_marc_record",
1096 __PACKAGE__->register_method(
1097 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1098 method => "flat_marc_record",
1105 # --------------------------------------------------------------------------------
1108 package OpenILS::Application::Ingest::Biblio::URI;
1109 use base qw/OpenILS::Application::Ingest/;
1110 use Unicode::Normalize;
1111 use OpenSRF::EX qw/:try/;
# Builds URI and call-number objects from the 856 datafields of a record.
# Only 856 fields with ind1 of "4" or "1" and ind2 of "0" or "1" are
# considered. A field is skipped when it has no subfield u (href) or no owner
# can be determined. For each remaining field the owner is looked up as an
# org unit by shortname, a matching asset.uri and a '##URI##' call number are
# searched for, and new objects are built with temporary ids taken from the
# $max_uri / $max_cn counters.
# Collects { uri => ..., call_number => ... } hashes in @objects.
# NOTE(review): several lines (argument unpacking, request result handling,
# the "not found" conditions and the return) are not visible in this listing.
1114 sub _extract_856_uris {
1118 my $max_uri = shift;
1121 my $recid = $rec->id;
1122 my $marcxml = $rec->marc;
1124 my $document = $parser->parse_string($marcxml);
1125 my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
1127 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
1131 for my $node (@nodes) {
1132 # first, is there a URI?
1133 my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()');
1134 next unless ($href);
1136 # now, find the best possible label
1137 my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()');
1138 $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()');
# Use text: the first non-empty of subfields z, 2 and n.
1142 my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()');
1143 $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()');
1144 $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1146 # moving on to the URI owner
1147 my $owner = $node->findvalue('*[local-name()="subfield" and @code="w"]/text()');
1148 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1149 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
1151 $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
1153 # no owner? skip it :(
1154 next unless ($owner);
# Look up the owning org unit by its shortname.
1157 ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
1162 # now we can construct the uri object
1164 ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } )
# $max_uri is a scalar reference; the temporary id is taken, then incremented.
1168 $uri = Fieldmapper::asset::uri->new;
1170 $uri->id( $$max_uri++ );
1171 $uri->label($label);
1174 $uri->use_restriction($use);
1177 # see if we need to create a call number
# A call number already built for this org in this run is cloned and its
# isnew flag cleared.
1178 my $cn = $cn_cache{$org->id};
1179 $cn = $cn->clone if ($cn);
1180 $cn->clear_isnew if ($cn);
1183 ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } )
# $max_cn is a scalar reference; the temporary id is taken, then incremented.
1187 $cn = Fieldmapper::asset::call_number->new;
1190 $cn->id( $$max_cn++ );
1191 $cn->owning_lib( $org->id );
1192 $cn->record( $recid );
1193 $cn->create_date( 'now' );
1194 $cn->creator( $rec->creator );
1195 $cn->editor( $rec->editor );
1196 $cn->edit_date( 'now' );
1197 $cn->label( '##URI##' );
1200 $cn_cache{$org->id} = $cn;
1202 push @objects, { uri => $uri, call_number => $cn };
1205 $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
# Extracts 856 URI objects from a stored bibliographic record identified by
# $rec and sends each one to $client.
# Returns undef when the record is missing or has no MARC.
1209 sub get_uris_record {
1214 OpenILS::Application::Ingest->post_init();
1215 my $r = OpenSRF::AppSession
1216 ->create('open-ils.cstore')
1217 ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
1220 return undef unless ($r and $r->marc);
# NOTE(review): no id-counter references are passed here, while
# _extract_856_uris dereferences $$max_cn / $$max_uri when it builds new
# objects -- confirm that this call path is safe.
1222 $client->respond($_) for (_extract_856_uris($r));
# Register the sub above under its API name.
1225 __PACKAGE__->register_method(
1226 api_name => "open-ils.ingest.856_uri.record",
1227 method => "get_uris_record",
# Extracts 856 URI objects from a record object ($obj) and sends each one to
# $client. The id counters are passed by reference so that
# _extract_856_uris can advance them.
# Returns undef when the object is missing or has no MARC.
1233 sub get_uris_object {
1238 my $max_uri = shift;
1240 return undef unless ($obj and $obj->marc);
1242 $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri));
# Register the sub above under its API name.
1245 __PACKAGE__->register_method(
1246 api_name => "open-ils.ingest.856_uri.object",
1247 method => "get_uris_object",
1254 # --------------------------------------------------------------------------------
1257 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1258 use base qw/OpenILS::Application::Ingest/;
1259 use Unicode::Normalize;
1260 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of a stored bibliographic record identified by
# $rec: retrieves the record from open-ils.cstore and runs
# open-ils.ingest.fingerprint.xml on its MARC. The quality value in the
# result is coerced to an integer.
# Returns undef when the record is missing or has no MARC.
# NOTE(review): the final return statement is not visible in this listing.
1262 sub biblio_fingerprint_record {
1267 OpenILS::Application::Ingest->post_init();
1269 my $r = OpenSRF::AppSession
1270 ->create('open-ils.cstore')
1271 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1274 return undef unless ($r and $r->marc);
1276 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so this
# message interpolates a reference rather than the fingerprint text.
1277 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1278 $fp->{quality} = int($fp->{quality});
# Register the sub above under its API name.
1281 __PACKAGE__->register_method(
1282 api_name => "open-ils.ingest.fingerprint.record",
1283 method => "biblio_fingerprint_record",
# Runs the configured biblio fingerprint script against a MARC XML string.
# The script file and library paths come from the open-ils.ingest
# app_settings; the XML is inserted into the script environment under `marc`
# and the script is run. Logs an error and returns undef when the run yields
# a false value.
# NOTE(review): the condition guarding script construction (the runner is
# presumably created once and reused) and the final return are not visible in
# this listing.
1289 sub biblio_fingerprint {
1292 my $xml = OpenILS::Application::Ingest::entityize(shift);
1294 $log->internal("Got MARC [$xml]");
# Settings path prefix for this application's configuration.
1297 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1298 my $conf = OpenSRF::Utils::SettingsClient->new;
1300 my $libs = $conf->config_value(@pfx, 'script_path');
1301 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; always pass an array reference.
1302 my $script_libs = (ref($libs)) ? $libs : [$libs];
1304 $log->debug("Loading script $script_file for biblio fingerprinting...");
1306 $fp_script = new OpenILS::Utils::ScriptRunner
1307 ( file => $script_file,
1308 paths => $script_libs,
1309 reset_count => 100 );
1312 $fp_script->insert('environment' => {marc => $xml} => 1);
1314 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1315 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the sub above under its API name.
1319 __PACKAGE__->register_method(
1320 api_name => "open-ils.ingest.fingerprint.xml",
1321 method => "biblio_fingerprint",
# Runs the configured biblio descriptor script against a MARC XML string.
# The script file and library paths come from the open-ils.ingest
# app_settings; the XML is inserted into the script environment as
# `environment.marc` and the script is run. Logs an error and returns undef
# when the run yields a false value. The date1 and date2 values of the result
# are post-processed when set and not a single space.
# NOTE(review): the condition guarding script construction, the date handling
# bodies and the final return are not visible in this listing.
1327 sub biblio_descriptor {
1330 my $xml = OpenILS::Application::Ingest::entityize(shift);
1332 $log->internal("Got MARC [$xml]");
# Settings path prefix for this application's configuration.
1335 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1336 my $conf = OpenSRF::Utils::SettingsClient->new;
1338 my $libs = $conf->config_value(@pfx, 'script_path');
1339 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; always pass an array reference.
1340 my $script_libs = (ref($libs)) ? $libs : [$libs];
1342 $log->debug("Loading script $script_file for biblio descriptor extraction...");
1344 $rd_script = new OpenILS::Utils::ScriptRunner
1345 ( file => $script_file,
1346 paths => $script_libs,
1347 reset_count => 100 );
1350 $log->debug("Setting up environment for descriptor extraction script...");
1351 $rd_script->insert('environment.marc' => $xml => 1);
1352 $log->debug("Environment building complete...");
1354 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1355 $log->debug("Script for biblio descriptor extraction completed successfully");
1357 my $d1 = $res->date1;
1358 if ($d1 && $d1 ne ' ') {
1363 my $d2 = $res->date2;
1364 if ($d2 && $d2 ne ' ') {
# Register the sub above under its API name.
1371 __PACKAGE__->register_method(
1372 api_name => "open-ils.ingest.descriptor.xml",
1373 method => "biblio_descriptor",