1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
16 use OpenILS::Utils::Fieldmapper;
20 use Time::HiRes qw(time);
# Registry of output formats known to the ingest code.
# Each key is a format name and each value is a hash whose "ns" entry is the
# XML namespace URI for that format.  Initialisation code further down also
# stores a compiled stylesheet under an "xslt" key for the mods, mods3 and
# mods32 entries.
# Note that mods32 and mods3 are registered with the same namespace URI.
22 our %supported_formats = (
23 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
24 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods => {ns => 'http://www.loc.gov/mods/'},
26 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
27 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
28 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
29 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
30 atom => {ns => 'http://www.w3.org/2005/Atom'},
31 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
35 rss10 => {ns => 'http://purl.org/rss/1.0/'},
36 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; the log methods are invoked on it as class methods
# (e.g. $log->debug(...)).
41 my $log = 'OpenSRF::Utils::Logger';
# File-scoped XML parser and XSLT processor, shared by the packages below.
# NOTE(review): no `use XML::LibXML` / `use XML::LibXSLT` appears among the
# visible use lines - confirm both modules are loaded before these run.
43 my $parser = XML::LibXML->new();
44 my $xslt = XML::LibXSLT->new();
# Lazy initialisation: everything below runs only while $xpathset has no keys.
# NOTE(review): the enclosing sub header is not visible here; the log message
# and the OpenILS::Application::Ingest->post_init() callers suggest this is
# the body of post_init - confirm.
54 unless (keys %$xpathset) {
55 $log->debug("Running post_init", DEBUG);
# Directory that holds the XSL stylesheets, read from the settings client.
57 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile each MARC21slim-to-MODS stylesheet once, caching the
# compiled stylesheet on the matching %supported_formats entry.
59 unless ($supported_formats{mods}{xslt}) {
60 $log->debug("Loading MODS XSLT", DEBUG);
61 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
62 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
65 unless ($supported_formats{mods3}{xslt}) {
66 $log->debug("Loading MODS v3 XSLT", DEBUG);
67 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
68 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
71 unless ($supported_formats{mods32}{xslt}) {
72 $log->debug("Loading MODS v32 XSLT", DEBUG);
73 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
74 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows whose search_field is 't'.
77 my $req = OpenSRF::AppSession
78 ->create('open-ils.cstore')
80 # XXX testing new metabib field use for faceting
81 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
82 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Index every returned field definition by field class and field name,
# recording its xpath expression, database id and source format.
86 if (ref $req and @$req) {
88 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
89 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
90 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
91 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# NOTE(review): presumably the body of entityize() (called elsewhere in this
# file as OpenILS::Application::Ingest::entityize); the sub header is not
# visible here - confirm.
# Unicode-normalise $stuff.  Both an NFD and an NFC assignment are present;
# presumably they sit in alternative branches whose condition is not visible
# here - TODO confirm.
102 $stuff = NFD($stuff);
104 $stuff = NFC($stuff);
# Replace every character in the range U+0080..U+FFFD with an upper-case
# hexadecimal numeric character reference (&#xNNNN;).
107 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
111 # --------------------------------------------------------------------------------
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
# Read-write ingest of a single bibliographic record object ($bib).
# Runs the read-only ingest to build the derived rows, then, inside one cstore
# transaction, replaces the stored URI / call-number maps, full_rec rows,
# record descriptor, classed field entries and metarecord mapping for the
# record, and finally updates the record entry itself.
# Returns undef when the read-only ingest yields nothing or the commit fails.
118 sub rw_biblio_ingest_single_object {
# Build all derived data first; nothing has been written at this point.
123 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
124 return undef unless ($blob);
# Copy the computed fingerprint and quality onto the record object.
126 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
127 $bib->quality( $blob->{fingerprint}->{quality} );
129 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
131 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
134 # update uri stuff ...
136 # gather URI call numbers for this record
# NOTE(review): $u is assigned here, yet the only visible declaration of $u is
# the loop variable further down - confirm this extra assignment is intended.
137 my $uri_cns = $u->{call_number} = $cstore->request(
138 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
142 # gather the maps for those call numbers
# NOTE(review): the map id list is also stored in $u->{call_number}, replacing
# the value assigned just above - looks like a copy/paste leftover; confirm.
143 my $uri_maps = $u->{call_number} = $cstore->request(
144 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
147 # delete the old maps
148 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
150 # and delete the call numbers if there are no more URIs
151 if (!@{ $blob->{uri} }) {
152 $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
156 # now, add CNs, URIs and maps
# Both caches are keyed by owning library so that a new call number / URI is
# created once per owner and reused for later entries with the same owner.
157 my %new_cns_by_owner;
158 my %new_uris_by_owner;
159 for my $u ( @{ $blob->{uri} } ) {
161 my $owner = $u->{call_number}->owning_lib;
163 if ($u->{call_number}->isnew) {
164 if ($new_cns_by_owner{$owner}) {
165 $u->{call_number} = $new_cns_by_owner{$owner};
# Clear the placeholder id before asking cstore to create the call number.
167 $u->{call_number}->clear_id;
168 $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
169 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
174 if ($u->{uri}->isnew) {
175 if ($new_uris_by_owner{$owner}) {
176 $u->{uri} = $new_uris_by_owner{$owner};
178 $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
179 'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
# Link the URI to its call number.
184 my $umap = Fieldmapper::asset::uri_call_number_map->new;
185 $umap->uri($u->{uri}->id);
186 $umap->call_number($u->{call_number}->id);
# NOTE(review): $tmp is tested here, but no assignment to it is visible before
# this point - confirm where it is set.
188 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1) if (!$tmp);
191 # update full_rec stuff ...
# Pattern used for each derived table below: list the existing row ids for
# this record, delete them, then create the freshly computed rows.
192 $tmp = $cstore->request(
193 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
194 { record => $bib->id }
197 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
198 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
200 # update rec_descriptor stuff ...
201 $tmp = $cstore->request(
202 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
203 { record => $bib->id }
206 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
207 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
209 # deal with classed fields...
210 for my $class ( qw/title author subject keyword series/ ) {
211 $tmp = $cstore->request(
212 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
213 { source => $bib->id }
216 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Create the new field entries; the cstore method name is derived from each
# object's class name with the leading "Fieldmapper::" removed.
218 for my $obj ( @{ $blob->{field_entries} } ) {
219 my $class = $obj->class_name;
220 $class =~ s/^Fieldmapper:://o;
222 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Remove this record's existing metarecord source maps.
227 $tmp = $cstore->request(
228 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
229 { source => $bib->id }
232 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Load the metarecords those maps pointed at (only when there were any maps).
235 my $old_mrs = $cstore->request(
236 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
237 )->gather(1) if (@$tmp);
# Fall back to an empty list so the loops below are safe.
239 $old_mrs = [] if (!ref($old_mrs));
# Examine each old metarecord: compare its fingerprint with the record's new
# one, look up the other source maps attached to it, and (in the visible
# lines) issue a metarecord delete.  The branch structure around these calls
# is not visible here - TODO confirm the exact conditions.
242 for my $m (@$old_mrs) {
243 if ($m->fingerprint eq $bib->fingerprint) {
246 my $others = $cstore->request(
247 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
252 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
262 # Get the matching MR, if any.
263 $mr = $cstore->request(
264 'open-ils.cstore.direct.metabib.metarecord.search',
265 { fingerprint => $bib->fingerprint }
# Find metarecord-level holds (hold_type 'M') targeting old metarecords that
# are flagged isdeleted, so they can be updated below.
268 $holds = $cstore->request(
269 'open-ils.cstore.direct.action.hold_request.search.atomic',
270 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
271 )->gather(1) if (@$old_mrs);
274 for my $h (@$holds) {
276 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord carrying this record's fingerprint, with this record
# as its master record.
283 $mr = new Fieldmapper::metabib::metarecord;
284 $mr->fingerprint( $bib->fingerprint );
285 $mr->master_record( $bib->id );
288 "open-ils.cstore.direct.metabib.metarecord.create",
289 $mr => { quiet => 'true' }
# Update the holds that were not already flagged as changed.
293 for my $h (grep { !$_->ischanged } @$holds) {
295 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Choose the master record: fetch the source maps of the metarecord, then
# search their source records ordered by quality, highest first.
298 my $mrm = $cstore->request(
299 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
300 { metarecord => $mr->id }
304 my $best = $cstore->request(
305 "open-ils.cstore.direct.biblio.record_entry.search",
306 { id => [ map { $_->source } @$mrm ] },
307 { 'select' => { bre => [ qw/id quality/ ] },
308 order_by => { bre => "quality desc" },
# Keep the existing best record as master only if its quality is strictly
# higher than this record's; otherwise this record becomes the master.
313 if ($best->quality > $bib->quality) {
314 $mr->master_record($best->id);
316 $mr->master_record($bib->id);
319 $mr->master_record($bib->id);
324 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the chosen metarecord and save the record entry.
327 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
328 $mrm->source($bib->id);
329 $mrm->metarecord($mr->id);
331 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
332 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false commit result makes the method return undef.
334 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Published as open-ils.ingest.full.biblio.object.
339 __PACKAGE__->register_method(
340 api_name => "open-ils.ingest.full.biblio.object",
341 method => "rw_biblio_ingest_single_object",
# Read-write ingest of the bibliographic record referenced by $rec:
# fetches the record entry from cstore and passes it to
# open-ils.ingest.full.biblio.object, returning that method's first result.
# Returns undef when the record cannot be retrieved.
346 sub rw_biblio_ingest_single_record {
351 OpenILS::Application::Ingest->post_init();
352 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
353 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
355 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
# The transaction only wrapped the read above, so it is rolled back rather
# than committed.
357 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
360 return undef unless ($r and @$r);
362 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Published as open-ils.ingest.full.biblio.record.
364 __PACKAGE__->register_method(
365 api_name => "open-ils.ingest.full.biblio.record",
366 method => "rw_biblio_ingest_single_record",
# Read-write ingest of several bibliographic records.
# Accepts either a single array reference or a flat list of values, fetches
# the matching record entries, runs open-ils.ingest.full.biblio.object on each
# one and adds the first result of every run to $count.
371 sub rw_biblio_ingest_record_list {
# Accept both call styles: an array ref as first argument, or a plain list.
374 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
376 OpenILS::Application::Ingest->post_init();
377 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
378 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
# NOTE(review): the array @rec is declared above, but the search uses the
# scalar $rec, which has no visible declaration - confirm; \@rec may have been
# intended.
380 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
# The transaction only wrapped the read above, so it is rolled back.
382 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
385 return undef unless ($r and @$r);
388 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
# Published as open-ils.ingest.full.biblio.record_list.
392 __PACKAGE__->register_method(
393 api_name => "open-ils.ingest.full.biblio.record_list",
394 method => "rw_biblio_ingest_record_list",
# Read-only ingest of a bibliographic record object ($bib).
# Computes, without writing anything, the derived data for the record and
# returns it as a hash reference with the keys full_rec, field_entries,
# fingerprint, descriptor and uri.
399 sub ro_biblio_ingest_single_object {
403 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
407 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# Starting id for new call numbers: the highest existing call number id
# plus 1000.
410 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
411 $max_cn = int($cn->id) + 1000;
# NOTE(review): $max_uri is also derived from an asset.call_number search
# rather than a URI search - looks like a copy/paste of the lines above;
# confirm whether asset.uri was intended.
415 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
416 $max_uri = int($cn->id) + 1000;
421 my $document = $parser->parse_string($xml);
# The flat MARC and field entry extractors receive the parsed document; the
# fingerprint and descriptor methods receive the entityized XML string.
423 my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri);
424 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
425 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
426 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
427 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp every derived row with the id of the record it came from.
429 $_->source($bib->id) for (@mXfe);
430 $_->record($bib->id) for (@mfr);
431 $rd->record($bib->id) if ($rd);
433 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
# Published as open-ils.ingest.full.biblio.object.readonly.
435 __PACKAGE__->register_method(
436 api_name => "open-ils.ingest.full.biblio.object.readonly",
437 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a MARC XML string.
# Returns a hash reference with the keys full_rec, field_entries, fingerprint
# and descriptor.  No record id is known here, so the rows are not stamped
# with one.
442 sub ro_biblio_ingest_single_xml {
445 my $xml = OpenILS::Application::Ingest::entityize(shift);
447 my $document = $parser->parse_string($xml);
# The flat MARC and field entry extractors receive the parsed document; the
# fingerprint and descriptor methods receive the entityized XML string.
449 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
450 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
451 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
452 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
454 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Published as open-ils.ingest.full.biblio.xml.readonly.
456 __PACKAGE__->register_method(
457 api_name => "open-ils.ingest.full.biblio.xml.readonly",
458 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bibliographic record referenced by $rec:
# retrieves the record entry, runs the XML read-only ingest on its MARC and
# stamps the resulting rows with $rec.
# Returns undef when the record cannot be retrieved.
463 sub ro_biblio_ingest_single_record {
468 OpenILS::Application::Ingest->post_init();
469 my $r = OpenSRF::AppSession
470 ->create('open-ils.cstore')
471 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
474 return undef unless ($r and @$r);
476 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
478 $_->source($rec) for (@{$res->{field_entries}});
479 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the descriptor is dereferenced without a guard here, whereas
# the object-based ingest only does so when it is defined - confirm it can
# never be undef at this point.
480 $res->{descriptor}->record($rec);
# Published as open-ils.ingest.full.biblio.record.readonly.
484 __PACKAGE__->register_method(
485 api_name => "open-ils.ingest.full.biblio.record.readonly",
486 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest keyed by record reference.
# Reads one message at a time from $client (5 second timeout), stops at the
# first message whose content is undefined, runs the single-record read-only
# ingest on each value and sends the result back through $client.
491 sub ro_biblio_ingest_stream_record {
495 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
497 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
499 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
501 my $rec = $resp->content;
502 last unless (defined $rec);
504 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
505 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the rows with the record reference received from the client.
507 $_->source($rec) for (@{$res->{field_entries}});
508 $_->record($rec) for (@{$res->{full_rec}});
510 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.record_stream.readonly.
515 __PACKAGE__->register_method(
516 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
517 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of MARC XML strings.
# Reads one message at a time from $client (5 second timeout), stops at the
# first message whose content is undefined, runs the XML read-only ingest on
# each string and sends the result back through $client.
522 sub ro_biblio_ingest_stream_xml {
526 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
528 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
530 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
532 my $xml = $resp->content;
533 last unless (defined $xml);
535 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
536 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
538 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.xml_stream.readonly.
543 __PACKAGE__->register_method(
544 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
545 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bibliographic record objects.
# Reads one record object at a time from $client (5 second timeout), stops at
# the first message whose content is undefined, runs the XML read-only ingest
# on the object's MARC, stamps the rows with the object's id and sends the
# result back through $client.
# NOTE(review): despite the rw_ prefix, no write to cstore is visible in the
# lines shown here - confirm.
550 sub rw_biblio_ingest_stream_import {
554 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
556 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
558 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
560 my $bib = $resp->content;
561 last unless (defined $bib);
563 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
564 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
566 $_->source($bib->id) for (@{$res->{field_entries}});
567 $_->record($bib->id) for (@{$res->{full_rec}});
569 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.bib_stream.import.
574 __PACKAGE__->register_method(
575 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
576 method => "rw_biblio_ingest_stream_import",
582 # --------------------------------------------------------------------------------
585 package OpenILS::Application::Ingest::Authority;
586 use base qw/OpenILS::Application::Ingest/;
587 use Unicode::Normalize;
# Read-only ingest of an authority record object ($bib).
# Flattens the record's MARC into full_rec rows, stamps them with the record
# id and returns { full_rec => [...] }.
589 sub ro_authority_ingest_single_object {
593 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
595 my $document = $parser->parse_string($xml);
597 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
599 $_->record($bib->id) for (@mfr);
601 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.authority.object.readonly.
603 __PACKAGE__->register_method(
604 api_name => "open-ils.ingest.full.authority.object.readonly",
605 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARC XML string.
# Returns { full_rec => [...] }; the rows are not stamped with a record id.
610 sub ro_authority_ingest_single_xml {
613 my $xml = OpenILS::Application::Ingest::entityize(shift);
615 my $document = $parser->parse_string($xml);
617 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
619 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.authority.xml.readonly.
621 __PACKAGE__->register_method(
622 api_name => "open-ils.ingest.full.authority.xml.readonly",
623 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record referenced by $rec:
# retrieves the record entry, runs the authority XML read-only ingest on its
# MARC and stamps the resulting full_rec rows with $rec.
# Returns undef when the record cannot be retrieved.
628 sub ro_authority_ingest_single_record {
633 OpenILS::Application::Ingest->post_init();
634 my $r = OpenSRF::AppSession
635 ->create('open-ils.cstore')
636 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
639 return undef unless ($r and @$r);
641 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
643 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest in this file returns only a full_rec
# key, so $res->{descriptor} appears to be undefined here and this call would
# fail - confirm; looks copied from the biblio variant.
644 $res->{descriptor}->record($rec);
# Published as open-ils.ingest.full.authority.record.readonly.
648 __PACKAGE__->register_method(
649 api_name => "open-ils.ingest.full.authority.record.readonly",
650 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest keyed by record reference.
# Reads one message at a time from $client (5 second timeout), stops at the
# first message whose content is undefined, runs the single-record authority
# ingest on each value and sends the result back through $client.
655 sub ro_authority_ingest_stream_record {
659 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
661 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
663 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
665 my $rec = $resp->content;
666 last unless (defined $rec);
668 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
669 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
671 $_->record($rec) for (@{$res->{full_rec}});
673 $client->respond( $res );
# Published as open-ils.ingest.full.authority.record_stream.readonly.
678 __PACKAGE__->register_method(
679 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
680 method => "ro_authority_ingest_stream_record",
# Streaming read-only ingest of authority MARC XML strings.
# Reads one message at a time from $client (5 second timeout), stops at the
# first message whose content is undefined, runs the authority XML ingest on
# each string and sends the result back through $client.
685 sub ro_authority_ingest_stream_xml {
689 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
691 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
693 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
695 my $xml = $resp->content;
696 last unless (defined $xml);
698 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
699 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
701 $client->respond( $res );
# Published as open-ils.ingest.full.authority.xml_stream.readonly.
706 __PACKAGE__->register_method(
707 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
708 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects.
# Reads one record object at a time from $client (5 second timeout), stops at
# the first message whose content is undefined, runs the authority XML ingest
# on the object's MARC, stamps the full_rec rows with the object's id and
# sends the result back through $client.
# NOTE(review): despite the rw_ prefix, no write to cstore is visible in the
# lines shown here - confirm.
713 sub rw_authority_ingest_stream_import {
717 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is created but not used in the lines visible here.
719 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
721 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
723 my $bib = $resp->content;
724 last unless (defined $bib);
726 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
727 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
729 $_->record($bib->id) for (@{$res->{full_rec}});
731 $client->respond( $res );
# Published as open-ils.ingest.full.authority.bib_stream.import.
736 __PACKAGE__->register_method(
737 api_name => "open-ils.ingest.full.authority.bib_stream.import",
738 method => "rw_authority_ingest_stream_import",
744 # --------------------------------------------------------------------------------
745 # MARC index extraction
747 package OpenILS::Application::Ingest::XPATH;
748 use base qw/OpenILS::Application::Ingest/;
749 use Unicode::Normalize;
751 # give this an XML documentElement and an XPATH expression
# Evaluates $xpath against the node $xml and concatenates the text of the
# matches into $string, each piece followed by a single space.
# When both a namespace URI and a prefix are supplied, the namespace is first
# set on the node so the prefix can be used inside the expression.
752 sub xpath_to_string {
756 my $ns_prefix = shift;
759 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
763 # grab the set of matching nodes
764 my @nodes = $xml->findnodes( $xpath );
765 for my $value (@nodes) {
767 # grab all children of the node
768 my @children = $value->childNodes();
769 for my $child (@children) {
771 # add the child's content to the growing buffer
# quotemeta escapes the text so it is matched literally; when $unique is set,
# text already present anywhere in $string (a substring match) is skipped.
772 my $content = quotemeta($child->textContent);
773 next if ($unique && $string =~ /$content/); # uniquify the values
774 $string .= $child->textContent . " ";
# NOTE(review): presumably the alternative path taken when the per-child loop
# above does not apply; the condition is not visible here - confirm.
777 $string .= $value->textContent . " ";
# Replace the slash in word/word and the hyphen in a four-digit range
# (NNNN-NNNN) with a space.
781 $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
782 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
# Extracts index field entries from a MARC XML record for the requested field
# classes.  For every field definition in $xpathset of each class it obtains
# the document in the definition's format, evaluates the definition's xpath,
# normalises the text and responds with a
# Fieldmapper::metabib::<class>_field_entry object carrying the value and the
# field id.
787 sub class_index_string_xml {
793 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document or a raw XML string.
794 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
798 for my $class (@classes) {
799 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
800 for my $type ( keys %{ $xpathset->{$class} } ) {
802 my $def = $xpathset->{$class}->{$type};
803 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format,
# reusing the cached result for later definitions.
808 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
809 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix passed to xpath_to_string.
812 my $value = xpath_to_string(
813 $document->documentElement => $def->{xpath},
814 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop mark (\pM) and other/control (\pC)
# characters, strip trailing non-word characters, and remove runs of dots that
# sit between word boundaries.
820 $value = NFD($value);
821 $value =~ s/\pM+//sgo;
822 $value =~ s/\pC+//sgo;
823 $value =~ s/\W+$//sgo;
825 $value =~ s/\b\.+\b//sgo;
828 my $fm = $class_constructor->new;
829 $fm->value( $value );
830 $fm->field( $xpathset->{$class}->{$type}->{id} );
831 $client->respond($fm);
# Published as open-ils.ingest.field_entry.class.xml.
836 __PACKAGE__->register_method(
837 api_name => "open-ils.ingest.field_entry.class.xml",
838 method => "class_index_string_xml",
# Record-based wrapper around open-ils.ingest.field_entry.class.xml:
# retrieves the record referenced by $rec, runs the class extraction on its
# MARC for @classes and responds with each resulting field entry.
# Returns undef when the record cannot be retrieved.
844 sub class_index_string_record {
850 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves an authority record entry, whereas the sibling
# all_index_string_record retrieves a biblio record entry and the field
# entries produced are metabib classes - confirm which table is intended.
851 my $r = OpenSRF::AppSession
852 ->create('open-ils.cstore')
853 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
856 return undef unless ($r and @$r);
858 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
860 $client->respond($fm);
# Published as open-ils.ingest.field_entry.class.record.
864 __PACKAGE__->register_method(
865 api_name => "open-ils.ingest.field_entry.class.record",
866 method => "class_index_string_record",
# Extracts field entries for every class known to $xpathset from the given
# XML, by delegating to open-ils.ingest.field_entry.class.xml and responding
# with each entry it produces.
872 sub all_index_string_xml {
877 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
878 $client->respond($fm);
# Published as open-ils.ingest.extract.field_entry.all.xml.
882 __PACKAGE__->register_method(
883 api_name => "open-ils.ingest.extract.field_entry.all.xml",
884 method => "all_index_string_xml",
# Record-based variant of the all-classes extraction: retrieves the biblio
# record referenced by $rec, runs open-ils.ingest.field_entry.class.xml on its
# MARC for every class in $xpathset and responds with each entry.
# Returns undef when the record cannot be retrieved.
890 sub all_index_string_record {
895 OpenILS::Application::Ingest->post_init();
896 my $r = OpenSRF::AppSession
897 ->create('open-ils.cstore')
898 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
901 return undef unless ($r and @$r);
903 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
905 $client->respond($fm);
# Published as open-ils.ingest.extract.field_entry.all.record.
909 __PACKAGE__->register_method(
910 api_name => "open-ils.ingest.extract.field_entry.all.record",
911 method => "all_index_string_record",
917 # --------------------------------------------------------------------------------
920 package OpenILS::Application::Ingest::FlatMARC;
921 use base qw/OpenILS::Application::Ingest/;
922 use Unicode::Normalize;
# Flattens a parsed MARC XML document into Fieldmapper full_rec rows.
# $xmltype selects the Fieldmapper namespace (default 'metabib'), giving rows
# of class Fieldmapper::<xmltype>::full_rec.  Rows are produced from the
# leader, the control fields and every subfield of every data field.
925 sub _marcxml_to_full_rows {
928 my $xmltype = shift || 'metabib';
930 my $type = "Fieldmapper::${xmltype}::full_rec";
# Locate the first <record> element regardless of namespace.
934 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
936 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
937 next unless $tagline;
942 my $val = $tagline->textContent;
# Control fields: the tag attribute plus the element text.
952 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
953 next unless $tagline;
957 $ns->tag( $tagline->getAttribute( "tag" ) );
958 my $val = $tagline->textContent;
# Data fields: one row per subfield.
968 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
969 next unless $tagline;
971 my $tag = $tagline->getAttribute( "tag" );
972 my $ind1 = $tagline->getAttribute( "ind1" );
973 my $ind2 = $tagline->getAttribute( "ind2" );
975 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
983 $ns->subfield( $data->getAttribute( "code" ) );
984 my $val = $data->textContent;
# Replace the hyphen in NNNN-NNNN and the slash in word/word with a space,
# then store the value lower-cased.
989 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
990 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
991 $ns->value( lc($val) );
# Extra handling for bibliographic 245 fields: subfield a is processed again,
# this time skipping its leading characters.
996 if ($xmltype eq 'metabib' and $tag eq '245') {
999 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1000 next unless ($data and $data->getAttribute( "code" ) eq 'a');
1007 $ns->subfield( $data->getAttribute( "code" ) );
# The second indicator is used as the number of leading characters to drop.
1008 my $val = substr( $data->textContent, $ind2 );
1010 $val =~ s/\pM+//sgo;
1011 $val =~ s/\pC+//sgo;
1012 $val =~ s/\W+$//sgo;
1013 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1014 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1015 $ns->value( lc($val) );
1022 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# NOTE(review): presumably the body of flat_marc_xml (the method name used in
# the registrations below); the sub header is not visible here - confirm.
# Flattens MARC XML into full_rec rows and responds with each row.
1031 $log->debug("processing [$xml]");
# Accept either an already-parsed document or a raw XML string.
1033 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name this method was invoked under:
# 'authority' when the name contains "authority", otherwise 'metabib'.
1035 my $type = 'metabib';
1036 $type = 'authority' if ($self->api_name =~ /authority/o);
1038 OpenILS::Application::Ingest->post_init();
1040 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same implementation is published under an authority and a biblio name.
1043 __PACKAGE__->register_method(
1044 api_name => "open-ils.ingest.flat_marc.authority.xml",
1045 method => "flat_marc_xml",
1050 __PACKAGE__->register_method(
1051 api_name => "open-ils.ingest.flat_marc.biblio.xml",
1052 method => "flat_marc_xml",
# Record-based flat MARC extraction: retrieves the biblio or authority record
# referenced by $rec, runs the matching flat_marc XML method on its MARC and
# responds with (and debug-logs as JSON) each resulting row.
# Returns undef when the record is missing or has no MARC.
1058 sub flat_marc_record {
# 'authority' when the invoked API name contains "authority", else 'biblio';
# the value is used in both the cstore method name and the ingest method name.
1063 my $type = 'biblio';
1064 $type = 'authority' if ($self->api_name =~ /authority/o);
1066 OpenILS::Application::Ingest->post_init();
1067 my $r = OpenSRF::AppSession
1068 ->create('open-ils.cstore')
1069 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
1073 return undef unless ($r and $r->marc);
1075 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
1076 for my $row (@rows) {
1077 $client->respond($row);
1078 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# The same implementation is published under a biblio and an authority name.
1082 __PACKAGE__->register_method(
1083 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1084 method => "flat_marc_record",
1089 __PACKAGE__->register_method(
1090 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1091 method => "flat_marc_record",
1098 # --------------------------------------------------------------------------------
1101 package OpenILS::Application::Ingest::Biblio::URI;
1102 use base qw/OpenILS::Application::Ingest/;
1103 use Unicode::Normalize;
1104 use OpenSRF::EX qw/:try/;
# Builds URI / call number object pairs from the 856 fields of a record.
# For each qualifying 856 field it determines the href, label, use
# restriction and owner, looks up the owning org unit, finds or creates the
# asset.uri object and the '##URI##' call number for that owner, and pushes
# { uri => ..., call_number => ... } onto @objects.
# $max_cn and $max_uri are dereferenced below, so they are expected to be
# scalar references holding the next id to hand out for new objects.
1107 sub _extract_856_uris {
1111 my $max_uri = shift;
1114 my $recid = $rec->id;
1115 my $marcxml = $rec->marc;
1117 my $document = $parser->parse_string($marcxml);
# Only 856 data fields with ind1 of 4 or 1 and ind2 of 0 or 1 are considered.
1118 my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
1120 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
1124 for my $node (@nodes) {
1125 # first, is there a URI?
1126 my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()');
1127 next unless ($href);
1129 # now, find the best possible label
# Label preference: subfield y, then subfield 3.
1130 my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()');
1131 $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()');
# Use restriction preference: subfield z, then 2, then n.
1135 my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()');
1136 $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()');
1137 $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1139 # moving on to the URI owner
# Owner preference: subfield w, then n, then 9.
1140 my $owner = $node->findvalue('*[local-name()="subfield" and @code="w"]/text()');
1141 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1142 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
1144 $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
1146 # no owner? skip it :(
1147 next unless ($owner);
# Resolve the owner string to an org unit by its shortname.
1150 ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
1155 # now we can construct the uri object
# Look for an existing active URI with the same label, href and use
# restriction before building a new one.
1157 ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } )
1161 $uri = Fieldmapper::asset::uri->new;
# Hand out the next id from the caller's counter (post-increment through the
# reference).
1163 $uri->id( $$max_uri++ );
1164 $uri->label($label);
1167 $uri->use_restriction($use);
1170 # see if we need to create a call number
# Reuse a clone of the call number cached for this org unit, with its isnew
# flag cleared.
1171 my $cn = $cn_cache{$org->id};
1172 $cn = $cn->clone if ($cn);
1173 $cn->clear_isnew if ($cn);
1176 ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } )
1180 $cn = Fieldmapper::asset::call_number->new;
1183 $cn->id( $$max_cn++ );
1184 $cn->owning_lib( $org->id );
1185 $cn->record( $recid );
1186 $cn->create_date( 'now' );
1187 $cn->creator( $rec->creator );
1188 $cn->editor( $rec->editor );
1189 $cn->edit_date( 'now' );
1190 $cn->label( '##URI##' );
1193 $cn_cache{$org->id} = $cn;
1195 push @objects, { uri => $uri, call_number => $cn };
1198 $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
# Retrieves the biblio record referenced by $rec and responds with each
# URI / call number pair extracted from its 856 fields.
# Returns undef when the record is missing or has no MARC.
1202 sub get_uris_record {
1207 OpenILS::Application::Ingest->post_init();
1208 my $r = OpenSRF::AppSession
1209 ->create('open-ils.cstore')
1210 ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
1213 return undef unless ($r and $r->marc);
# NOTE(review): no id-counter references are passed here, although
# _extract_856_uris dereferences $max_cn / $max_uri when it builds new
# objects - confirm how that case behaves.
1215 $client->respond($_) for (_extract_856_uris($r));
# Published as open-ils.ingest.856_uri.record.
1218 __PACKAGE__->register_method(
1219 api_name => "open-ils.ingest.856_uri.record",
1220 method => "get_uris_record",
# Responds with each URI / call number pair extracted from the 856 fields of
# the record object $obj.  The id counters are passed by reference so the
# helper can advance them as it creates new objects.
# Returns undef when $obj is missing or has no MARC.
1226 sub get_uris_object {
1231 my $max_uri = shift;
1233 return undef unless ($obj and $obj->marc);
1235 $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri));
# Published as open-ils.ingest.856_uri.object.
1238 __PACKAGE__->register_method(
1239 api_name => "open-ils.ingest.856_uri.object",
1240 method => "get_uris_object",
1247 # --------------------------------------------------------------------------------
1250 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1251 use base qw/OpenILS::Application::Ingest/;
1252 use Unicode::Normalize;
1253 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of the biblio record referenced by $rec by running
# open-ils.ingest.fingerprint.xml on its MARC, and truncates the quality
# score to an integer.
# Returns undef when the record is missing or has no MARC.
1255 sub biblio_fingerprint_record {
1260 OpenILS::Application::Ingest->post_init();
1262 my $r = OpenSRF::AppSession
1263 ->create('open-ils.cstore')
1264 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1267 return undef unless ($r and $r->marc);
1269 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so
# interpolating it here presumably logs a reference string rather than the
# fingerprint text; the call is debug() but is passed the INFO level - confirm.
1270 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1271 $fp->{quality} = int($fp->{quality});
# Published as open-ils.ingest.fingerprint.record.
1274 __PACKAGE__->register_method(
1275 api_name => "open-ils.ingest.fingerprint.record",
1276 method => "biblio_fingerprint_record",
# Runs the configured fingerprint script against a MARC XML string.
# The script file and its library paths come from the open-ils.ingest
# app_settings; the entityized XML is exposed to the script as
# environment.marc.  Returns undef (after logging an error) when the script
# run yields a false value.
1282 sub biblio_fingerprint {
1285 my $xml = OpenILS::Application::Ingest::entityize(shift);
1287 $log->internal("Got MARC [$xml]");
# Settings path prefix shared by the lookups below.
1290 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1291 my $conf = OpenSRF::Utils::SettingsClient->new;
1293 my $libs = $conf->config_value(@pfx, 'script_path');
1294 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref.
1295 my $script_libs = (ref($libs)) ? $libs : [$libs];
1297 $log->debug("Loading script $script_file for biblio fingerprinting...");
1299 $fp_script = new OpenILS::Utils::ScriptRunner
1300 ( file => $script_file,
1301 paths => $script_libs,
1302 reset_count => 100 );
1305 $fp_script->insert('environment' => {marc => $xml} => 1);
1307 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1308 $log->debug("Script for biblio fingerprinting completed successfully...");
# Published as open-ils.ingest.fingerprint.xml.
1312 __PACKAGE__->register_method(
1313 api_name => "open-ils.ingest.fingerprint.xml",
1314 method => "biblio_fingerprint",
# Runs the configured descriptor script against a MARC XML string.
# The script file and its library paths come from the open-ils.ingest
# app_settings; the entityized XML is exposed to the script as
# environment.marc.  Returns undef (after logging an error) when the script
# run yields a false value.
1320 sub biblio_descriptor {
1323 my $xml = OpenILS::Application::Ingest::entityize(shift);
1325 $log->internal("Got MARC [$xml]");
# Settings path prefix shared by the lookups below.
1328 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1329 my $conf = OpenSRF::Utils::SettingsClient->new;
1331 my $libs = $conf->config_value(@pfx, 'script_path');
1332 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalise to an array ref.
1333 my $script_libs = (ref($libs)) ? $libs : [$libs];
1335 $log->debug("Loading script $script_file for biblio descriptor extraction...");
1337 $rd_script = new OpenILS::Utils::ScriptRunner
1338 ( file => $script_file,
1339 paths => $script_libs,
1340 reset_count => 100 );
1343 $log->debug("Setting up environment for descriptor extraction script...");
1344 $rd_script->insert('environment.marc' => $xml => 1);
1345 $log->debug("Environment building complete...");
1347 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1348 $log->debug("Script for biblio descriptor extraction completed successfully");
# Post-process date1 and date2 of the result, but only when the value is true
# and not a single space.
1350 my $d1 = $res->date1;
1351 if ($d1 && $d1 ne ' ') {
1356 my $d2 = $res->date2;
1357 if ($d2 && $d2 ne ' ') {
# Published as open-ils.ingest.descriptor.xml.
1364 __PACKAGE__->register_method(
1365 api_name => "open-ils.ingest.descriptor.xml",
1366 method => "biblio_descriptor",