1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::ScriptRunner;
14 use OpenILS::Utils::Fieldmapper;
15 use OpenSRF::Utils::JSON;
17 use OpenILS::Utils::Fieldmapper;
21 use Time::HiRes qw(time);
# Metadata formats known to the ingest code, keyed by format name.  Each entry
# records the XML namespace URI of that format; the mods* entries additionally
# receive a compiled {xslt} stylesheet when the stylesheets are loaded.
23 our %supported_formats = (
24 mods33 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
26 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
27 mods => {ns => 'http://www.loc.gov/mods/'},
28 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
29 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
30 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
31 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
32 atom => {ns => 'http://www.w3.org/2005/Atom'},
33 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
37 rss10 => {ns => 'http://purl.org/rss/1.0/'},
38 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Module-level helpers shared by the code below: the logger class name, one
# XML parser and one XSLT processor.
43 my $log = 'OpenSRF::Utils::Logger';
45 my $parser = XML::LibXML->new();
46 my $xslt = XML::LibXSLT->new();
# One-time initialisation (logged as "Running post_init"): the work below only
# runs while $xpathset has no keys yet.
56 unless (keys %$xpathset) {
57 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL stylesheets, looked up through the settings client.
59 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse each MARC-to-MODS stylesheet once and keep the compiled result in
# $supported_formats{<format>}{xslt}.
61 unless ($supported_formats{mods}{xslt}) {
62 $log->debug("Loading MODS XSLT", DEBUG);
63 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
64 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
67 unless ($supported_formats{mods3}{xslt}) {
68 $log->debug("Loading MODS v3 XSLT", DEBUG);
69 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
70 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
73 unless ($supported_formats{mods32}{xslt}) {
74 $log->debug("Loading MODS v32 XSLT", DEBUG);
75 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
76 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
79 unless ($supported_formats{mods33}{xslt}) {
80 $log->debug("Loading MODS v33 XSLT", DEBUG);
81 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS33.xsl");
82 $supported_formats{mods33}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows whose search_field flag is 't'.
85 my $req = OpenSRF::AppSession
86 ->create('open-ils.cstore')
88 # XXX testing new metabib field use for faceting
89 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
90 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Index every returned definition as $xpathset->{field_class}{name}, storing
# its XPath expression, database id and source format.
94 if (ref $req and @$req) {
96 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
97 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
98 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
99 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
105 # --------------------------------------------------------------------------------
108 package OpenILS::Application::Ingest::Biblio;
109 use base qw/OpenILS::Application::Ingest/;
110 use Unicode::Normalize;
# Full read/write ingest of one bibliographic record object ($bib).
# Runs the read-only ingest (open-ils.ingest.full.biblio.object.readonly) to
# build the derived rows and then, inside a single cstore transaction, replaces
# the record's URI call numbers and maps, full_rec rows, record descriptor and
# classed field entries, re-links the record to a metarecord for its
# fingerprint, and updates the record itself.
# Returns undef when the read-only ingest yields nothing or the commit fails.
112 sub rw_biblio_ingest_single_object {
# Build all derived data first; nothing is written if this produces no result.
117 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
118 return undef unless ($blob);
# Copy the freshly computed fingerprint and quality onto the record.
120 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
121 $bib->quality( $blob->{fingerprint}->{quality} );
# Every write below goes through this connected session and its transaction.
123 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
125 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
128 # update uri stuff ...
130 # gather URI call numbers for this record
# NOTE(review): $u is not declared in the lines visible before this point (the
# `for my $u` loop only starts further down) -- confirm the chained assignment
# into $u->{call_number} is intentional.
131 my $uri_cns = $u->{call_number} = $cstore->request(
132 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
136 # gather the maps for those call numbers
# NOTE(review): the map ids are also assigned to $u->{call_number}, replacing
# the value stored just above -- looks like a copy/paste leftover; confirm.
137 my $uri_maps = $u->{call_number} = $cstore->request(
138 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
141 # delete the old maps
142 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
144 # and delete the call numbers if there are no more URIs
145 if (!@{ $blob->{uri} }) {
146 $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
150 # now, add CNs, URIs and maps
# Both caches are keyed by owning library: the first newly created call number
# (or URI) for an owner is reused for later entries with the same owner.
# NOTE(review): confirm that reuse-by-owner is intended for URIs as well as
# call numbers.
151 my %new_cns_by_owner;
152 my %new_uris_by_owner;
153 for my $u ( @{ $blob->{uri} } ) {
155 my $owner = $u->{call_number}->owning_lib;
157 if ($u->{call_number}->isnew) {
158 if ($new_cns_by_owner{$owner}) {
159 $u->{call_number} = $new_cns_by_owner{$owner};
# The placeholder id is cleared before the call number is created in cstore.
161 $u->{call_number}->clear_id;
162 $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
163 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
168 if ($u->{uri}->isnew) {
169 if ($new_uris_by_owner{$owner}) {
170 $u->{uri} = $new_uris_by_owner{$owner};
172 $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
173 'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
# Link the URI to its call number.
178 my $umap = Fieldmapper::asset::uri_call_number_map->new;
179 $umap->uri($u->{uri}->id);
180 $umap->call_number($u->{call_number}->id);
# NOTE(review): $tmp is not assigned in the lines visible before this point --
# confirm what the `if (!$tmp)` guard is meant to test.
182 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1) if (!$tmp);
185 # update full_rec stuff ...
# Delete every existing full_rec row for the record, then insert the new set.
186 $tmp = $cstore->request(
187 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
188 { record => $bib->id }
191 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
192 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
194 # update rec_descriptor stuff ...
# Same replace pattern for the single record descriptor row.
195 $tmp = $cstore->request(
196 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
197 { record => $bib->id }
200 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
201 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
203 # deal with classed fields...
# Remove the old field entries of each index class for this record ...
204 for my $class ( qw/title author subject keyword series/ ) {
205 $tmp = $cstore->request(
206 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
207 { source => $bib->id }
210 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# ... then create the new ones; the cstore method name is derived from each
# object's class name with the leading "Fieldmapper::" removed.
212 for my $obj ( @{ $blob->{field_entries} } ) {
213 my $class = $obj->class_name;
214 $class =~ s/^Fieldmapper:://o;
216 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Drop this record's existing metarecord source maps ...
221 $tmp = $cstore->request(
222 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
223 { source => $bib->id }
226 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# ... and load the metarecords those maps pointed at (empty list if none).
229 my $old_mrs = $cstore->request(
230 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
231 )->gather(1) if (@$tmp);
233 $old_mrs = [] if (!ref($old_mrs));
# Review each previously linked metarecord: compare its fingerprint with the
# record's, look up the source maps still pointing at it, and delete it where
# appropriate.
236 for my $m (@$old_mrs) {
237 if ($m->fingerprint eq $bib->fingerprint) {
240 my $others = $cstore->request(
241 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
246 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
256 # Get the matching MR, if any.
257 $mr = $cstore->request(
258 'open-ils.cstore.direct.metabib.metarecord.search',
259 { fingerprint => $bib->fingerprint }
# Metarecord-level holds (hold_type 'M') that target old metarecords flagged
# as deleted are fetched and updated.
262 $holds = $cstore->request(
263 'open-ils.cstore.direct.action.hold_request.search.atomic',
264 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
265 )->gather(1) if (@$old_mrs);
268 for my $h (@$holds) {
270 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with this record as its master.
277 $mr = new Fieldmapper::metabib::metarecord;
278 $mr->fingerprint( $bib->fingerprint );
279 $mr->master_record( $bib->id );
282 "open-ils.cstore.direct.metabib.metarecord.create",
283 $mr => { quiet => 'true' }
# Update the holds that have not been marked as changed yet.
287 for my $h (grep { !$_->ischanged } @$holds) {
289 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Pick the master record: look up the records already mapped to the
# metarecord, ordered by quality, and keep the best one only if its quality is
# higher than this record's; otherwise this record becomes the master.
292 my $mrm = $cstore->request(
293 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
294 { metarecord => $mr->id }
298 my $best = $cstore->request(
299 "open-ils.cstore.direct.biblio.record_entry.search",
300 { id => [ map { $_->source } @$mrm ] },
301 { 'select' => { bre => [ qw/id quality/ ] },
302 order_by => { bre => "quality desc" },
307 if ($best->quality > $bib->quality) {
308 $mr->master_record($best->id);
310 $mr->master_record($bib->id);
313 $mr->master_record($bib->id);
318 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the metarecord and store the updated record entry.
321 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
322 $mrm->source($bib->id);
323 $mrm->metarecord($mr->id);
325 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
326 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false commit result makes the method return undef.
328 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Published as open-ils.ingest.full.biblio.object.
333 __PACKAGE__->register_method(
334 api_name => "open-ils.ingest.full.biblio.object",
335 method => "rw_biblio_ingest_single_object",
# Full read/write ingest of one bibliographic record identified by $rec.
# Retrieves the record entry from cstore (inside a transaction that is rolled
# back once the read is done) and passes it to
# open-ils.ingest.full.biblio.object.
# Returns undef when no record comes back, otherwise the first result of the
# object-level ingest.
340 sub rw_biblio_ingest_single_record {
345 OpenILS::Application::Ingest->post_init();
346 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
347 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
349 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
351 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
354 return undef unless ($r and @$r);
356 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Published as open-ils.ingest.full.biblio.record.
358 __PACKAGE__->register_method(
359 api_name => "open-ils.ingest.full.biblio.record",
360 method => "rw_biblio_ingest_single_record",
# Full read/write ingest of several bibliographic records.
# The ids may arrive either as one array reference or as a flat argument list.
# All matching record entries are fetched in one search (inside a transaction
# that is rolled back after the read) and each is run through
# open-ils.ingest.full.biblio.object.  Returns undef when nothing was found.
365 sub rw_biblio_ingest_record_list {
368 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
370 OpenILS::Application::Ingest->post_init();
371 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
372 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
374 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);
376 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
379 return undef unless ($r and @$r);
# A true first result from the object-level ingest marks that record as done.
383 if (($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0]) {
# Published as open-ils.ingest.full.biblio.record_list.
389 __PACKAGE__->register_method(
390 api_name => "open-ils.ingest.full.biblio.record_list",
391 method => "rw_biblio_ingest_record_list",
# Read-only ingest of one bibliographic record object ($bib).
# Derives the 856 URI objects, flat MARC rows, classed field entries,
# fingerprint and record descriptor from the record's MARC XML and stamps the
# record id onto the rows.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint,
# descriptor and uri.
396 sub ro_biblio_ingest_single_object {
400 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
404 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# Starting id for placeholder call numbers: highest existing call number id
# plus 1000.
407 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
408 $max_cn = int($cn->id) + 1000;
# NOTE(review): $max_uri is derived from the same asset.call_number search
# (ordered on acn) as $max_cn.  If it is meant to follow the highest asset.uri
# id this looks like a copy/paste slip -- confirm.
412 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
413 $max_uri = int($cn->id) + 1000;
418 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry extractors take the parsed document; the
# fingerprint and descriptor extractors take the XML string.
420 my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri);
421 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
422 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
423 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
424 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every derived row back to the record; the descriptor may be missing.
426 $_->source($bib->id) for (@mXfe);
427 $_->record($bib->id) for (@mfr);
428 $rd->record($bib->id) if ($rd);
430 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
# Published as open-ils.ingest.full.biblio.object.readonly.
432 __PACKAGE__->register_method(
433 api_name => "open-ils.ingest.full.biblio.object.readonly",
434 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a bibliographic MARC XML string.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor.  No record id is set on the rows here.
439 sub ro_biblio_ingest_single_xml {
442 my $xml = OpenILS::Application::AppUtils->entityize(shift);
444 my $document = $parser->parse_string($xml);
446 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
447 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
448 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
449 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
451 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Published as open-ils.ingest.full.biblio.xml.readonly.
453 __PACKAGE__->register_method(
454 api_name => "open-ils.ingest.full.biblio.xml.readonly",
455 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bibliographic record with id $rec.
# Retrieves the record entry from cstore, runs the XML-level read-only ingest
# on its MARC and stamps $rec onto the resulting rows.
# Returns undef when the record cannot be retrieved.
460 sub ro_biblio_ingest_single_record {
465 OpenILS::Application::Ingest->post_init();
466 my $r = OpenSRF::AppSession
467 ->create('open-ils.cstore')
468 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
471 return undef unless ($r and @$r);
473 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
475 $_->source($rec) for (@{$res->{field_entries}});
476 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike the object-level variant, the descriptor is not checked
# for definedness before the method call -- confirm it can never be undef here.
477 $res->{descriptor}->record($rec);
# Published as open-ils.ingest.full.biblio.record.readonly.
481 __PACKAGE__->register_method(
482 api_name => "open-ils.ingest.full.biblio.record.readonly",
483 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the record-level read-only ingest
# for each id and sends the result back with $client->respond.
488 sub ro_biblio_ingest_stream_record {
492 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
494 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
496 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
498 my $rec = $resp->content;
499 last unless (defined $rec);
501 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
502 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
504 $_->source($rec) for (@{$res->{field_entries}});
505 $_->record($rec) for (@{$res->{full_rec}});
507 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.record_stream.readonly.
512 __PACKAGE__->register_method(
513 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
514 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of MARC XML strings.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the XML-level read-only ingest on
# each payload and responds with the result.
519 sub ro_biblio_ingest_stream_xml {
523 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
525 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
527 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
529 my $xml = $resp->content;
530 last unless (defined $xml);
532 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
533 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
535 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.xml_stream.readonly.
540 __PACKAGE__->register_method(
541 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
542 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bibliographic record objects.
# Each received payload is a record object: its MARC is run through the
# XML-level read-only ingest, the record's id is stamped onto the resulting
# rows and the result is sent back to the client.  No cstore write is issued
# in the visible lines, despite the "rw"/"import" naming.
547 sub rw_biblio_ingest_stream_import {
551 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
553 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
555 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
557 my $bib = $resp->content;
558 last unless (defined $bib);
560 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
561 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
563 $_->source($bib->id) for (@{$res->{field_entries}});
564 $_->record($bib->id) for (@{$res->{full_rec}});
566 $client->respond( $res );
# Published as open-ils.ingest.full.biblio.bib_stream.import.
571 __PACKAGE__->register_method(
572 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
573 method => "rw_biblio_ingest_stream_import",
579 # --------------------------------------------------------------------------------
582 package OpenILS::Application::Ingest::Authority;
583 use base qw/OpenILS::Application::Ingest/;
584 use Unicode::Normalize;
# Read-only ingest of one authority record object ($bib).
# Flattens the record's MARC XML into full_rec rows, stamps the record id onto
# them and returns { full_rec => [...] }.
586 sub ro_authority_ingest_single_object {
590 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
592 my $document = $parser->parse_string($xml);
594 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
596 $_->record($bib->id) for (@mfr);
598 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.authority.object.readonly.
600 __PACKAGE__->register_method(
601 api_name => "open-ils.ingest.full.authority.object.readonly",
602 method => "ro_authority_ingest_single_object",
# NOTE(review): this second registration publishes the authority method under
# a *serial* api_name, which the Serial package also registers for
# ro_serial_ingest_single_object -- confirm this is not a copy/paste leftover.
606 __PACKAGE__->register_method(
607 api_name => "open-ils.ingest.full.serial.object.readonly",
608 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARC XML string.
# Returns { full_rec => [...] } containing the flattened MARC rows; no record
# id is set on them here.
614 sub ro_authority_ingest_single_xml {
617 my $xml = OpenILS::Application::AppUtils->entityize(shift);
619 my $document = $parser->parse_string($xml);
621 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
623 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.authority.xml.readonly.
625 __PACKAGE__->register_method(
626 api_name => "open-ils.ingest.full.authority.xml.readonly",
627 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record with id $rec.
# Retrieves the record entry from cstore, runs the XML-level authority ingest
# on its MARC and stamps $rec onto the full_rec rows.
# Returns undef when the record cannot be retrieved.
632 sub ro_authority_ingest_single_record {
637 OpenILS::Application::Ingest->post_init();
638 my $r = OpenSRF::AppSession
639 ->create('open-ils.cstore')
640 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
643 return undef unless ($r and @$r);
645 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
647 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_authority_ingest_single_xml returns only a full_rec key, so
# $res->{descriptor} would be undef and this method call would die -- confirm
# and guard or remove.
648 $res->{descriptor}->record($rec);
# Published as open-ils.ingest.full.authority.record.readonly.
652 __PACKAGE__->register_method(
653 api_name => "open-ils.ingest.full.authority.record.readonly",
654 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the record-level authority ingest
# for each id and responds with the result.
659 sub ro_authority_ingest_stream_record {
663 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
665 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
667 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
669 my $rec = $resp->content;
670 last unless (defined $rec);
672 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
673 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
675 $_->record($rec) for (@{$res->{full_rec}});
677 $client->respond( $res );
# Published as open-ils.ingest.full.authority.record_stream.readonly.
682 __PACKAGE__->register_method(
683 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
684 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of MARC XML strings.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the XML-level authority ingest on
# each payload and responds with the result.
689 sub ro_authority_ingest_stream_xml {
693 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
695 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
697 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
699 my $xml = $resp->content;
700 last unless (defined $xml);
702 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
703 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
705 $client->respond( $res );
# Published as open-ils.ingest.full.authority.xml_stream.readonly.
710 __PACKAGE__->register_method(
711 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
712 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects.
# Each received payload is a record object: its MARC is run through the
# XML-level authority ingest, the record's id is stamped onto the full_rec
# rows and the result is sent back to the client.
717 sub rw_authority_ingest_stream_import {
721 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
723 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
725 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
727 my $bib = $resp->content;
728 last unless (defined $bib);
730 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
731 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
733 $_->record($bib->id) for (@{$res->{full_rec}});
735 $client->respond( $res );
# Published as open-ils.ingest.full.authority.bib_stream.import.
740 __PACKAGE__->register_method(
741 api_name => "open-ils.ingest.full.authority.bib_stream.import",
742 method => "rw_authority_ingest_stream_import",
747 # --------------------------------------------------------------------------------
750 package OpenILS::Application::Ingest::Serial;
751 use base qw/OpenILS::Application::Ingest/;
752 use Unicode::Normalize;
# Read-only ingest of one serial record object ($bib).
# Flattens the record's MARC XML into full_rec rows, stamps the record id onto
# them and returns { full_rec => [...] }.
754 sub ro_serial_ingest_single_object {
758 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
760 my $document = $parser->parse_string($xml);
762 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.serial.xml")->run($document);
764 $_->record($bib->id) for (@mfr);
766 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.serial.object.readonly.
768 __PACKAGE__->register_method(
769 api_name => "open-ils.ingest.full.serial.object.readonly",
770 method => "ro_serial_ingest_single_object",
# Read-only ingest of a serial MARC XML string.
# Returns { full_rec => [...] } containing the flattened MARC rows; no record
# id is set on them here.
775 sub ro_serial_ingest_single_xml {
778 my $xml = OpenILS::Application::AppUtils->entityize(shift);
780 my $document = $parser->parse_string($xml);
782 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.serial.xml")->run($document);
784 return { full_rec => \@mfr };
# Published as open-ils.ingest.full.serial.xml.readonly.
786 __PACKAGE__->register_method(
787 api_name => "open-ils.ingest.full.serial.xml.readonly",
788 method => "ro_serial_ingest_single_xml",
# Read-only ingest of the serial record with id $rec.
# Retrieves the record entry from cstore, runs the XML-level serial ingest on
# its MARC and stamps $rec onto the full_rec rows.
# Returns undef when the record cannot be retrieved.
793 sub ro_serial_ingest_single_record {
798 OpenILS::Application::Ingest->post_init();
799 my $r = OpenSRF::AppSession
800 ->create('open-ils.cstore')
801 ->request( 'open-ils.cstore.direct.serial.record_entry.retrieve' => $rec )
804 return undef unless ($r and @$r);
806 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($r->marc);
808 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_serial_ingest_single_xml returns only a full_rec key, so
# $res->{descriptor} would be undef and this method call would die -- confirm
# and guard or remove.
809 $res->{descriptor}->record($rec);
# Published as open-ils.ingest.full.serial.record.readonly.
813 __PACKAGE__->register_method(
814 api_name => "open-ils.ingest.full.serial.record.readonly",
815 method => "ro_serial_ingest_single_record",
# Streaming read-only serial ingest by record id.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the record-level serial ingest
# for each id and responds with the result.
820 sub ro_serial_ingest_stream_record {
824 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
826 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
828 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
830 my $rec = $resp->content;
831 last unless (defined $rec);
833 $log->debug("Running open-ils.ingest.full.serial.record.readonly ...");
834 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.record.readonly")->run($rec);
836 $_->record($rec) for (@{$res->{full_rec}});
838 $client->respond( $res );
# Published as open-ils.ingest.full.serial.record_stream.readonly.
843 __PACKAGE__->register_method(
844 api_name => "open-ils.ingest.full.serial.record_stream.readonly",
845 method => "ro_serial_ingest_stream_record",
# Streaming read-only serial ingest of MARC XML strings.
# Reads one message at a time from the client (count => 1, timeout => 5),
# stops at the first undefined payload, runs the XML-level serial ingest on
# each payload and responds with the result.
850 sub ro_serial_ingest_stream_xml {
854 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
856 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
858 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
860 my $xml = $resp->content;
861 last unless (defined $xml);
863 $log->debug("Running open-ils.ingest.full.serial.xml.readonly ...");
864 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($xml);
866 $client->respond( $res );
# Published as open-ils.ingest.full.serial.xml_stream.readonly.
871 __PACKAGE__->register_method(
872 api_name => "open-ils.ingest.full.serial.xml_stream.readonly",
873 method => "ro_serial_ingest_stream_xml",
# Streaming ingest of serial record objects.
# Each received payload is a record object: its MARC is run through the
# XML-level serial ingest, the record's id is stamped onto the full_rec rows
# and the result is sent back to the client.
878 sub rw_serial_ingest_stream_import {
882 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not referenced in the visible lines of this sub.
884 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
886 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
888 my $bib = $resp->content;
889 last unless (defined $bib);
891 $log->debug("Running open-ils.ingest.full.serial.xml.readonly ...");
892 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($bib->marc);
894 $_->record($bib->id) for (@{$res->{full_rec}});
896 $client->respond( $res );
# Published as open-ils.ingest.full.serial.bib_stream.import.
901 __PACKAGE__->register_method(
902 api_name => "open-ils.ingest.full.serial.bib_stream.import",
903 method => "rw_serial_ingest_stream_import",
909 # --------------------------------------------------------------------------------
910 # MARC index extraction
912 package OpenILS::Application::Ingest::XPATH;
913 use base qw/OpenILS::Application::Ingest/;
914 use Unicode::Normalize;
916 # give this an XML documentElement and an XPATH expression
# Collects the text matched by the XPath expression into one space-separated
# string.  When both a namespace URI and a prefix are supplied, the prefix is
# first bound on the element so the expression can use it.
917 sub xpath_to_string {
921 my $ns_prefix = shift;
924 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
928 # grab the set of matching nodes
929 my @nodes = $xml->findnodes( $xpath );
930 for my $value (@nodes) {
932 # grab all children of the node
933 my @children = $value->childNodes();
934 for my $child (@children) {
936 # add the child's content to the growing buffer
# The uniqueness test is a substring match against everything collected so
# far (the text is escaped with quotemeta), so a value contained in an
# earlier, longer value is skipped as well.
937 my $content = quotemeta($child->textContent);
938 next if ($unique && $string =~ /$content/); # uniquify the values
939 $string .= $child->textContent . " ";
942 $string .= $value->textContent . " ";
# Split "word/word" pairs and "dddd-dddd" ranges into separate tokens.
946 $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
947 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
# Extracts index field entries for the requested classes from a MARC XML
# document (string or already parsed) and streams one
# Fieldmapper::metabib::<class>_field_entry object per field definition back
# to the client.
952 sub class_index_string_xml {
958 OpenILS::Application::Ingest->post_init();
# A plain string is entityized and parsed; a reference is used as-is.
959 $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
963 for my $class (@classes) {
964 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
965 for my $type ( keys %{ $xpathset->{$class} } ) {
967 my $def = $xpathset->{$class}->{$type};
968 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record into the definition's format at most once per format;
# the transformed document is cached in %transform_cache.
973 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
974 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix for the XPath expression.
977 my $value = xpath_to_string(
978 $document->documentElement => $def->{xpath},
979 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop combining marks (\pM) and "other"
# characters (\pC), trim trailing non-word characters, and remove dots that
# sit between word boundaries.
985 $value = NFD($value);
986 $value =~ s/\pM+//sgo;
987 $value =~ s/\pC+//sgo;
988 $value =~ s/\W+$//sgo;
990 $value =~ s/\b\.+\b//sgo;
993 my $fm = $class_constructor->new;
994 $fm->value( $value );
995 $fm->field( $xpathset->{$class}->{$type}->{id} );
996 $client->respond($fm);
# Published as open-ils.ingest.field_entry.class.xml.
1001 __PACKAGE__->register_method(
1002 api_name => "open-ils.ingest.field_entry.class.xml",
1003 method => "class_index_string_xml",
# Record-id variant of the class-based field entry extraction: retrieves the
# record, runs open-ils.ingest.field_entry.class.xml on its MARC and streams
# every resulting field entry to the client.
# Returns undef when the record cannot be retrieved.
1009 sub class_index_string_record {
# NOTE(review): `shift` yields a single value, so only one class is ever
# taken here even though the variable is an array -- confirm whether the
# remaining arguments (@_) were intended.
1013 my @classes = shift;
1015 OpenILS::Application::Ingest->post_init();
# NOTE(review): this looks up authority.record_entry, whereas
# all_index_string_record uses biblio.record_entry -- confirm which is meant.
1016 my $r = OpenSRF::AppSession
1017 ->create('open-ils.cstore')
1018 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
1021 return undef unless ($r and @$r);
1023 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
1025 $client->respond($fm);
# Published as open-ils.ingest.field_entry.class.record.
1029 __PACKAGE__->register_method(
1030 api_name => "open-ils.ingest.field_entry.class.record",
1031 method => "class_index_string_record",
# Extracts field entries for every class present in $xpathset from the given
# XML and streams each one to the client.
1037 sub all_index_string_xml {
1042 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
1043 $client->respond($fm);
# Published as open-ils.ingest.extract.field_entry.all.xml.
1047 __PACKAGE__->register_method(
1048 api_name => "open-ils.ingest.extract.field_entry.all.xml",
1049 method => "all_index_string_xml",
# Record-id variant of the all-classes extraction: retrieves the bibliographic
# record entry, runs open-ils.ingest.field_entry.class.xml on its MARC for
# every class in $xpathset and streams each field entry to the client.
# Returns undef when the record cannot be retrieved.
1055 sub all_index_string_record {
1060 OpenILS::Application::Ingest->post_init();
1061 my $r = OpenSRF::AppSession
1062 ->create('open-ils.cstore')
1063 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1066 return undef unless ($r and @$r);
1068 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
1070 $client->respond($fm);
# Published as open-ils.ingest.extract.field_entry.all.record.
1074 __PACKAGE__->register_method(
1075 api_name => "open-ils.ingest.extract.field_entry.all.record",
1076 method => "all_index_string_record",
1082 # --------------------------------------------------------------------------------
1085 package OpenILS::Application::Ingest::FlatMARC;
1086 use base qw/OpenILS::Application::Ingest/;
1087 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper full_rec rows.
#   $marcxml - parsed XML document containing a <record> element
#   $xmltype - Fieldmapper namespace for the rows ('metabib' by default); the
#              row class is Fieldmapper::<xmltype>::full_rec
# Rows are built for the leader, for each controlfield and for each subfield
# of each datafield.
1090 sub _marcxml_to_full_rows {
1092 my $marcxml = shift;
1093 my $xmltype = shift || 'metabib';
1095 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first element whose local name is "record" is processed.
1099 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader: strip combining marks, "other" characters and trailing non-word
# characters from the value.
1101 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1102 next unless $tagline;
1104 my $ns = $type->new;
1107 my $val = $tagline->textContent;
1109 $val =~ s/\pM+//sgo;
1110 $val =~ s/\pC+//sgo;
1111 $val =~ s/\W+$//sgo;
# Control fields: same clean-up, with the tag taken from the element.
1117 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1118 next unless $tagline;
1120 my $ns = $type->new;
1122 $ns->tag( $tagline->getAttribute( "tag" ) );
1123 my $val = $tagline->textContent;
1125 $val =~ s/\pM+//sgo;
1126 $val =~ s/\pC+//sgo;
1127 $val =~ s/\W+$//sgo;
# Data fields: one row per subfield.
1133 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1134 next unless $tagline;
1136 my $tag = $tagline->getAttribute( "tag" );
1137 my $ind1 = $tagline->getAttribute( "ind1" );
1138 my $ind2 = $tagline->getAttribute( "ind2" );
1140 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1143 my $ns = $type->new;
1148 $ns->subfield( $data->getAttribute( "code" ) );
1149 my $val = $data->textContent;
# In addition to the clean-up above, "dddd-dddd" ranges and "word/word" pairs
# are split on a space and the value is stored lower-cased.
1151 $val =~ s/\pM+//sgo;
1152 $val =~ s/\pC+//sgo;
1153 $val =~ s/\W+$//sgo;
1154 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1155 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1156 $ns->value( lc($val) );
# Bibliographic 245 fields get an extra row for subfield a whose value skips
# the first $ind2 characters (presumably the MARC non-filing character count
# -- confirm).
1161 if ($xmltype eq 'metabib' and $tag eq '245') {
1164 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1165 next unless ($data and $data->getAttribute( "code" ) eq 'a');
1172 $ns->subfield( $data->getAttribute( "code" ) );
1173 my $val = substr( $data->textContent, $ind2 );
1175 $val =~ s/\pM+//sgo;
1176 $val =~ s/\pC+//sgo;
1177 $val =~ s/\W+$//sgo;
1178 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1179 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1180 $ns->value( lc($val) );
1187 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the method registered below as "flat_marc_xml": flattens MARC XML
# (string or already parsed) into full_rec rows and streams them to the client.
1196 $log->debug("processing [$xml]");
# A plain string is entityized and parsed; a reference is used as-is.
1198 $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
# The Fieldmapper namespace is chosen from the api_name the method was called
# through; 'metabib' is the default.
1200 my $type = 'metabib';
1201 $type = 'authority' if ($self->api_name =~ /authority/o);
1202 $type = 'serial' if ($self->api_name =~ /serial/o);
1204 OpenILS::Application::Ingest->post_init();
1206 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same method is published under three api names, one per record type.
1209 __PACKAGE__->register_method(
1210 api_name => "open-ils.ingest.flat_marc.authority.xml",
1211 method => "flat_marc_xml",
1216 __PACKAGE__->register_method(
1217 api_name => "open-ils.ingest.flat_marc.biblio.xml",
1218 method => "flat_marc_xml",
1223 __PACKAGE__->register_method(
1224 api_name => "open-ils.ingest.flat_marc.serial.xml",
1225 method => "flat_marc_xml",
# Record-id variant of the flat MARC extraction: retrieves the record entry of
# the appropriate type from cstore, flattens its MARC through
# open-ils.ingest.flat_marc.<type>.xml and streams each row to the client.
# Returns undef when the record is missing or has no MARC.
1231 sub flat_marc_record {
# The record type comes from the api_name; here the default is 'biblio'
# (it is used to build cstore and ingest method names).
1236 my $type = 'biblio';
1237 $type = 'authority' if ($self->api_name =~ /authority/o);
1238 $type = 'serial' if ($self->api_name =~ /serial/o);
1240 OpenILS::Application::Ingest->post_init();
1241 my $r = OpenSRF::AppSession
1242 ->create('open-ils.cstore')
1243 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
1247 return undef unless ($r and $r->marc);
1249 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Each row is sent to the client and also logged as JSON.
1250 for my $row (@rows) {
1251 $client->respond($row);
1252 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# The same method is published under three api names, one per record type.
1256 __PACKAGE__->register_method(
1257 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1258 method => "flat_marc_record",
1263 __PACKAGE__->register_method(
1264 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1265 method => "flat_marc_record",
1270 __PACKAGE__->register_method(
1271 api_name => "open-ils.ingest.flat_marc.serial.record_entry",
1272 method => "flat_marc_record",
1278 # --------------------------------------------------------------------------------
1281 package OpenILS::Application::Ingest::Biblio::URI;
1282 use base qw/OpenILS::Application::Ingest/;
1283 use Unicode::Normalize;
1284 use OpenSRF::EX qw/:try/;
# Builds { uri => ..., call_number => ... } pairs from the 856 fields of a
# bibliographic record object ($rec).  Only 856 fields with ind1 of 4 or 1 and
# ind2 of 0 or 1 are considered, and fields without a subfield u or without an
# owner are skipped.
# $$max_cn and $$max_uri are dereferenced and post-incremented below, so both
# counters are expected to arrive as scalar references -- NOTE(review): confirm
# that callers pass references.
1287 sub _extract_856_uris {
1291 my $max_uri = shift;
1294 my $recid = $rec->id;
1295 my $marcxml = $rec->marc;
1297 my $document = $parser->parse_string($marcxml);
1298 my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
1300 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
1304 for my $node (@nodes) {
1305 # first, is there a URI?
1306 my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()');
1307 next unless ($href);
1309 # now, find the best possible label
1310 my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()');
1311 $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()');
# Use restriction text: subfield z, falling back to 2 and then n.
1315 my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()');
1316 $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()');
1317 $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1319 # moving on to the URI owner
# Subfield n is a fallback for both the use restriction above and the owner.
1320 my $owner = $node->findvalue('*[local-name()="subfield" and @code="w"]/text()');
1321 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
1322 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
1324 $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
1326 # no owner? skip it :(
1327 next unless ($owner);
# The owner string is matched against org unit shortnames.
1330 ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
1335 # now we can construct the uri object
# An active URI with the same label, href and use restriction is searched for
# first; a new one is built with a placeholder id taken from $$max_uri.
1337 ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } )
1341 $uri = Fieldmapper::asset::uri->new;
1343 $uri->id( $$max_uri++ );
1344 $uri->label($label);
1347 $uri->use_restriction($use);
1350 # see if we need to create a call number
# A call number already seen for this org unit during this call is cloned
# from the cache and its isnew flag cleared.
1351 my $cn = $cn_cache{$org->id};
1352 $cn = $cn->clone if ($cn);
1353 $cn->clear_isnew if ($cn);
1356 ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } )
# Otherwise a new '##URI##' call number is built for the org unit and record,
# with a placeholder id taken from $$max_cn and the record's creator/editor.
1360 $cn = Fieldmapper::asset::call_number->new;
1363 $cn->id( $$max_cn++ );
1364 $cn->owning_lib( $org->id );
1365 $cn->record( $recid );
1366 $cn->create_date( 'now' );
1367 $cn->creator( $rec->creator );
1368 $cn->editor( $rec->editor );
1369 $cn->edit_date( 'now' );
1370 $cn->label( '##URI##' );
1373 $cn_cache{$org->id} = $cn;
1375 push @objects, { uri => $uri, call_number => $cn };
1378 $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
1379 $cstore->disconnect;
# get_uris_record: API handler that requests a biblio.record_entry from
# open-ils.cstore for $rec and sends each element returned by
# _extract_856_uris to the caller through $client->respond.
# Returns undef when $r is false or has no marc.
# NOTE(review): the unpacking of $self / $client / $rec and the end of the
# request chain that yields $r are elided from this view.
# NOTE(review): _extract_856_uris is called with the record only, whereas
# get_uris_object also passes references to id counters -- confirm that
# omitting them here is intended.
1383 sub get_uris_record {
1388 OpenILS::Application::Ingest->post_init();
1389 my $r = OpenSRF::AppSession
1390 ->create('open-ils.cstore')
1391 ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
1394 return undef unless ($r and $r->marc);
1396 $client->respond($_) for (_extract_856_uris($r));
# Expose get_uris_record as open-ils.ingest.856_uri.record.
# NOTE(review): remaining arguments and the call's closing are elided here.
1399 __PACKAGE__->register_method(
1400 api_name => "open-ils.ingest.856_uri.record",
1401 method => "get_uris_record",
# get_uris_object: API handler that works on a record object supplied by the
# caller ($obj) instead of fetching one. Each element returned by
# _extract_856_uris is sent back through $client->respond.
# $max_cn and $max_uri are passed on by reference, so _extract_856_uris
# increments this handler's own copies of those counters.
# Returns undef when $obj is false or has no marc.
# NOTE(review): the unpacking of $self / $client / $obj / $max_cn is elided.
1407 sub get_uris_object {
1412 my $max_uri = shift;
1414 return undef unless ($obj and $obj->marc);
1416 $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri));
# Expose get_uris_object as open-ils.ingest.856_uri.object.
# NOTE(review): remaining arguments and the call's closing are elided here.
1419 __PACKAGE__->register_method(
1420 api_name => "open-ils.ingest.856_uri.object",
1421 method => "get_uris_object",
1428 # --------------------------------------------------------------------------------
1431 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1432 use base qw/OpenILS::Application::Ingest/;
1433 use Unicode::Normalize;
1434 use OpenSRF::EX qw/:try/;
# biblio_fingerprint_record: requests a biblio.record_entry from
# open-ils.cstore for $rec, runs the open-ils.ingest.fingerprint.xml method
# on its MARC, keeps the first result as $fp, and truncates $fp->{quality}
# to an integer.
# Returns undef when $r is false or has no marc.
# NOTE(review): argument unpacking, the end of the request chain that yields
# $r, and the final return are elided from this view.
1436 sub biblio_fingerprint_record {
1441 OpenILS::Application::Ingest->post_init();
1443 my $r = OpenSRF::AppSession
1444 ->create('open-ils.cstore')
1445 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1448 return undef unless ($r and $r->marc);
1450 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so
# interpolating it here likely logs a reference string rather than the
# fingerprint itself. INFO is also passed to a debug() call -- confirm.
1451 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1452 $fp->{quality} = int($fp->{quality});
# Expose biblio_fingerprint_record as open-ils.ingest.fingerprint.record.
# NOTE(review): remaining arguments and the call's closing are elided here.
1455 __PACKAGE__->register_method(
1456 api_name => "open-ils.ingest.fingerprint.record",
1457 method => "biblio_fingerprint_record",
# biblio_fingerprint: runs the configured biblio_fingerprint script against
# a MARC XML string. The XML argument is first passed through
# OpenILS::Application::AppUtils->entityize, then inserted into the script's
# environment as { marc => $xml } before the script is run.
# NOTE(review): the unpacking of $self / $client, the declaration of
# $fp_script, any guard around building it, and the final return are elided
# from this view.
1463 sub biblio_fingerprint {
1466 my $xml = OpenILS::Application::AppUtils->entityize(shift);
1468 $log->internal("Got MARC [$xml]");
# Script location comes from settings under apps/open-ils.ingest/app_settings.
1471 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1472 my $conf = OpenSRF::Utils::SettingsClient->new;
1474 my $libs = $conf->config_value(@pfx, 'script_path');
1475 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a reference; a non-reference value is
# wrapped in an array ref.
1476 my $script_libs = (ref($libs)) ? $libs : [$libs];
1478 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is assigned without "my", so it is declared
# outside the visible lines. "new Class (...)" is indirect-object syntax.
1480 $fp_script = new OpenILS::Utils::ScriptRunner
1481 ( file => $script_file,
1482 paths => $script_libs,
1483 reset_count => 100 );
1486 $fp_script->insert('environment' => {marc => $xml} => 1);
# If run returns a false value, the error is logged and undef is returned.
# NOTE(review): the return only happens when $log->error(...) itself yields
# a true value, because of the "&&" -- confirm that it always does.
1488 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1489 $log->debug("Script for biblio fingerprinting completed successfully...");
# Expose biblio_fingerprint as open-ils.ingest.fingerprint.xml.
# NOTE(review): remaining arguments and the call's closing are elided here.
1493 __PACKAGE__->register_method(
1494 api_name => "open-ils.ingest.fingerprint.xml",
1495 method => "biblio_fingerprint",
# biblio_descriptor: runs the configured biblio_descriptor script against a
# MARC XML string. The XML argument is first passed through
# OpenILS::Application::AppUtils->entityize, then inserted into the script
# under 'environment.marc' before the script is run. The result's date1 and
# date2 values are then inspected.
# NOTE(review): the unpacking of $self / $client, the declaration of
# $rd_script, any guard around building it, the bodies of the two date
# conditionals and the final return are elided from this view.
1501 sub biblio_descriptor {
1504 my $xml = OpenILS::Application::AppUtils->entityize(shift);
1506 $log->internal("Got MARC [$xml]");
# Script location comes from settings under apps/open-ils.ingest/app_settings.
1509 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1510 my $conf = OpenSRF::Utils::SettingsClient->new;
1512 my $libs = $conf->config_value(@pfx, 'script_path');
1513 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a reference; a non-reference value is
# wrapped in an array ref.
1514 my $script_libs = (ref($libs)) ? $libs : [$libs];
1516 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is assigned without "my", so it is declared
# outside the visible lines. "new Class (...)" is indirect-object syntax.
1518 $rd_script = new OpenILS::Utils::ScriptRunner
1519 ( file => $script_file,
1520 paths => $script_libs,
1521 reset_count => 100 );
1524 $log->debug("Setting up environment for descriptor extraction script...");
1525 $rd_script->insert('environment.marc' => $xml => 1);
1526 $log->debug("Environment building complete...");
# If run returns a false value, the error is logged and undef is returned.
# NOTE(review): the return only happens when $log->error(...) itself yields
# a true value, because of the "&&" -- confirm that it always does.
1528 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1529 $log->debug("Script for biblio descriptor extraction completed successfully");
# Each date is handled only when it is true and not a single space; what is
# done with it is not visible here.
1531 my $d1 = $res->date1;
1532 if ($d1 && $d1 ne ' ') {
1537 my $d2 = $res->date2;
1538 if ($d2 && $d2 ne ' ') {
# Expose biblio_descriptor as open-ils.ingest.descriptor.xml.
# NOTE(review): remaining arguments and the call's closing are not visible.
1545 __PACKAGE__->register_method(
1546 api_name => "open-ils.ingest.descriptor.xml",
1547 method => "biblio_descriptor",