1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::ScriptRunner;
14 use OpenILS::Utils::Fieldmapper;
15 use OpenSRF::Utils::JSON;
17 use OpenILS::Utils::Fieldmapper;
21 use Time::HiRes qw(time);
# Output formats known to this module, keyed by format name. Each entry
# carries the XML namespace URI (ns) for that format; the initialisation code
# further down this file additionally stores a compiled stylesheet under
# {xslt} for the mods, mods3, mods32 and mods33 entries.
23 our %supported_formats = (
24 mods33 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
26 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
27 mods => {ns => 'http://www.loc.gov/mods/'},
28 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
29 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
30 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
31 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
32 atom => {ns => 'http://www.w3.org/2005/Atom'},
33 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
37 rss10 => {ns => 'http://purl.org/rss/1.0/'},
38 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; the rest of the file logs via $log->debug(...).
43 my $log = 'OpenSRF::Utils::Logger';
# File-scoped XML parser and XSLT processor shared by the code below.
45 my $parser = XML::LibXML->new();
46 my $xslt = XML::LibXSLT->new();
# Lazy initialisation: everything below runs only while $xpathset has no
# keys, i.e. before any field definitions have been cached.
# NOTE(review): the enclosing sub header is not visible in this extract; other
# code in this file calls OpenILS::Application::Ingest->post_init(), which is
# presumably this routine -- confirm.
56 unless (keys %$xpathset) {
57 $log->debug("Running post_init", DEBUG);
# Stylesheet directory, looked up through SettingsClient (dirs => xsl).
59 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile each MARC21slim-to-MODS stylesheet at most once, caching
# the compiled stylesheet on the matching %supported_formats entry.
61 unless ($supported_formats{mods}{xslt}) {
62 $log->debug("Loading MODS XSLT", DEBUG);
63 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
64 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
67 unless ($supported_formats{mods3}{xslt}) {
68 $log->debug("Loading MODS v3 XSLT", DEBUG);
69 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
70 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
73 unless ($supported_formats{mods32}{xslt}) {
74 $log->debug("Loading MODS v32 XSLT", DEBUG);
75 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
76 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
79 unless ($supported_formats{mods33}{xslt}) {
80 $log->debug("Loading MODS v33 XSLT", DEBUG);
81 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS33.xsl");
82 $supported_formats{mods33}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows with search_field => 't'.
85 my $req = OpenSRF::AppSession
86 ->create('open-ils.cstore')
88 # XXX testing new metabib field use for faceting
89 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
90 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Cache each returned field definition's xpath, id and format under
# $xpathset->{field_class}{name}.
94 if (ref $req and @$req) {
96 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
97 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
98 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
99 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
105 # --------------------------------------------------------------------------------
108 package OpenILS::Application::Ingest::Biblio;
109 use base qw/OpenILS::Application::Ingest/;
110 use Unicode::Normalize;
# Read/write ingest of one bib record object (registered below as
# open-ils.ingest.full.biblio.object). Runs the read-only ingest to build
# $blob, copies the fingerprint and quality onto $bib, then -- through one
# connected cstore session with a transaction begun on it -- replaces the
# record's URI call numbers and maps, full_rec rows, record descriptor,
# classed field entries and metarecord source map, updates the bib itself
# and commits.
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value.
112 sub rw_biblio_ingest_single_object {
117 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
118 return undef unless ($blob);
120 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
121 $bib->quality( $blob->{fingerprint}->{quality} );
123 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
125 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
128 # update uri stuff ...
130 # gather URI call numbers for this record
# NOTE(review): no declaration of $u is visible before this point; the
# "for my $u" loop further down declares its own $u, so this assignment
# appears to target a different variable -- confirm.
131 my $uri_cns = $u->{call_number} = $cstore->request(
132 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' }
136 # gather the maps for those call numbers
# NOTE(review): this also assigns to $u->{call_number}, overwriting the value
# stored by the previous request -- confirm intent.
137 my $uri_maps = $u->{call_number} = $cstore->request(
138 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns }
141 # delete the old maps
142 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps);
144 # and delete the call numbers if there are no more URIs
145 if (!@{ $blob->{uri} }) {
146 $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns);
150 # now, add CNs, URIs and maps
# Newly created call numbers and URIs are remembered per owning library so
# that later entries with the same owner reuse them instead of creating more.
151 my %new_cns_by_owner;
152 my %new_uris_by_owner;
153 for my $u ( @{ $blob->{uri} } ) {
155 my $owner = $u->{call_number}->owning_lib;
157 if ($u->{call_number}->isnew) {
158 if ($new_cns_by_owner{$owner}) {
159 $u->{call_number} = $new_cns_by_owner{$owner};
161 $u->{call_number}->clear_id;
162 $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request(
163 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number}
168 if ($u->{uri}->isnew) {
169 if ($new_uris_by_owner{$owner}) {
170 $u->{uri} = $new_uris_by_owner{$owner};
172 $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request(
173 'open-ils.cstore.direct.asset.uri.create' => $u->{uri}
178 # Check for an existing CN-URI map
179 $tmp = $cstore->request(
180 'open-ils.cstore.direct.asset.uri_call_number_map.id_list',
181 { call_number => $u->{call_number}->id, uri => $u->{uri}->id }
# Build and store the call-number-to-URI map row.
186 my $umap = Fieldmapper::asset::uri_call_number_map->new;
187 $umap->uri($u->{uri}->id);
188 $umap->call_number($u->{call_number}->id);
190 $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1);
193 # update full_rec stuff ...
194 $tmp = $cstore->request(
195 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
196 { record => $bib->id }
# Delete the existing rows, then create the freshly extracted ones.
199 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
200 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
202 # update rec_descriptor stuff ...
203 $tmp = $cstore->request(
204 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
205 { record => $bib->id }
208 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
209 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
211 # deal with classed fields...
# First remove the existing entries of every class for this bib ...
212 for my $class ( qw/title author subject keyword series/ ) {
213 $tmp = $cstore->request(
214 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
215 { source => $bib->id }
218 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# ... then create the new entries; the cstore method name is derived from each
# object's class name with the leading "Fieldmapper::" removed.
220 for my $obj ( @{ $blob->{field_entries} } ) {
221 my $class = $obj->class_name;
222 $class =~ s/^Fieldmapper:://o;
224 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Drop this bib's existing metarecord source maps.
229 $tmp = $cstore->request(
230 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
231 { source => $bib->id }
234 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Load the metarecords those maps pointed at.
# NOTE(review): "my ... if" (a declaration under a statement modifier) has
# unspecified behaviour according to perlsyn; the following line normalises a
# non-reference result to [].
237 my $old_mrs = $cstore->request(
238 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
239 )->gather(1) if (@$tmp);
241 $old_mrs = [] if (!ref($old_mrs));
244 for my $m (@$old_mrs) {
245 if ($m->fingerprint eq $bib->fingerprint) {
248 my $others = $cstore->request(
249 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
254 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
264 # Get the matching MR, if any.
265 $mr = $cstore->request(
266 'open-ils.cstore.direct.metabib.metarecord.search',
267 { fingerprint => $bib->fingerprint }
# Hold requests of hold_type 'M' that target old metarecords flagged as deleted.
270 $holds = $cstore->request(
271 'open-ils.cstore.direct.action.hold_request.search.atomic',
272 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
273 )->gather(1) if (@$old_mrs);
276 for my $h (@$holds) {
278 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with this bib as master record.
285 $mr = new Fieldmapper::metabib::metarecord;
286 $mr->fingerprint( $bib->fingerprint );
287 $mr->master_record( $bib->id );
290 "open-ils.cstore.direct.metabib.metarecord.create",
291 $mr => { quiet => 'true' }
# Update the holds that are not already flagged as changed.
295 for my $h (grep { !$_->ischanged } @$holds) {
297 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Find the sources mapped to the metarecord and the highest-quality bib among them.
300 my $mrm = $cstore->request(
301 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
302 { metarecord => $mr->id }
306 my $best = $cstore->request(
307 "open-ils.cstore.direct.biblio.record_entry.search",
308 { id => [ map { $_->source } @$mrm ] },
309 { 'select' => { bre => [ qw/id quality/ ] },
310 order_by => { bre => "quality desc" },
# The master record becomes $best when its quality exceeds this bib's;
# the other visible assignments set it to this bib.
315 if ($best->quality > $bib->quality) {
316 $mr->master_record($best->id);
318 $mr->master_record($bib->id);
321 $mr->master_record($bib->id);
326 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the metarecord, store the updated bib, and commit.
329 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
330 $mrm->source($bib->id);
331 $mrm->metarecord($mr->id);
333 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
334 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
336 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
341 __PACKAGE__->register_method(
342 api_name => "open-ils.ingest.full.biblio.object",
343 method => "rw_biblio_ingest_single_object",
# Read/write ingest of the bib record passed as $rec (registered below as
# open-ils.ingest.full.biblio.record). Retrieves the record_entry through
# cstore after beginning a transaction, issues a rollback once the read is
# done, then hands the object to open-ils.ingest.full.biblio.object and
# returns the first element of its result.
# Returns undef when the retrieve yields nothing.
348 sub rw_biblio_ingest_single_record {
353 OpenILS::Application::Ingest->post_init();
354 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
355 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
357 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
359 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
362 return undef unless ($r and @$r);
364 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
366 __PACKAGE__->register_method(
367 api_name => "open-ils.ingest.full.biblio.record",
368 method => "rw_biblio_ingest_single_record",
# Read/write ingest of several bib records (registered below as
# open-ils.ingest.full.biblio.record_list). The ids may be passed either as a
# single array reference or as a flat argument list. The matching
# record_entry rows are fetched with one cstore search, after which
# open-ils.ingest.full.biblio.object is run for individual records.
# Returns undef when the search yields nothing. What is done with each
# successful result is not visible in this extract.
373 sub rw_biblio_ingest_record_list {
376 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
378 OpenILS::Application::Ingest->post_init();
379 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
380 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
382 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);
384 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
387 return undef unless ($r and @$r);
391 if (($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0]) {
397 __PACKAGE__->register_method(
398 api_name => "open-ils.ingest.full.biblio.record_list",
399 method => "rw_biblio_ingest_record_list",
# Read-only ingest of a bib record object (registered below as
# open-ils.ingest.full.biblio.object.readonly). Parses the record's MARC and
# runs the 856-URI, flat-MARC, field-entry, fingerprint and descriptor
# extractors over it, stamping the bib id onto the extracted rows.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint,
# descriptor and uri.
404 sub ro_biblio_ingest_single_object {
408 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
412 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# $max_cn: id of the highest-numbered call number (search ordered by id desc,
# limit 1) plus 1000.
415 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
416 $max_cn = int($cn->id) + 1000;
# NOTE(review): $max_uri is computed from the same asset.call_number search as
# $max_cn rather than from a URI lookup -- confirm this is intended.
420 my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1);
421 $max_uri = int($cn->id) + 1000;
426 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry extractors get the parsed document; the URI
# extractor gets the bib object; fingerprint and descriptor get the XML string.
428 my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri);
429 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
430 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
431 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
432 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every extracted row back to this bib.
434 $_->source($bib->id) for (@mXfe);
435 $_->record($bib->id) for (@mfr);
436 $rd->record($bib->id) if ($rd);
438 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris };
440 __PACKAGE__->register_method(
441 api_name => "open-ils.ingest.full.biblio.object.readonly",
442 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a MARCXML string (registered below as
# open-ils.ingest.full.biblio.xml.readonly). Entityizes and parses the XML,
# then runs the flat-MARC, field-entry, fingerprint and descriptor extractors.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint and
# descriptor; no record id is stamped onto the rows here.
447 sub ro_biblio_ingest_single_xml {
450 my $xml = OpenILS::Application::AppUtils->entityize(shift);
452 my $document = $parser->parse_string($xml);
454 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
455 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
456 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
457 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
459 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
461 __PACKAGE__->register_method(
462 api_name => "open-ils.ingest.full.biblio.xml.readonly",
463 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bib record passed as $rec (registered below as
# open-ils.ingest.full.biblio.record.readonly). Retrieves the record_entry
# from cstore, runs the XML read-only ingest on its MARC, and stamps $rec onto
# the resulting field entries, full_rec rows and descriptor.
# Returns undef when the retrieve yields nothing.
468 sub ro_biblio_ingest_single_record {
473 OpenILS::Application::Ingest->post_init();
474 my $r = OpenSRF::AppSession
475 ->create('open-ils.cstore')
476 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
479 return undef unless ($r and @$r);
481 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
483 $_->source($rec) for (@{$res->{field_entries}});
484 $_->record($rec) for (@{$res->{full_rec}});
485 $res->{descriptor}->record($rec);
489 __PACKAGE__->register_method(
490 api_name => "open-ils.ingest.full.biblio.record.readonly",
491 method => "ro_biblio_ingest_single_record",
# Streaming variant of the read-only record ingest (registered below as
# open-ils.ingest.full.biblio.record_stream.readonly). Receives one message at
# a time from $client (count => 1, timeout => 5), stops at the first message
# whose content is undefined, and responds with the ingest result for each
# record received.
496 sub ro_biblio_ingest_stream_record {
500 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
502 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
504 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
506 my $rec = $resp->content;
507 last unless (defined $rec);
509 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
510 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record onto the rows before sending them back.
512 $_->source($rec) for (@{$res->{field_entries}});
513 $_->record($rec) for (@{$res->{full_rec}});
515 $client->respond( $res );
520 __PACKAGE__->register_method(
521 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
522 method => "ro_biblio_ingest_stream_record",
# Streaming variant of the read-only XML ingest (registered below as
# open-ils.ingest.full.biblio.xml_stream.readonly). Receives one message at a
# time from $client (count => 1, timeout => 5), stops at the first message
# whose content is undefined, and responds with the ingest result for each XML
# payload received.
527 sub ro_biblio_ingest_stream_xml {
531 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
533 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
535 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
537 my $xml = $resp->content;
538 last unless (defined $xml);
540 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
541 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
543 $client->respond( $res );
548 __PACKAGE__->register_method(
549 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
550 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib objects (registered below as
# open-ils.ingest.full.biblio.bib_stream.import). Receives one bib object at a
# time from $client, runs the XML read-only ingest on its MARC, stamps the
# bib's id onto the field entries and full_rec rows, and responds with the
# result.
# NOTE(review): despite the rw_/import naming, the lines visible in this
# extract only run the read-only ingest and respond; no write is visible.
555 sub rw_biblio_ingest_stream_import {
559 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
561 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
563 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
565 my $bib = $resp->content;
566 last unless (defined $bib);
568 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
569 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
571 $_->source($bib->id) for (@{$res->{field_entries}});
572 $_->record($bib->id) for (@{$res->{full_rec}});
574 $client->respond( $res );
579 __PACKAGE__->register_method(
580 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
581 method => "rw_biblio_ingest_stream_import",
587 # --------------------------------------------------------------------------------
590 package OpenILS::Application::Ingest::Authority;
591 use base qw/OpenILS::Application::Ingest/;
592 use Unicode::Normalize;
# Read/write ingest of one authority record object (registered below as
# open-ils.ingest.full.authority.object). Runs the read-only authority ingest
# to build $blob, then -- through one connected cstore session with a
# transaction begun on it -- deletes the record's existing authority.full_rec
# rows, creates the newly extracted ones, and commits.
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value. Descriptor handling is commented out.
594 sub rw_authority_ingest_single_object {
599 my ($blob) = $self->method_lookup("open-ils.ingest.full.authority.object.readonly")->run($auth);
600 return undef unless ($blob);
602 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
604 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
607 # update full_rec stuff ...
608 $tmp = $cstore->request(
609 'open-ils.cstore.direct.authority.full_rec.id_list.atomic',
610 { record => $auth->id }
613 $cstore->request( 'open-ils.cstore.direct.authority.full_rec.delete' => $_ )->gather(1) for (@$tmp);
614 $cstore->request( 'open-ils.cstore.direct.authority.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
616 # XXX when we start extracting authority descriptors and adding sources ...
618 # update rec_descriptor stuff ...
619 #$tmp = $cstore->request(
620 # 'open-ils.cstore.direct.authority.record_descriptor.id_list.atomic',
621 # { record => $auth->id }
624 #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
625 #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.create' => $blob->{descriptor} )->gather(1);
626 #$cstore->request( 'open-ils.cstore.direct.authority.record_entry.update' => $auth )->gather(1);
628 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
633 __PACKAGE__->register_method(
634 api_name => "open-ils.ingest.full.authority.object",
635 method => "rw_authority_ingest_single_object",
# Read/write ingest of the authority record passed as $rec (registered below
# as open-ils.ingest.full.authority.record). Retrieves the record_entry
# through cstore after beginning a transaction, issues a rollback once the
# read is done, then hands the object to open-ils.ingest.full.authority.object
# and returns the first element of its result.
# Returns undef when the retrieve yields nothing.
640 sub rw_authority_ingest_single_record {
645 OpenILS::Application::Ingest->post_init();
646 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
647 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
649 my $r = $cstore->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )->gather(1);
651 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
654 return undef unless ($r and @$r);
656 return ($self->method_lookup("open-ils.ingest.full.authority.object")->run($r))[0];
658 __PACKAGE__->register_method(
659 api_name => "open-ils.ingest.full.authority.record",
660 method => "rw_authority_ingest_single_record",
# Read-only ingest of an authority record object (held in $bib here). Parses
# its MARC, extracts the flat authority rows and stamps the record id onto
# each of them.
# Returns a hash ref with the single key full_rec.
665 sub ro_authority_ingest_single_object {
669 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
671 my $document = $parser->parse_string($xml);
673 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
675 $_->record($bib->id) for (@mfr);
677 return { full_rec => \@mfr };
679 __PACKAGE__->register_method(
680 api_name => "open-ils.ingest.full.authority.object.readonly",
681 method => "ro_authority_ingest_single_object",
# NOTE(review): this registers the authority sub under a *serial* API name;
# the Serial package later in this file registers the same api_name for
# ro_serial_ingest_single_object -- confirm which registration is intended.
685 __PACKAGE__->register_method(
686 api_name => "open-ils.ingest.full.serial.object.readonly",
687 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARCXML string (registered below as
# open-ils.ingest.full.authority.xml.readonly). Entityizes and parses the XML
# and extracts the flat authority rows.
# Returns a hash ref with the single key full_rec.
693 sub ro_authority_ingest_single_xml {
696 my $xml = OpenILS::Application::AppUtils->entityize(shift);
698 my $document = $parser->parse_string($xml);
700 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
702 return { full_rec => \@mfr };
704 __PACKAGE__->register_method(
705 api_name => "open-ils.ingest.full.authority.xml.readonly",
706 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record passed as $rec (registered below as
# open-ils.ingest.full.authority.record.readonly). Retrieves the record_entry
# from cstore, runs the authority XML read-only ingest on its MARC and stamps
# $rec onto the resulting full_rec rows.
# Returns undef when the retrieve yields nothing.
711 sub ro_authority_ingest_single_record {
716 OpenILS::Application::Ingest->post_init();
717 my $r = OpenSRF::AppSession
718 ->create('open-ils.cstore')
719 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
722 return undef unless ($r and @$r);
724 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
726 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority xml.readonly method in this file returns only
# { full_rec => ... }, so $res->{descriptor} looks undefined here and this
# method call would fail -- confirm.
727 $res->{descriptor}->record($rec);
731 __PACKAGE__->register_method(
732 api_name => "open-ils.ingest.full.authority.record.readonly",
733 method => "ro_authority_ingest_single_record",
# Streaming variant of the read-only authority record ingest (registered below
# as open-ils.ingest.full.authority.record_stream.readonly). Receives one
# message at a time from $client (count => 1, timeout => 5), stops at the
# first message whose content is undefined, and responds with the ingest
# result for each record received.
738 sub ro_authority_ingest_stream_record {
742 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
744 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
746 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
748 my $rec = $resp->content;
749 last unless (defined $rec);
751 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
752 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
754 $_->record($rec) for (@{$res->{full_rec}});
756 $client->respond( $res );
761 __PACKAGE__->register_method(
762 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
763 method => "ro_authority_ingest_stream_record",
# Streaming variant of the read-only authority XML ingest (registered below as
# open-ils.ingest.full.authority.xml_stream.readonly). Receives one message at
# a time from $client (count => 1, timeout => 5), stops at the first message
# whose content is undefined, and responds with the ingest result for each XML
# payload received.
768 sub ro_authority_ingest_stream_xml {
772 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
774 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
776 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
778 my $xml = $resp->content;
779 last unless (defined $xml);
781 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
782 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
784 $client->respond( $res );
789 __PACKAGE__->register_method(
790 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
791 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects (registered below as
# open-ils.ingest.full.authority.bib_stream.import). Receives one object at a
# time from $client, runs the authority XML read-only ingest on its MARC,
# stamps the object's id onto the full_rec rows, and responds with the result.
# NOTE(review): despite the rw_/import naming, the lines visible in this
# extract only run the read-only ingest and respond; no write is visible.
796 sub rw_authority_ingest_stream_import {
800 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
802 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
804 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
806 my $bib = $resp->content;
807 last unless (defined $bib);
809 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
810 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
812 $_->record($bib->id) for (@{$res->{full_rec}});
814 $client->respond( $res );
819 __PACKAGE__->register_method(
820 api_name => "open-ils.ingest.full.authority.bib_stream.import",
821 method => "rw_authority_ingest_stream_import",
826 # --------------------------------------------------------------------------------
829 package OpenILS::Application::Ingest::Serial;
830 use base qw/OpenILS::Application::Ingest/;
831 use Unicode::Normalize;
# Read-only ingest of a serial record object (held in $bib here; registered
# below as open-ils.ingest.full.serial.object.readonly). Parses its MARC,
# extracts the flat serial rows and stamps the record id onto each of them.
# Returns a hash ref with the single key full_rec.
833 sub ro_serial_ingest_single_object {
837 my $xml = OpenILS::Application::AppUtils->entityize($bib->marc);
839 my $document = $parser->parse_string($xml);
841 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.serial.xml")->run($document);
843 $_->record($bib->id) for (@mfr);
845 return { full_rec => \@mfr };
847 __PACKAGE__->register_method(
848 api_name => "open-ils.ingest.full.serial.object.readonly",
849 method => "ro_serial_ingest_single_object",
# Read-only ingest of a serial MARCXML string (registered below as
# open-ils.ingest.full.serial.xml.readonly). Entityizes and parses the XML and
# extracts the flat serial rows.
# Returns a hash ref with the single key full_rec.
854 sub ro_serial_ingest_single_xml {
857 my $xml = OpenILS::Application::AppUtils->entityize(shift);
859 my $document = $parser->parse_string($xml);
861 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.serial.xml")->run($document);
863 return { full_rec => \@mfr };
865 __PACKAGE__->register_method(
866 api_name => "open-ils.ingest.full.serial.xml.readonly",
867 method => "ro_serial_ingest_single_xml",
# Read-only ingest of the serial record passed as $rec (registered below as
# open-ils.ingest.full.serial.record.readonly). Retrieves the record_entry
# from cstore, runs the serial XML read-only ingest on its MARC and stamps
# $rec onto the resulting full_rec rows.
# Returns undef when the retrieve yields nothing.
872 sub ro_serial_ingest_single_record {
877 OpenILS::Application::Ingest->post_init();
878 my $r = OpenSRF::AppSession
879 ->create('open-ils.cstore')
880 ->request( 'open-ils.cstore.direct.serial.record_entry.retrieve' => $rec )
883 return undef unless ($r and @$r);
885 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($r->marc);
887 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the serial xml.readonly method in this file returns only
# { full_rec => ... }, so $res->{descriptor} looks undefined here and this
# method call would fail -- confirm.
888 $res->{descriptor}->record($rec);
892 __PACKAGE__->register_method(
893 api_name => "open-ils.ingest.full.serial.record.readonly",
894 method => "ro_serial_ingest_single_record",
# Streaming variant of the read-only serial record ingest (registered below as
# open-ils.ingest.full.serial.record_stream.readonly). Receives one message at
# a time from $client (count => 1, timeout => 5), stops at the first message
# whose content is undefined, and responds with the ingest result for each
# record received.
899 sub ro_serial_ingest_stream_record {
903 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
905 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
907 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
909 my $rec = $resp->content;
910 last unless (defined $rec);
912 $log->debug("Running open-ils.ingest.full.serial.record.readonly ...");
913 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.record.readonly")->run($rec);
915 $_->record($rec) for (@{$res->{full_rec}});
917 $client->respond( $res );
922 __PACKAGE__->register_method(
923 api_name => "open-ils.ingest.full.serial.record_stream.readonly",
924 method => "ro_serial_ingest_stream_record",
# Streaming variant of the read-only serial XML ingest (registered below as
# open-ils.ingest.full.serial.xml_stream.readonly). Receives one message at a
# time from $client (count => 1, timeout => 5), stops at the first message
# whose content is undefined, and responds with the ingest result for each XML
# payload received.
929 sub ro_serial_ingest_stream_xml {
933 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
935 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
937 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
939 my $xml = $resp->content;
940 last unless (defined $xml);
942 $log->debug("Running open-ils.ingest.full.serial.xml.readonly ...");
943 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($xml);
945 $client->respond( $res );
950 __PACKAGE__->register_method(
951 api_name => "open-ils.ingest.full.serial.xml_stream.readonly",
952 method => "ro_serial_ingest_stream_xml",
# Streaming ingest of serial record objects (registered below as
# open-ils.ingest.full.serial.bib_stream.import). Receives one object at a
# time from $client, runs the serial XML read-only ingest on its MARC, stamps
# the object's id onto the full_rec rows, and responds with the result.
# NOTE(review): despite the rw_/import naming, the lines visible in this
# extract only run the read-only ingest and respond; no write is visible.
957 sub rw_serial_ingest_stream_import {
961 OpenILS::Application::Ingest->post_init();
# NOTE(review): no use of $ses is visible in this extract.
963 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
965 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
967 my $bib = $resp->content;
968 last unless (defined $bib);
970 $log->debug("Running open-ils.ingest.full.serial.xml.readonly ...");
971 my ($res) = $self->method_lookup("open-ils.ingest.full.serial.xml.readonly")->run($bib->marc);
973 $_->record($bib->id) for (@{$res->{full_rec}});
975 $client->respond( $res );
980 __PACKAGE__->register_method(
981 api_name => "open-ils.ingest.full.serial.bib_stream.import",
982 method => "rw_serial_ingest_stream_import",
988 # --------------------------------------------------------------------------------
989 # MARC index extraction
991 package OpenILS::Application::Ingest::XPATH;
992 use base qw/OpenILS::Application::Ingest/;
993 use Unicode::Normalize;
995 # give this an XML documentElement and an XPATH expression
# Collects the text found by an XPath expression into one space-separated
# string and returns it in Unicode NFD form.
# The visible arguments include a namespace prefix ($ns_prefix); $xml, $xpath,
# $ns_uri and $unique are unpacked on lines not visible in this extract.
996 sub xpath_to_string {
1000 my $ns_prefix = shift;
# Bind the prefix to the namespace URI on the element, when both are supplied.
1003 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1007 # grab the set of matching nodes
1008 my @nodes = $xml->findnodes( $xpath );
1009 for my $value (@nodes) {
1011 # grab all children of the node
1012 my @children = $value->childNodes();
1013 for my $child (@children) {
1015 # add the childs content to the growing buffer
# quotemeta makes the child's text safe to use as a literal regex pattern.
1016 my $content = quotemeta($child->textContent);
1017 next if ($unique && $string =~ /$content/); # uniquify the values
1018 $string .= $child->textContent . " ";
# NOTE(review): the condition selecting between the per-child append above and
# this whole-node append is not visible in this extract.
1021 $string .= $value->textContent . " ";
# Turn "word/word" and "dddd-dddd" into space-separated pairs.
1025 $string =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1026 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1028 return NFD($string);
# Extracts index strings from a MARCXML document for the requested field
# classes (registered below as open-ils.ingest.field_entry.class.xml). For
# every field definition cached in $xpathset under each class, evaluates the
# definition's XPath, normalises the resulting text, and responds with a
# Fieldmapper::metabib::<class>_field_entry object carrying the value and the
# field id.
1031 sub class_index_string_xml {
1037 OpenILS::Application::Ingest->post_init();
# Accept either an already parsed document or an XML string.
1038 $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
# Transformed documents, keyed by format name, so each transform runs once.
1040 my %transform_cache;
1042 for my $class (@classes) {
1043 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1044 for my $type ( keys %{ $xpathset->{$class} } ) {
1046 my $def = $xpathset->{$class}->{$type};
1047 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
1049 my $document = $xml;
# Reuse the cached transform for this format, or run the format's stylesheet.
# NOTE(review): the condition guarding this transform is not visible here.
1052 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
1053 $transform_cache{$def->{format}} = $document;
# The format's namespace URI is passed with the format name as its prefix.
1056 my $value = xpath_to_string(
1057 $document->documentElement => $def->{xpath},
1058 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop \pM (mark) and \pC (other/control)
# characters, trim trailing non-word characters, remove runs of dots that sit
# between word boundaries, and lowercase.
1064 $value = NFD($value);
1065 $value =~ s/\pM+//sgo;
1066 $value =~ s/\pC+//sgo;
1067 $value =~ s/\W+$//sgo;
1069 $value =~ s/\b\.+\b//sgo;
1070 $value = lc($value);
1072 my $fm = $class_constructor->new;
1073 $fm->value( $value );
1074 $fm->field( $xpathset->{$class}->{$type}->{id} );
1075 $client->respond($fm);
1080 __PACKAGE__->register_method(
1081 api_name => "open-ils.ingest.field_entry.class.xml",
1082 method => "class_index_string_xml",
# Record-based wrapper around open-ils.ingest.field_entry.class.xml
# (registered below as open-ils.ingest.field_entry.class.record): retrieves
# the record passed as $rec from cstore and responds with every field entry
# extracted from its MARC.
# Returns undef when the retrieve yields nothing.
1088 sub class_index_string_record {
# NOTE(review): shift yields a single value, so @classes holds at most one
# class (or one array ref) -- confirm whether the remaining arguments were
# meant to be included.
1092 my @classes = shift;
1094 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves an *authority* record_entry although the entries
# produced downstream are metabib field entries -- confirm the intended table.
1095 my $r = OpenSRF::AppSession
1096 ->create('open-ils.cstore')
1097 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
1100 return undef unless ($r and @$r);
1102 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
1104 $client->respond($fm);
1108 __PACKAGE__->register_method(
1109 api_name => "open-ils.ingest.field_entry.class.record",
1110 method => "class_index_string_record",
# Extracts field entries for every class currently cached in $xpathset from
# the given XML (registered below as
# open-ils.ingest.extract.field_entry.all.xml), by delegating to
# open-ils.ingest.field_entry.class.xml and responding with each entry.
1116 sub all_index_string_xml {
1121 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
1122 $client->respond($fm);
1126 __PACKAGE__->register_method(
1127 api_name => "open-ils.ingest.extract.field_entry.all.xml",
1128 method => "all_index_string_xml",
# Record-based variant of the all-classes extraction (registered below as
# open-ils.ingest.extract.field_entry.all.record): retrieves the bib record
# passed as $rec from cstore and responds with the field entries extracted
# from its MARC for every class cached in $xpathset.
# Returns undef when the retrieve yields nothing.
1134 sub all_index_string_record {
1139 OpenILS::Application::Ingest->post_init();
1140 my $r = OpenSRF::AppSession
1141 ->create('open-ils.cstore')
1142 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1145 return undef unless ($r and @$r);
1147 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
1149 $client->respond($fm);
1153 __PACKAGE__->register_method(
1154 api_name => "open-ils.ingest.extract.field_entry.all.record",
1155 method => "all_index_string_record",
1161 # --------------------------------------------------------------------------------
1164 package OpenILS::Application::Ingest::FlatMARC;
1165 use base qw/OpenILS::Application::Ingest/;
1166 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper::<xmltype>::full_rec
# objects: rows are built for the leader, for each controlfield, and for each
# subfield of each datafield. $xmltype defaults to 'metabib'.
# The row list (@ns_list) is built on lines not fully visible in this extract.
1169 sub _marcxml_to_full_rows {
1171 my $marcxml = shift;
1172 my $xmltype = shift || 'metabib';
1174 my $type = "Fieldmapper::${xmltype}::full_rec";
# First element named "record", regardless of namespace.
1178 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader: strip \pM and \pC characters and trailing non-word characters.
1180 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1181 next unless $tagline;
1183 my $ns = $type->new;
1186 my $val = $tagline->textContent;
1188 $val =~ s/\pM+//sgo;
1189 $val =~ s/\pC+//sgo;
1190 $val =~ s/\W+$//sgo;
# Control fields: same clean-up, with the row tagged by the field's tag attribute.
1196 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1197 next unless $tagline;
1199 my $ns = $type->new;
1201 $ns->tag( $tagline->getAttribute( "tag" ) );
1202 my $val = $tagline->textContent;
1204 $val =~ s/\pM+//sgo;
1205 $val =~ s/\pC+//sgo;
1206 $val =~ s/\W+$//sgo;
# Data fields: one row per subfield.
1212 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1213 next unless $tagline;
1215 my $tag = $tagline->getAttribute( "tag" );
1216 my $ind1 = $tagline->getAttribute( "ind1" );
1217 my $ind2 = $tagline->getAttribute( "ind2" );
1219 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1222 my $ns = $type->new;
1227 $ns->subfield( $data->getAttribute( "code" ) );
1228 my $val = $data->textContent;
# Clean-up as above, plus "dddd-dddd" and "word/word" split on a space; the
# stored value is lowercased.
1230 $val =~ s/\pM+//sgo;
1231 $val =~ s/\pC+//sgo;
1232 $val =~ s/\W+$//sgo;
1233 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1234 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1235 $ns->value( lc($val) );
# For metabib 245 fields, subfield "a" is processed a second time with the
# first $ind2 characters skipped (presumably the MARC non-filing character
# count -- confirm).
1240 if ($xmltype eq 'metabib' and $tag eq '245') {
1243 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1244 next unless ($data and $data->getAttribute( "code" ) eq 'a');
1251 $ns->subfield( $data->getAttribute( "code" ) );
1252 my $val = substr( $data->textContent, $ind2 );
1254 $val =~ s/\pM+//sgo;
1255 $val =~ s/\pC+//sgo;
1256 $val =~ s/\W+$//sgo;
1257 $val =~ s/(\w+)\/(\w+)/$1 $2/sgo;
1258 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
1259 $ns->value( lc($val) );
1266 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the MARCXML flattening handler: turns one MARCXML document into a
# stream of row objects, sending one response per row.
# NOTE(review): the `sub` line and the unpacking of $self, $client and $xml are
# not visible in this excerpt; presumably this is flat_marc_xml -- confirm.
1275 $log->debug("processing [$xml]");
# A plain string is entityized and parsed; a reference is assumed to be an
# already-parsed document and is passed through untouched.
1277 $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml);
# Row type defaults to 'metabib'; the API name this handler was invoked under
# switches it to 'authority' or 'serial' (the serial test runs last, so it wins
# if both patterns match).
1279 my $type = 'metabib';
1280 $type = 'authority' if ($self->api_name =~ /authority/o);
1281 $type = 'serial' if ($self->api_name =~ /serial/o);
# Run the class-level initialisation hook before doing any work.
1283 OpenILS::Application::Ingest->post_init();
# Each row returned by _marcxml_to_full_rows is sent as its own response.
1285 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Publish the flat_marc_xml handler under three API names, one each for
# authority, biblio and serial records.
# NOTE(review): the remaining arguments and closing parenthesis of each call
# are not visible in this excerpt.
1288 __PACKAGE__->register_method(
1289 api_name => "open-ils.ingest.flat_marc.authority.xml",
1290 method => "flat_marc_xml",
# Bibliographic variant.
1295 __PACKAGE__->register_method(
1296 api_name => "open-ils.ingest.flat_marc.biblio.xml",
1297 method => "flat_marc_xml",
# Serial variant.
1302 __PACKAGE__->register_method(
1303 api_name => "open-ils.ingest.flat_marc.serial.xml",
1304 method => "flat_marc_xml",
# flat_marc_record: fetch a stored record_entry through open-ils.cstore and
# stream the flattened rows of its MARC by delegating to the matching
# open-ils.ingest.flat_marc.<type>.xml method.
# Returns undef when no record comes back or the record has no marc value.
# NOTE(review): the unpacking of $self, $client and $rec is not visible in this
# excerpt; $rec is what is handed to the cstore retrieve call.
1310 sub flat_marc_record {
# Record class defaults to 'biblio'; the API name this handler was invoked
# under switches it to 'authority' or 'serial'. The value is used both in the
# cstore method name and in the delegated ingest method name below.
1315 my $type = 'biblio';
1316 $type = 'authority' if ($self->api_name =~ /authority/o);
1317 $type = 'serial' if ($self->api_name =~ /serial/o);
# Run the class-level initialisation hook before doing any work.
1319 OpenILS::Application::Ingest->post_init();
# Ask cstore for the record identified by $rec.
# NOTE(review): the end of this chained statement is not visible here.
1320 my $r = OpenSRF::AppSession
1321 ->create('open-ils.cstore')
1322 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
# Nothing to flatten without a record that carries MARC.
1326 return undef unless ($r and $r->marc);
# Look up the XML-level method by name and run it in-process on the MARC,
# collecting every row it produces.
1328 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Send each row to the caller and also log its JSON form.
1329 for my $row (@rows) {
1330 $client->respond($row);
1331 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# Publish the flat_marc_record handler under three API names, one each for
# biblio, authority and serial record entries.
# NOTE(review): the remaining arguments and closing parenthesis of each call
# are not visible in this excerpt.
1335 __PACKAGE__->register_method(
1336 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1337 method => "flat_marc_record",
# Authority variant.
1342 __PACKAGE__->register_method(
1343 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1344 method => "flat_marc_record",
# Serial variant.
1349 __PACKAGE__->register_method(
1350 api_name => "open-ils.ingest.flat_marc.serial.record_entry",
1351 method => "flat_marc_record",
1357 # --------------------------------------------------------------------------------
1360 package OpenILS::Application::Ingest::Biblio::URI;
1361 use base qw/OpenILS::Application::Ingest/;
1362 use Unicode::Normalize;
1363 use OpenSRF::EX qw/:try/;
# _extract_856_uris: scan a record's MARCXML for 856 fields and build, for each
# usable one, a { uri => ..., call_number => ... } pair that is pushed onto
# @objects.
# $max_uri and $max_cn are dereferenced and post-incremented below, so they are
# used as references to running ID counters.
# NOTE(review): several lines of this routine are not visible in this excerpt:
# the unpacking of $rec and $max_cn, the declarations of @objects and
# %cn_cache, the heads of the chained cstore requests, the conditions guarding
# object creation, and the final return. Comments below describe only what the
# visible lines do.
1366 sub _extract_856_uris {
1370 my $max_uri = shift;
1373 my $recid = $rec->id;
1374 my $marcxml = $rec->marc;
# Select datafield elements (matched by local name, so any namespace) with
# tag 856, first indicator 4 or 1, and second indicator 0 or 1.
1376 my $document = $parser->parse_string($marcxml);
1377 my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]');
# One connected cstore session is used for the lookups and is disconnected at
# the end of the routine.
1379 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
1383 for my $node (@nodes) {
# first, is there a URI?
1385 my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()');
1386 next unless ($href);
# now, find the best possible label
# (subfield y is preferred, falling back to subfield 3)
1389 my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()');
1390 $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()');
# Use-restriction text: subfield z, then 2, then n -- first non-empty wins.
1394 my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()');
1395 $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()');
1396 $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
# moving on to the URI owner
# (subfield 9, then w, then n -- first non-empty wins)
1399 my $owner = $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce
1400 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="w"]/text()');
1401 $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()');
# When the value contains a parenthesised run of word characters, the whole
# value is replaced by the first such run; otherwise it is left unchanged.
1403 $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ...
# no owner? skip it :(
1406 next unless ($owner);
# Search for the org unit whose shortname equals the owner string.
# NOTE(review): the start of this statement is not visible; the result is
# presumably what later lines use as $org -- confirm.
1409 ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} )
# now we can construct the uri object
# First search for an active asset.uri with the same label, href and
# use restriction.
1416 ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } )
# Build a new URI object, taking its id from the counter behind $max_uri and
# then advancing that counter.
# NOTE(review): presumably only done when the search found nothing -- the
# guarding condition is not visible here.
1420 $uri = Fieldmapper::asset::uri->new;
1422 $uri->id( $$max_uri++ );
1423 $uri->label($label);
1426 $uri->use_restriction($use);
# see if we need to create a call number
# A call number cached for this org unit is reused as a clone with its isnew
# flag cleared.
1430 my $cn = $cn_cache{$org->id};
1431 $cn = $cn->clone if ($cn);
1432 $cn->clear_isnew if ($cn);
# Search for a call number labelled '##URI##' for this owning library and
# record.
1435 ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } )
# Build a new '##URI##' call number, taking its id from the counter behind
# $max_cn; creator and editor are copied from the record.
# NOTE(review): presumably only done when neither the cache nor the search
# produced one -- the guarding condition is not visible here.
1439 $cn = Fieldmapper::asset::call_number->new;
1442 $cn->id( $$max_cn++ );
1443 $cn->owning_lib( $org->id );
1444 $cn->record( $recid );
1445 $cn->create_date( 'now' );
1446 $cn->creator( $rec->creator );
1447 $cn->editor( $rec->editor );
1448 $cn->edit_date( 'now' );
1449 $cn->label( '##URI##' );
# Remember the call number for this org unit so later 856 fields reuse it.
1452 $cn_cache{$org->id} = $cn;
1454 push @objects, { uri => $uri, call_number => $cn };
1457 $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid");
1458 $cstore->disconnect;
# get_uris_record: fetch a biblio record_entry through open-ils.cstore and
# stream every item that _extract_856_uris produces for it.
# Returns undef when no record comes back or the record has no marc value.
# NOTE(review): the unpacking of $self, $client and $rec is not visible in this
# excerpt.
1462 sub get_uris_record {
# Run the class-level initialisation hook before doing any work.
1467 OpenILS::Application::Ingest->post_init();
# Ask cstore for the record identified by $rec.
# NOTE(review): the end of this chained statement is not visible here.
1468 my $r = OpenSRF::AppSession
1469 ->create('open-ils.cstore')
1470 ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec )
1473 return undef unless ($r and $r->marc);
# Only the record is passed here; no ID-counter references are supplied.
1475 $client->respond($_) for (_extract_856_uris($r));
# Publish get_uris_record as open-ils.ingest.856_uri.record.
# NOTE(review): the remaining arguments and closing parenthesis are not visible
# in this excerpt.
1478 __PACKAGE__->register_method(
1479 api_name => "open-ils.ingest.856_uri.record",
1480 method => "get_uris_record",
# get_uris_object: stream every item that _extract_856_uris produces for a
# record object supplied directly by the caller.
# Returns undef when no object is given or the object has no marc value.
# NOTE(review): the unpacking of $self, $client, $obj and $max_cn is not
# visible in this excerpt.
1486 sub get_uris_object {
1491 my $max_uri = shift;
1493 return undef unless ($obj and $obj->marc);
# The counters are passed by reference, so _extract_856_uris can read and
# advance this routine's $max_cn and $max_uri.
1495 $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri));
# Publish get_uris_object as open-ils.ingest.856_uri.object.
# NOTE(review): the remaining arguments and closing parenthesis are not visible
# in this excerpt.
1498 __PACKAGE__->register_method(
1499 api_name => "open-ils.ingest.856_uri.object",
1500 method => "get_uris_object",
1507 # --------------------------------------------------------------------------------
1510 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1511 use base qw/OpenILS::Application::Ingest/;
1512 use Unicode::Normalize;
1513 use OpenSRF::EX qw/:try/;
# biblio_fingerprint_record: fetch a biblio record_entry through
# open-ils.cstore and compute its fingerprint by delegating to
# open-ils.ingest.fingerprint.xml.
# Returns undef when no record comes back or the record has no marc value.
# NOTE(review): the unpacking of $self, $client and $rec, and the final return,
# are not visible in this excerpt.
1515 sub biblio_fingerprint_record {
# Run the class-level initialisation hook before doing any work.
1520 OpenILS::Application::Ingest->post_init();
# Ask cstore for the record identified by $rec.
# NOTE(review): the end of this chained statement is not visible here.
1522 my $r = OpenSRF::AppSession
1523 ->create('open-ils.cstore')
1524 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1527 return undef unless ($r and $r->marc);
# Only the first value returned by the XML-level fingerprint method is kept.
1529 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is dereferenced as a hash on the next line, so "[$fp]" here
# presumably logs the reference's string form rather than its contents.
1530 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Truncate the quality value to an integer.
1531 $fp->{quality} = int($fp->{quality});
# Publish biblio_fingerprint_record as open-ils.ingest.fingerprint.record.
# NOTE(review): the remaining arguments and closing parenthesis are not visible
# in this excerpt.
1534 __PACKAGE__->register_method(
1535 api_name => "open-ils.ingest.fingerprint.record",
1536 method => "biblio_fingerprint_record",
# biblio_fingerprint: run the configured fingerprint script over a MARCXML
# string. Returns undef if the script run yields a false value and the error
# log call returns true.
# NOTE(review): the unpacking of $self and $client, the declaration of
# $fp_script, any guard around constructing it, and the final return are not
# visible in this excerpt.
1542 sub biblio_fingerprint {
# The next positional argument is the MARCXML; it is entityized up front.
1545 my $xml = OpenILS::Application::AppUtils->entityize(shift);
1547 $log->internal("Got MARC [$xml]");
# Settings are read from apps / open-ils.ingest / app_settings.
1550 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1551 my $conf = OpenSRF::Utils::SettingsClient->new;
# script_path may come back as a single value or a reference; it is normalised
# to an array reference for the runner's `paths` option.
1553 my $libs = $conf->config_value(@pfx, 'script_path');
1554 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
1555 my $script_libs = (ref($libs)) ? $libs : [$libs];
1557 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is assigned without `my`, so it is declared outside this
# statement.
1559 $fp_script = new OpenILS::Utils::ScriptRunner
1560 ( file => $script_file,
1561 paths => $script_libs,
1562 reset_count => 100 );
# Hand the MARCXML to the script as environment.marc.
1565 $fp_script->insert('environment' => {marc => $xml} => 1);
# On a false result the error is logged; the `return undef` only executes if
# $log->error itself returns a true value (it is chained with &&).
1567 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1568 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish biblio_fingerprint as open-ils.ingest.fingerprint.xml.
# NOTE(review): the remaining arguments and closing parenthesis are not visible
# in this excerpt.
1572 __PACKAGE__->register_method(
1573 api_name => "open-ils.ingest.fingerprint.xml",
1574 method => "biblio_fingerprint",
# biblio_descriptor: run the configured descriptor-extraction script over a
# MARCXML string and then inspect the date1 and date2 values of its result.
# Returns undef if the script run yields a false value and the error log call
# returns true.
# NOTE(review): the unpacking of $self and $client, the declaration of
# $rd_script, any guard around constructing it, the bodies of the two date
# checks, and the final return are not visible in this excerpt.
1580 sub biblio_descriptor {
# The next positional argument is the MARCXML; it is entityized up front.
1583 my $xml = OpenILS::Application::AppUtils->entityize(shift);
1585 $log->internal("Got MARC [$xml]");
# Settings are read from apps / open-ils.ingest / app_settings.
1588 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1589 my $conf = OpenSRF::Utils::SettingsClient->new;
# script_path may come back as a single value or a reference; it is normalised
# to an array reference for the runner's `paths` option.
1591 my $libs = $conf->config_value(@pfx, 'script_path');
1592 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
1593 my $script_libs = (ref($libs)) ? $libs : [$libs];
1595 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# $rd_script is assigned without `my`, so it is declared outside this
# statement.
1597 $rd_script = new OpenILS::Utils::ScriptRunner
1598 ( file => $script_file,
1599 paths => $script_libs,
1600 reset_count => 100 );
1603 $log->debug("Setting up environment for descriptor extraction script...");
# Hand the MARCXML to the script under the key 'environment.marc'.
1604 $rd_script->insert('environment.marc' => $xml => 1);
1605 $log->debug("Environment building complete...");
# On a false result the error is logged; the `return undef` only executes if
# $log->error itself returns a true value (it is chained with &&).
1607 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1608 $log->debug("Script for biblio descriptor extraction completed successfully");
# date1 is acted on only when it is true and differs from the blank
# placeholder string it is compared against.
1610 my $d1 = $res->date1;
1611 if ($d1 && $d1 ne ' ') {
# Same check for date2.
1616 my $d2 = $res->date2;
1617 if ($d2 && $d2 ne ' ') {
# Publish biblio_descriptor as open-ils.ingest.descriptor.xml.
# NOTE(review): the remaining arguments and closing parenthesis are not visible
# in this excerpt.
1624 __PACKAGE__->register_method(
1625 api_name => "open-ils.ingest.descriptor.xml",
1626 method => "biblio_descriptor",