1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
13 use OpenSRF::Utils::JSON;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Registry of supported record formats, keyed by format name. Each entry
# visible here carries the XML namespace URI (ns) of that format; the
# initialisation code further down attaches a compiled stylesheet (xslt) to
# the mods and mods3 entries.
# NOTE(review): the numbering embedded in this listing has gaps, so further
# entries and the closing of the hash are not visible here.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class name; logging calls below go through it as class methods
# ($log->debug, $log->error, $log->internal).
39 my $log = 'OpenSRF::Utils::Logger';
# One XML parser and one XSLT processor, created once at file scope and used
# by the code below.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# Lazy initialisation (logged as "post_init"): everything below runs only
# while $xpathset has no keys yet.
# NOTE(review): the enclosing sub header, the loop over the fetched field
# definitions and the closing braces are not visible in this excerpt.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL stylesheets, looked up through the OpenSRF
# settings client (dirs => xsl).
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse and compile the MARC21slim-to-MODS stylesheet unless the mods format
# entry already has one, and keep it on that entry.
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet and the mods3 format entry.
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows with search_field => 't'.
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
73 # XXX testing new metabib field use for faceting
74 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
75 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Only continue when the result is a non-empty array reference.
79 if (ref $req and @$req) {
# Index each field definition $f by field class and name, recording its
# xpath expression, id and format.
81 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
82 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
83 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
84 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with its hexadecimal
# numeric character reference (&#xHH;); the replacement is evaluated as code
# because of the /e flag.
# NOTE(review): presumably the body of entityize(), which other subs in this
# file call -- the sub header is not visible in this excerpt.
100 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
104 # --------------------------------------------------------------------------------
107 package OpenILS::Application::Ingest::Biblio;
108 use base qw/OpenILS::Application::Ingest/;
109 use Unicode::Normalize;
# Full read/write ingest of one bib record object ($bib). Runs the read-only
# ingest to derive the data, then, over a cstore session on which a
# transaction is begun, replaces the record's derived rows and updates the
# metarecord bookkeeping before committing.
# NOTE(review): the numbering embedded in this listing has gaps; argument
# unpacking, several conditionals and closing braces are not visible here.
111 sub rw_biblio_ingest_single_object {
# Derive full_rec rows, field entries, fingerprint and descriptor; give up
# when the read-only ingest returns nothing.
116 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
117 return undef unless ($blob);
# Copy the computed fingerprint and quality onto the bib object.
119 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
120 $bib->quality( $blob->{fingerprint}->{quality} );
122 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
124 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
126 # update full_rec stuff ...
127 my $tmp = $cstore->request(
128 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
129 { record => $bib->id }
# Delete every existing full_rec row for this record, then create the new ones.
132 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
133 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
135 # update rec_descriptor stuff ...
136 $tmp = $cstore->request(
137 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
138 { record => $bib->id }
# Same replace pattern; there is a single new descriptor object.
141 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
142 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
144 # deal with classed fields...
# For each of the five field classes, delete the existing *_field_entry rows
# whose source is this bib.
145 for my $class ( qw/title author subject keyword series/ ) {
146 $tmp = $cstore->request(
147 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
148 { source => $bib->id }
151 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Create each new field entry; the cstore method name is built from the
# object's class name with the leading "Fieldmapper::" removed.
153 for my $obj ( @{ $blob->{field_entries} } ) {
154 my $class = $obj->class_name;
155 $class =~ s/^Fieldmapper:://o;
157 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Fetch the metarecord source maps that point at this bib and delete them.
162 $tmp = $cstore->request(
163 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
164 { source => $bib->id }
167 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Load the metarecords those maps referred to.
# NOTE(review): "my $x = ... if (...)" combines a declaration with a statement
# modifier, which perlsyn describes as undefined behaviour -- confirm this is
# safe; the next statement normalises a non-reference result to [].
170 my $old_mrs = $cstore->request(
171 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
172 )->gather(1) if (@$tmp);
174 $old_mrs = [] if (!ref($old_mrs));
# Walk the metarecords this bib used to be mapped to. The visible lines
# compare fingerprints, list the remaining source maps of a metarecord and
# issue a metarecord delete; the conditions joining them are not visible in
# this excerpt.
177 for my $m (@$old_mrs) {
178 if ($m->fingerprint eq $bib->fingerprint) {
181 my $others = $cstore->request(
182 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
187 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
197 # Get the matching MR, if any.
198 $mr = $cstore->request(
199 'open-ils.cstore.direct.metabib.metarecord.search',
200 { fingerprint => $bib->fingerprint }
# Holds of type 'M' whose target is one of the old metarecords flagged
# isdeleted; only queried when there were old metarecords.
203 $holds = $cstore->request(
204 'open-ils.cstore.direct.action.hold_request.search.atomic',
205 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
206 )->gather(1) if (@$old_mrs);
# Write each of those holds back (what is changed on $h beforehand is not
# visible in this excerpt).
209 for my $h (@$holds) {
211 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a metarecord for this fingerprint with this bib as master record and
# create it with the quiet option.
# NOTE(review): presumably only when no matching metarecord was found -- the
# guarding condition is not visible here.
218 $mr = new Fieldmapper::metabib::metarecord;
219 $mr->fingerprint( $bib->fingerprint );
220 $mr->master_record( $bib->id );
223 "open-ils.cstore.direct.metabib.metarecord.create",
224 $mr => { quiet => 'true' }
# Update the holds that are not flagged ischanged.
228 for my $h (grep { !$_->ischanged } @$holds) {
230 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Choose the master record: look up the bibs currently mapped to the
# metarecord, ordered by quality descending, and compare against this bib.
233 my $mrm = $cstore->request(
234 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
235 { metarecord => $mr->id }
239 my $best = $cstore->request(
240 "open-ils.cstore.direct.biblio.record_entry.search",
241 { id => [ map { $_->source } @$mrm ] },
242 { 'select' => { bre => [ qw/id quality/ ] },
243 order_by => { bre => "quality desc" },
# The existing best record stays master only when its quality is strictly
# higher; otherwise this bib becomes the master record.
248 if ($best->quality > $bib->quality) {
249 $mr->master_record($best->id);
251 $mr->master_record($bib->id);
254 $mr->master_record($bib->id);
259 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the metarecord and store the updated bib itself.
262 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
263 $mrm->source($bib->id);
264 $mrm->metarecord($mr->id);
266 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
267 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# Commit; return undef when the commit request yields a false value.
269 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Register the sub under its API name.
273 __PACKAGE__->register_method(
274 api_name => "open-ils.ingest.full.biblio.object",
275 method => "rw_biblio_ingest_single_object",
# Read/write ingest of one bib record identified by $rec: fetches the record
# entry from cstore and hands it to open-ils.ingest.full.biblio.object.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
280 sub rw_biblio_ingest_single_record {
285 OpenILS::Application::Ingest->post_init();
# The retrieve is wrapped in a transaction that is rolled back afterwards.
286 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
287 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
289 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
291 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# Nothing to ingest when the retrieve came back empty.
294 return undef unless ($r and @$r);
# Return the first value produced by the object-level ingest.
296 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Register the sub under its API name.
298 __PACKAGE__->register_method(
299 api_name => "open-ils.ingest.full.biblio.record",
300 method => "rw_biblio_ingest_single_record",
# Read/write ingest of several bib records: fetches the record entries from
# cstore and runs open-ils.ingest.full.biblio.object on each of them.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
305 sub rw_biblio_ingest_record_list {
# Accept the ids either as one array reference or as a flat argument list.
308 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
310 OpenILS::Application::Ingest->post_init();
311 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
312 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
# NOTE(review): the search filters on $rec, while the ids were collected into
# @rec above and no declaration of $rec is visible in this excerpt -- confirm
# whether \@rec was intended.
314 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
316 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
319 return undef unless ($r and @$r);
# Add the first value returned by each per-record ingest to $count.
322 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
# Register the sub under its API name.
326 __PACKAGE__->register_method(
327 api_name => "open-ils.ingest.full.biblio.record_list",
328 method => "rw_biblio_ingest_record_list",
# Read-only ingest of one bib record object ($bib): extracts flat MARC rows,
# field entries, fingerprint and descriptor from its MARC XML and returns
# them in a hash reference, each stamped with the bib id.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
333 sub ro_biblio_ingest_single_object {
# Entity-encode the MARC before parsing it into a document.
337 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
339 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry methods get the parsed document; the
# fingerprint and descriptor methods get the XML string.
341 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
342 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
343 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
344 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every derived object to this bib; the descriptor only when one came back.
346 $_->source($bib->id) for (@mXfe);
347 $_->record($bib->id) for (@mfr);
348 $rd->record($bib->id) if ($rd);
350 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub under its API name.
352 __PACKAGE__->register_method(
353 api_name => "open-ils.ingest.full.biblio.object.readonly",
354 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a raw MARC XML string: same extraction as the
# object-based variant, but no record id is stamped on the results.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
359 sub ro_biblio_ingest_single_xml {
# Entity-encode the next argument, then parse it into a document.
362 my $xml = OpenILS::Application::Ingest::entityize(shift);
364 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry methods get the parsed document; the
# fingerprint and descriptor methods get the XML string.
366 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
367 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
368 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
369 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
371 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub under its API name.
373 __PACKAGE__->register_method(
374 api_name => "open-ils.ingest.full.biblio.xml.readonly",
375 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bib record identified by $rec: retrieves the record
# entry from cstore, runs the XML-based read-only ingest on its MARC and
# stamps the results with $rec.
# NOTE(review): argument unpacking, the end of the request chain, the return
# statement and closing braces are not visible in this excerpt.
380 sub ro_biblio_ingest_single_record {
385 OpenILS::Application::Ingest->post_init();
386 my $r = OpenSRF::AppSession
387 ->create('open-ils.cstore')
388 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
391 return undef unless ($r and @$r);
393 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
395 $_->source($rec) for (@{$res->{field_entries}});
396 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike ro_biblio_ingest_single_object, the descriptor is
# dereferenced here without checking that it is defined -- confirm it can
# never be undef at this point.
397 $res->{descriptor}->record($rec);
# Register the sub under its API name.
401 __PACKAGE__->register_method(
402 api_name => "open-ils.ingest.full.biblio.record.readonly",
403 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id: reads ids from the client one
# message at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines.
408 sub ro_biblio_ingest_stream_record {
412 OpenILS::Application::Ingest->post_init();
414 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
416 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
418 my $rec = $resp->content;
419 last unless (defined $rec);
421 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
422 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the received id on the field entries and full_rec rows.
424 $_->source($rec) for (@{$res->{field_entries}});
425 $_->record($rec) for (@{$res->{full_rec}});
427 $client->respond( $res );
# Register the sub under its API name.
432 __PACKAGE__->register_method(
433 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
434 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of raw MARC XML: reads XML strings from the
# client one message at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines.
439 sub ro_biblio_ingest_stream_xml {
443 OpenILS::Application::Ingest->post_init();
445 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
447 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
449 my $xml = $resp->content;
450 last unless (defined $xml);
452 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
453 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
455 $client->respond( $res );
# Register the sub under its API name.
460 __PACKAGE__->register_method(
461 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
462 method => "ro_biblio_ingest_stream_xml",
# Streaming import ingest: reads bib objects from the client one message at a
# time, runs the XML-based read-only ingest on each object's MARC, stamps the
# results with the object's id and responds with them.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines, and no write to
# cstore is visible despite the rw_ prefix.
467 sub rw_biblio_ingest_stream_import {
471 OpenILS::Application::Ingest->post_init();
473 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
475 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
477 my $bib = $resp->content;
478 last unless (defined $bib);
480 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
481 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
483 $_->source($bib->id) for (@{$res->{field_entries}});
484 $_->record($bib->id) for (@{$res->{full_rec}});
486 $client->respond( $res );
# Register the sub under its API name.
491 __PACKAGE__->register_method(
492 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
493 method => "rw_biblio_ingest_stream_import",
499 # --------------------------------------------------------------------------------
502 package OpenILS::Application::Ingest::Authority;
503 use base qw/OpenILS::Application::Ingest/;
504 use Unicode::Normalize;
# Read-only ingest of one authority record object (held in $bib): extracts
# the flat MARC rows from its MARC XML, stamps them with the record id and
# returns them under the full_rec key.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
506 sub ro_authority_ingest_single_object {
# Entity-encode the MARC before parsing it into a document.
510 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
512 my $document = $parser->parse_string($xml);
514 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
516 $_->record($bib->id) for (@mfr);
518 return { full_rec => \@mfr };
# Register the sub under its API name.
520 __PACKAGE__->register_method(
521 api_name => "open-ils.ingest.full.authority.object.readonly",
522 method => "ro_authority_ingest_single_object",
# Read-only ingest of a raw authority MARC XML string: returns the flat MARC
# rows under the full_rec key, without stamping a record id on them.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
527 sub ro_authority_ingest_single_xml {
# Entity-encode the next argument, then parse it into a document.
530 my $xml = OpenILS::Application::Ingest::entityize(shift);
532 my $document = $parser->parse_string($xml);
534 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
536 return { full_rec => \@mfr };
# Register the sub under its API name.
538 __PACKAGE__->register_method(
539 api_name => "open-ils.ingest.full.authority.xml.readonly",
540 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record identified by $rec: retrieves the
# record entry from cstore, runs the XML-based authority ingest on its MARC
# and stamps the resulting rows with $rec.
# NOTE(review): argument unpacking, the end of the request chain, the return
# statement and closing braces are not visible in this excerpt.
545 sub ro_authority_ingest_single_record {
550 OpenILS::Application::Ingest->post_init();
551 my $r = OpenSRF::AppSession
552 ->create('open-ils.cstore')
553 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
556 return undef unless ($r and @$r);
558 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
560 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority xml.readonly sub in this file returns only a
# full_rec key, so $res->{descriptor} looks like it would be undef here and
# this method call would die -- confirm.
561 $res->{descriptor}->record($rec);
# Register the sub under its API name.
565 __PACKAGE__->register_method(
566 api_name => "open-ils.ingest.full.authority.record.readonly",
567 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id: reads ids from the
# client one message at a time and responds with the ingest result for each.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines.
572 sub ro_authority_ingest_stream_record {
576 OpenILS::Application::Ingest->post_init();
578 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
580 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
582 my $rec = $resp->content;
583 last unless (defined $rec);
585 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
586 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
# Stamp the received id on the full_rec rows.
588 $_->record($rec) for (@{$res->{full_rec}});
590 $client->respond( $res );
# Register the sub under its API name.
595 __PACKAGE__->register_method(
596 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
597 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of raw MARC XML: reads XML strings
# from the client one message at a time and responds with the result for each.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines.
602 sub ro_authority_ingest_stream_xml {
606 OpenILS::Application::Ingest->post_init();
608 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
610 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
612 my $xml = $resp->content;
613 last unless (defined $xml);
615 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
616 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
618 $client->respond( $res );
# Register the sub under its API name.
623 __PACKAGE__->register_method(
624 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
625 method => "ro_authority_ingest_stream_xml",
# Streaming authority import ingest: reads record objects from the client one
# message at a time, runs the XML-based authority ingest on each object's
# MARC, stamps the rows with the object's id and responds with the result.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt; $ses is created but not used in the visible lines, and no write to
# cstore is visible despite the rw_ prefix.
630 sub rw_authority_ingest_stream_import {
634 OpenILS::Application::Ingest->post_init();
636 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Receive one message per iteration (count => 1, timeout => 5).
638 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
# An undefined payload ends the loop.
640 my $bib = $resp->content;
641 last unless (defined $bib);
643 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
644 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
646 $_->record($bib->id) for (@{$res->{full_rec}});
648 $client->respond( $res );
# Register the sub under its API name.
653 __PACKAGE__->register_method(
654 api_name => "open-ils.ingest.full.authority.bib_stream.import",
655 method => "rw_authority_ingest_stream_import",
661 # --------------------------------------------------------------------------------
662 # MARC index extraction
664 package OpenILS::Application::Ingest::XPATH;
665 use base qw/OpenILS::Application::Ingest/;
666 use Unicode::Normalize;
668 # give this an XML documentElement and an XPATH expression
# Concatenates the text content found under the nodes matching the XPath
# expression into one space-separated string.
# NOTE(review): most of the argument unpacking, the branch structure around
# the two append statements, the return statement and closing braces are not
# visible in this excerpt.
669 sub xpath_to_string {
673 my $ns_prefix = shift;
# Declare the namespace on the element when both URI and prefix were given,
# so that a prefixed XPath expression can resolve.
676 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
680 # grab the set of matching nodes
681 my @nodes = $xml->findnodes( $xpath );
682 for my $value (@nodes) {
684 # grab all children of the node
685 my @children = $value->childNodes();
686 for my $child (@children) {
688 # add the child's content to the growing buffer
# quotemeta makes the text match literally; with $unique set, text that
# already occurs anywhere in the buffer is skipped.
689 my $content = quotemeta($child->textContent);
690 next if ($unique && $string =~ /$content/); # uniquify the values
691 $string .= $child->textContent . " ";
# NOTE(review): presumably the branch taken when the node has no children --
# the surrounding condition is not visible here.
694 $string .= $value->textContent . " ";
# For the requested field classes, extracts an index string from the MARC XML
# for every configured field definition, normalises it and responds to the
# client with one <class>_field_entry object per definition.
# NOTE(review): argument unpacking, several conditionals and closing braces
# are not visible in this excerpt.
700 sub class_index_string_xml {
706 OpenILS::Application::Ingest->post_init();
# Accept either an already parsed document or an XML string.
707 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
711 for my $class (@classes) {
712 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
713 for my $type ( keys %{ $xpathset->{$class} } ) {
# $def is the field definition (xpath, id, format); $sf is the matching
# supported-format entry.
715 my $def = $xpathset->{$class}->{$type};
716 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Transform the record with the format's stylesheet, reusing a cached result
# for this format when there is one.
721 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
722 $transform_cache{$def->{format}} = $document;
# Evaluate the definition's XPath on the document; the format's namespace URI
# is passed along with the format name (presumably used as the prefix).
725 my $value = xpath_to_string(
726 $document->documentElement => $def->{xpath},
727 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop mark characters (\pM) and "other" category
# characters (\pC), trim trailing non-word characters, and remove runs of dots
# that have a word character on each side.
733 $value = NFD($value);
734 $value =~ s/\pM+//sgo;
735 $value =~ s/\pC+//sgo;
736 $value =~ s/\W+$//sgo;
738 $value =~ s/\b\.+\b//sgo;
# Build the field entry with the value and the definition's id, and send it.
741 my $fm = $class_constructor->new;
742 $fm->value( $value );
743 $fm->field( $xpathset->{$class}->{$type}->{id} );
744 $client->respond($fm);
# Register the sub under its API name.
749 __PACKAGE__->register_method(
750 api_name => "open-ils.ingest.field_entry.class.xml",
751 method => "class_index_string_xml",
# Record-id variant of the class-based field entry extraction: retrieves a
# record entry from cstore and forwards the field entries extracted from its
# MARC for the given classes.
# NOTE(review): argument unpacking, the end of the request chain and closing
# braces are not visible in this excerpt.
757 sub class_index_string_record {
763 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, whereas the
# sibling all_index_string_record retrieves from biblio.record_entry --
# confirm which is intended.
764 my $r = OpenSRF::AppSession
765 ->create('open-ils.cstore')
766 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
769 return undef unless ($r and @$r);
# Relay every field entry produced by the XML-based method to the client.
771 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
773 $client->respond($fm);
# Register the sub under its API name.
777 __PACKAGE__->register_method(
778 api_name => "open-ils.ingest.field_entry.class.record",
779 method => "class_index_string_record",
# Extracts field entries for every class present in $xpathset from the given
# XML by delegating to open-ils.ingest.field_entry.class.xml, relaying each
# entry to the client.
# NOTE(review): argument unpacking and closing braces are not visible in this
# excerpt.
785 sub all_index_string_xml {
790 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
791 $client->respond($fm);
# Register the sub under its API name.
795 __PACKAGE__->register_method(
796 api_name => "open-ils.ingest.extract.field_entry.all.xml",
797 method => "all_index_string_xml",
# Record-id variant of the all-classes field entry extraction: retrieves the
# bib record entry from cstore and relays the field entries extracted from
# its MARC for every class present in $xpathset.
# NOTE(review): argument unpacking, the end of the request chain and closing
# braces are not visible in this excerpt.
803 sub all_index_string_record {
808 OpenILS::Application::Ingest->post_init();
809 my $r = OpenSRF::AppSession
810 ->create('open-ils.cstore')
811 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
814 return undef unless ($r and @$r);
816 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
818 $client->respond($fm);
# Register the sub under its API name.
822 __PACKAGE__->register_method(
823 api_name => "open-ils.ingest.extract.field_entry.all.record",
824 method => "all_index_string_record",
830 # --------------------------------------------------------------------------------
833 package OpenILS::Application::Ingest::FlatMARC;
834 use base qw/OpenILS::Application::Ingest/;
835 use Unicode::Normalize;
# Walks a MARCXML document's leader, control fields and data-field subfields
# to build Fieldmapper <xmltype>::full_rec nodes.
# NOTE(review): most of the node construction, the return statement and
# closing braces are not visible in this excerpt.
838 sub _marcxml_to_full_rows {
# The Fieldmapper namespace defaults to 'metabib' when no type is passed.
841 my $xmltype = shift || 'metabib';
843 my $type = "Fieldmapper::${xmltype}::full_rec";
# First element whose local name is "record", whatever its namespace.
847 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
849 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
850 next unless $tagline;
855 my $val = $tagline->textContent;
# Control fields: the tag comes from the element's "tag" attribute.
865 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
866 next unless $tagline;
870 $ns->tag( $tagline->getAttribute( "tag" ) );
871 my $val = $tagline->textContent;
# Data fields: tag and both indicators are read once per field, then each
# subfield is visited.
881 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
882 next unless $tagline;
884 my $tag = $tagline->getAttribute( "tag" );
885 my $ind1 = $tagline->getAttribute( "ind1" );
886 my $ind2 = $tagline->getAttribute( "ind2" );
888 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
896 $ns->subfield( $data->getAttribute( "code" ) );
897 my $val = $data->textContent;
# Subfield values are stored lower-cased.
902 $ns->value( lc($val) );
908 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Flattens MARC XML into full_rec rows and responds to the client with each.
# NOTE(review): presumably the body of flat_marc_xml, the method name used in
# the registrations below -- the sub header, argument unpacking and closing
# braces are not visible in this excerpt.
917 $log->debug("processing [$xml]");
# Accept either an already parsed document or an XML string.
919 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The same sub serves both registrations; the API name it was invoked under
# decides between metabib and authority rows.
921 my $type = 'metabib';
922 $type = 'authority' if ($self->api_name =~ /authority/o);
924 OpenILS::Application::Ingest->post_init();
926 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Register the sub under the authority API name ...
929 __PACKAGE__->register_method(
930 api_name => "open-ils.ingest.flat_marc.authority.xml",
931 method => "flat_marc_xml",
# ... and under the biblio API name.
936 __PACKAGE__->register_method(
937 api_name => "open-ils.ingest.flat_marc.biblio.xml",
938 method => "flat_marc_xml",
# Record-id variant of the flat MARC extraction: retrieves the record entry
# from cstore, flattens its MARC through the matching flat_marc.<type>.xml
# method and responds to the client with each row.
# NOTE(review): argument unpacking, the initial value of $type, the end of the
# request chain and closing braces are not visible in this excerpt.
944 sub flat_marc_record {
# $type switches to 'authority' when invoked under an authority API name.
950 $type = 'authority' if ($self->api_name =~ /authority/o);
952 OpenILS::Application::Ingest->post_init();
# $type selects the record_entry table and, further down, the XML method.
953 my $r = OpenSRF::AppSession
954 ->create('open-ils.cstore')
955 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
959 return undef unless ($r and $r->marc);
961 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
# Send each row and log its JSON form.
962 for my $row (@rows) {
963 $client->respond($row);
964 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# Register the sub under the biblio API name ...
968 __PACKAGE__->register_method(
969 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
970 method => "flat_marc_record",
# ... and under the authority API name.
975 __PACKAGE__->register_method(
976 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
977 method => "flat_marc_record",
983 # --------------------------------------------------------------------------------
986 package OpenILS::Application::Ingest::Biblio::Fingerprint;
987 use base qw/OpenILS::Application::Ingest/;
988 use Unicode::Normalize;
989 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of the bib record identified by $rec: retrieves
# the record entry from cstore and runs open-ils.ingest.fingerprint.xml on
# its MARC.
# NOTE(review): argument unpacking, the end of the request chain, the return
# statement and closing braces are not visible in this excerpt.
991 sub biblio_fingerprint_record {
996 OpenILS::Application::Ingest->post_init();
998 my $r = OpenSRF::AppSession
999 ->create('open-ils.cstore')
1000 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1003 return undef unless ($r and $r->marc);
1005 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so
# interpolating it here would log its reference address rather than the
# fingerprint text -- confirm that is acceptable.
1006 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Truncate the quality score to an integer.
1007 $fp->{quality} = int($fp->{quality});
# Register the sub under its API name.
1010 __PACKAGE__->register_method(
1011 api_name => "open-ils.ingest.fingerprint.record",
1012 method => "biblio_fingerprint_record",
# Computes a fingerprint for a MARC XML string by running the configured
# biblio_fingerprint script with the XML placed in its environment.
# NOTE(review): argument unpacking, the condition around the script setup,
# the return statement and closing braces are not visible in this excerpt.
1018 sub biblio_fingerprint {
1021 my $xml = OpenILS::Application::Ingest::entityize(shift);
1023 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# app_settings section of the configuration.
1026 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1027 my $conf = OpenSRF::Utils::SettingsClient->new;
1029 my $libs = $conf->config_value(@pfx, 'script_path');
1030 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; always pass an array reference.
1031 my $script_libs = (ref($libs)) ? $libs : [$libs];
1033 $log->debug("Loading script $script_file for biblio fingerprinting...");
1035 $fp_script = new OpenILS::Utils::ScriptRunner
1036 ( file => $script_file,
1037 paths => $script_libs,
1038 reset_count => 100 );
# Hand the MARC XML to the script as environment.marc.
1041 $fp_script->insert('environment' => {marc => $xml} => 1);
# A false result from run is logged as an error.
# NOTE(review): the "return undef" only fires when $log->error returns a true
# value -- confirm that it does.
1043 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1044 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the sub under its API name.
1048 __PACKAGE__->register_method(
1049 api_name => "open-ils.ingest.fingerprint.xml",
1050 method => "biblio_fingerprint",
# Extracts a record descriptor from a MARC XML string by running the
# configured biblio_descriptor script with the XML placed in its environment.
# NOTE(review): argument unpacking, the condition around the script setup,
# the return statement and closing braces are not visible in this excerpt.
1056 sub biblio_descriptor {
1059 my $xml = OpenILS::Application::Ingest::entityize(shift);
1061 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest
# app_settings section of the configuration.
1064 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1065 my $conf = OpenSRF::Utils::SettingsClient->new;
1067 my $libs = $conf->config_value(@pfx, 'script_path');
1068 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; always pass an array reference.
1069 my $script_libs = (ref($libs)) ? $libs : [$libs];
1071 $log->debug("Loading script $script_file for biblio descriptor extraction...");
1073 $rd_script = new OpenILS::Utils::ScriptRunner
1074 ( file => $script_file,
1075 paths => $script_libs,
1076 reset_count => 100 );
1079 $log->debug("Setting up environment for descriptor extraction script...");
# Hand the MARC XML to the script under the dotted key environment.marc.
1080 $rd_script->insert('environment.marc' => $xml => 1);
1081 $log->debug("Environment building complete...");
# A false result from run is logged as an error.
# NOTE(review): the "return undef" only fires when $log->error returns a true
# value -- confirm that it does.
1083 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1084 $log->debug("Script for biblio descriptor extraction completed successfully");
# Register the sub under its API name.
1088 __PACKAGE__->register_method(
1089 api_name => "open-ils.ingest.descriptor.xml",
1090 method => "biblio_descriptor",