1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
16 use OpenILS::Utils::Fieldmapper;
20 use Time::HiRes qw(time);
# Metadata formats known to the ingest code, keyed by format name.  Each entry
# records the XML namespace URI (ns) of that format.  Elsewhere in this file
# the mods and mods3 entries additionally receive an xslt key holding a parsed
# stylesheet.
22 our %supported_formats = (
23 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
24 mods => {ns => 'http://www.loc.gov/mods/'},
25 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
26 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
27 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
28 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
29 atom => {ns => 'http://www.w3.org/2005/Atom'},
30 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
34 rss10 => {ns => 'http://purl.org/rss/1.0/'},
35 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# File-scoped helpers used throughout the packages in this file: the logger
# class name (its methods are invoked on the class itself), one XML parser and
# one XSLT processor.
40 my $log = 'OpenSRF::Utils::Logger';
42 my $parser = XML::LibXML->new();
43 my $xslt = XML::LibXSLT->new();
# Lazy initialisation: the work below runs only while $xpathset has no keys.
# It loads the MODS and MODS v3 stylesheets from the configured XSL directory
# (each only when not already cached in %supported_formats) and fills
# $xpathset from the config.metabib_field rows returned by cstore.
# NOTE(review): the enclosing sub header is not visible here; the log message
# suggests this is the body of post_init, which other subs in this file call
# as OpenILS::Application::Ingest->post_init() -- confirm.
53 unless (keys %$xpathset) {
54 $log->debug("Running post_init", DEBUG);
56 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse each stylesheet only when its xslt slot is still empty.
58 unless ($supported_formats{mods}{xslt}) {
59 $log->debug("Loading MODS XSLT", DEBUG);
60 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
61 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
64 unless ($supported_formats{mods3}{xslt}) {
65 $log->debug("Loading MODS v3 XSLT", DEBUG);
66 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
67 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Fetch the field definitions; the active request asks only for rows whose
# search_field is 't' (the commented-out variant asked for every row).
71 my $req = OpenSRF::AppSession
72 ->create('open-ils.cstore')
74 # XXX testing new metabib field use for faceting
75 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
76 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
80 if (ref $req and @$req) {
# Index every definition by field class and name, keeping its xpath, id and format.
82 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
83 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
84 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
85 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with a hexadecimal
# numeric character reference of the form &#xHHHH;.
# NOTE(review): the enclosing sub header is not visible here; other code in
# this file calls OpenILS::Application::Ingest::entityize -- confirm this is it.
101 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
105 # --------------------------------------------------------------------------------
108 package OpenILS::Application::Ingest::Biblio;
109 use base qw/OpenILS::Application::Ingest/;
110 use Unicode::Normalize;
# Read/write ingest of one bibliographic record object ($bib).
#
# Runs the read-only ingest for the object, copies the resulting fingerprint
# and quality onto $bib and then, inside a cstore transaction, replaces the
# record's derived rows (full_rec, record_descriptor, classed field entries)
# and links the record to a metarecord carrying its fingerprint.
#
# Returns undef when the read-only ingest yields nothing, and also when the
# commit request returns a false value.
# NOTE(review): several lines of this sub (argument unpacking, loop and branch
# headers, the final return) are not visible here; comments below describe
# only the visible statements.
112 sub rw_biblio_ingest_single_object {
117 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
118 return undef unless ($blob);
120 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
121 $bib->quality( $blob->{fingerprint}->{quality} );
# All database work below goes through this one connected cstore session.
123 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
125 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
127 # update full_rec stuff ...
128 my $tmp = $cstore->request(
129 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
130 { record => $bib->id }
# Replace strategy: delete every existing row by id, then create the new rows.
133 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
134 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
136 # update rec_descriptor stuff ...
137 $tmp = $cstore->request(
138 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
139 { record => $bib->id }
142 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
143 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
145 # deal with classed fields...
146 for my $class ( qw/title author subject keyword series/ ) {
147 $tmp = $cstore->request(
148 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
149 { source => $bib->id }
152 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# The create method name for each new entry is derived from the entry's
# Fieldmapper class name.
154 for my $obj ( @{ $blob->{field_entries} } ) {
155 my $class = $obj->class_name;
156 $class =~ s/^Fieldmapper:://o;
158 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Remove this record's existing metarecord source maps, remembering them in
# $tmp so the metarecords they pointed at can be examined next.
163 $tmp = $cstore->request(
164 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
165 { source => $bib->id }
168 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# NOTE(review): "my ... if" leaves $old_mrs unassigned when @$tmp is empty;
# perlsyn documents the behaviour of that form as undefined.  The statement
# after it normalises any non-reference value to an empty array reference.
171 my $old_mrs = $cstore->request(
172 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
173 )->gather(1) if (@$tmp);
175 $old_mrs = [] if (!ref($old_mrs));
# Examine each previously linked metarecord: its fingerprint is compared with
# the record's, its remaining source maps are listed and a metarecord delete
# is issued.  NOTE(review): the conditions connecting these steps are not
# visible here.
178 for my $m (@$old_mrs) {
179 if ($m->fingerprint eq $bib->fingerprint) {
182 my $others = $cstore->request(
183 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
188 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
198 # Get the matching MR, if any.
199 $mr = $cstore->request(
200 'open-ils.cstore.direct.metabib.metarecord.search',
201 { fingerprint => $bib->fingerprint }
# Look up holds of type 'M' whose target is one of the old metarecords
# flagged isdeleted; skipped entirely when there were no old metarecords.
204 $holds = $cstore->request(
205 'open-ils.cstore.direct.action.hold_request.search.atomic',
206 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
207 )->gather(1) if (@$old_mrs);
210 for my $h (@$holds) {
212 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with $bib as master record.
# NOTE(review): presumably only reached when no matching metarecord was found
# above -- the guarding condition is not visible here.
219 $mr = new Fieldmapper::metabib::metarecord;
220 $mr->fingerprint( $bib->fingerprint );
221 $mr->master_record( $bib->id );
224 "open-ils.cstore.direct.metabib.metarecord.create",
225 $mr => { quiet => 'true' }
# Holds not already marked ischanged are updated here.
229 for my $h (grep { !$_->ischanged } @$holds) {
231 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Choose the master record: among the records already mapped to the
# metarecord, the best one by quality stays master only if its quality is
# strictly greater than $bib's; otherwise $bib becomes the master.
234 my $mrm = $cstore->request(
235 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
236 { metarecord => $mr->id }
240 my $best = $cstore->request(
241 "open-ils.cstore.direct.biblio.record_entry.search",
242 { id => [ map { $_->source } @$mrm ] },
243 { 'select' => { bre => [ qw/id quality/ ] },
244 order_by => { bre => "quality desc" },
249 if ($best->quality > $bib->quality) {
250 $mr->master_record($best->id);
252 $mr->master_record($bib->id);
255 $mr->master_record($bib->id);
260 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this record to the metarecord, then persist $bib itself (it carries the
# fingerprint and quality assigned at the top of this sub).
263 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
264 $mrm->source($bib->id);
265 $mrm->metarecord($mr->id);
267 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
268 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false result from the commit request makes the sub return undef.
270 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Publish the sub under its API name.
274 __PACKAGE__->register_method(
275 api_name => "open-ils.ingest.full.biblio.object",
276 method => "rw_biblio_ingest_single_object",
# Read/write ingest of a single bibliographic record identified by $rec.
# The record_entry is retrieved inside a cstore transaction that is then
# rolled back, and the retrieved object is handed to
# open-ils.ingest.full.biblio.object.
# Returns the first result of that method, or undef if nothing was retrieved.
281 sub rw_biblio_ingest_single_record {
286 OpenILS::Application::Ingest->post_init();
287 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
288 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
290 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
# The transaction only wrapped the read, so it is rolled back rather than committed.
292 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
295 return undef unless ($r and @$r);
297 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Publish the sub under its API name.
299 __PACKAGE__->register_method(
300 api_name => "open-ils.ingest.full.biblio.record",
301 method => "rw_biblio_ingest_single_record",
# Read/write ingest of several bibliographic records.
# The remaining arguments may be given either as one array reference or as a
# flat list.  The matching record_entry rows are fetched inside a cstore
# transaction that is rolled back afterwards, and
# open-ils.ingest.full.biblio.object is run on each row, with the first result
# of every call added to $count.
# Returns undef if the search produced no rows.
# NOTE(review): the search below filters on $rec while the arguments are
# collected into @rec -- confirm against the lines not visible here.
306 sub rw_biblio_ingest_record_list {
309 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
311 OpenILS::Application::Ingest->post_init();
312 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
313 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
315 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
317 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
320 return undef unless ($r and @$r);
323 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
# Publish the sub under its API name.
327 __PACKAGE__->register_method(
328 api_name => "open-ils.ingest.full.biblio.record_list",
329 method => "rw_biblio_ingest_record_list",
# Read-only ingest of one bibliographic record object ($bib).
# Entity-encodes and parses the record's MARC, runs the flat-MARC,
# field-entry, fingerprint and descriptor methods on it, stamps the results
# with the record's id and returns them in a hash reference with the keys
# full_rec, field_entries, fingerprint and descriptor.
334 sub ro_biblio_ingest_single_object {
338 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
340 my $document = $parser->parse_string($xml);
# The first two methods receive the parsed document, the last two the XML string.
342 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
343 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
344 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
345 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every produced row back to the record; the descriptor may be absent.
347 $_->source($bib->id) for (@mXfe);
348 $_->record($bib->id) for (@mfr);
349 $rd->record($bib->id) if ($rd);
351 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
353 __PACKAGE__->register_method(
354 api_name => "open-ils.ingest.full.biblio.object.readonly",
355 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a bibliographic record supplied as a MARC XML string.
# Entity-encodes and parses the XML, runs the flat-MARC, field-entry,
# fingerprint and descriptor methods on it and returns the results in a hash
# reference with the keys full_rec, field_entries, fingerprint and descriptor.
# No record id is assigned to the results here.
360 sub ro_biblio_ingest_single_xml {
363 my $xml = OpenILS::Application::Ingest::entityize(shift);
365 my $document = $parser->parse_string($xml);
# The first two methods receive the parsed document, the last two the XML string.
367 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
368 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
369 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
370 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
372 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its API name.
374 __PACKAGE__->register_method(
375 api_name => "open-ils.ingest.full.biblio.xml.readonly",
376 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of a bibliographic record identified by $rec.
# Retrieves the record_entry from cstore, runs
# open-ils.ingest.full.biblio.xml.readonly on its MARC and stamps the field
# entries, full_rec rows and descriptor of the result with $rec.
# Returns undef if nothing was retrieved.
381 sub ro_biblio_ingest_single_record {
386 OpenILS::Application::Ingest->post_init();
387 my $r = OpenSRF::AppSession
388 ->create('open-ils.cstore')
389 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
392 return undef unless ($r and @$r);
394 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
396 $_->source($rec) for (@{$res->{field_entries}});
397 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike ro_biblio_ingest_single_object, this call is not
# guarded against a missing descriptor -- confirm one is always present.
398 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
402 __PACKAGE__->register_method(
403 api_name => "open-ils.ingest.full.biblio.record.readonly",
404 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest of bibliographic records.
# Repeatedly receives one message from $client (5 second timeout), passes its
# content to open-ils.ingest.full.biblio.record.readonly, stamps the result's
# field entries and full_rec rows with that content and sends the result back
# through $client->respond.  The loop ends when recv returns an empty list or
# a message's content is undefined.
# NOTE(review): $ses is created but not used in the lines visible here.
409 sub ro_biblio_ingest_stream_record {
413 OpenILS::Application::Ingest->post_init();
415 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
417 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
419 my $rec = $resp->content;
420 last unless (defined $rec);
422 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
423 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
425 $_->source($rec) for (@{$res->{field_entries}});
426 $_->record($rec) for (@{$res->{full_rec}});
428 $client->respond( $res );
# Publish the sub under its API name.
433 __PACKAGE__->register_method(
434 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
435 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of bibliographic MARC XML.
# Repeatedly receives one message from $client (5 second timeout), runs
# open-ils.ingest.full.biblio.xml.readonly on its content and sends the result
# back through $client->respond.  The loop ends when recv returns an empty
# list or a message's content is undefined.
# NOTE(review): $ses is created but not used in the lines visible here.
440 sub ro_biblio_ingest_stream_xml {
444 OpenILS::Application::Ingest->post_init();
446 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
448 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
450 my $xml = $resp->content;
451 last unless (defined $xml);
453 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
454 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
456 $client->respond( $res );
# Publish the sub under its API name.
461 __PACKAGE__->register_method(
462 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
463 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bibliographic record objects.
# Repeatedly receives one message from $client (5 second timeout), treats its
# content as a record object, runs open-ils.ingest.full.biblio.xml.readonly on
# the object's MARC, stamps the result's field entries and full_rec rows with
# the object's id and sends the result back through $client->respond.
# NOTE(review): despite the rw_/import naming, no database write appears in
# the lines visible here, and $ses is created but not visibly used -- confirm.
468 sub rw_biblio_ingest_stream_import {
472 OpenILS::Application::Ingest->post_init();
474 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
476 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
478 my $bib = $resp->content;
479 last unless (defined $bib);
481 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
482 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
484 $_->source($bib->id) for (@{$res->{field_entries}});
485 $_->record($bib->id) for (@{$res->{full_rec}});
487 $client->respond( $res );
# Publish the sub under its API name.
492 __PACKAGE__->register_method(
493 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
494 method => "rw_biblio_ingest_stream_import",
500 # --------------------------------------------------------------------------------
503 package OpenILS::Application::Ingest::Authority;
504 use base qw/OpenILS::Application::Ingest/;
505 use Unicode::Normalize;
# Read-only ingest of one authority record object (held in $bib).
# Entity-encodes and parses the record's MARC, flattens it with
# open-ils.ingest.flat_marc.authority.xml, stamps each row with the record's
# id and returns a hash reference whose only key is full_rec.
507 sub ro_authority_ingest_single_object {
511 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
513 my $document = $parser->parse_string($xml);
515 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
517 $_->record($bib->id) for (@mfr);
519 return { full_rec => \@mfr };
# Publish the sub under its API name.
521 __PACKAGE__->register_method(
522 api_name => "open-ils.ingest.full.authority.object.readonly",
523 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority record supplied as a MARC XML string.
# Entity-encodes and parses the XML, flattens it with
# open-ils.ingest.flat_marc.authority.xml and returns a hash reference whose
# only key is full_rec.
528 sub ro_authority_ingest_single_xml {
531 my $xml = OpenILS::Application::Ingest::entityize(shift);
533 my $document = $parser->parse_string($xml);
535 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
537 return { full_rec => \@mfr };
# Publish the sub under its API name.
539 __PACKAGE__->register_method(
540 api_name => "open-ils.ingest.full.authority.xml.readonly",
541 method => "ro_authority_ingest_single_xml",
# Read-only ingest of an authority record identified by $rec.
# Retrieves the authority record_entry from cstore, runs
# open-ils.ingest.full.authority.xml.readonly on its MARC and stamps the
# resulting full_rec rows with $rec.
# Returns undef if nothing was retrieved.
546 sub ro_authority_ingest_single_record {
551 OpenILS::Application::Ingest->post_init();
552 my $r = OpenSRF::AppSession
553 ->create('open-ils.cstore')
554 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
557 return undef unless ($r and @$r);
559 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
561 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_authority_ingest_single_xml, as visible in this file,
# returns only the full_rec key, so descriptor looks undefined here and this
# method call would die -- confirm and guard or remove.
562 $res->{descriptor}->record($rec);
# Publish the sub under its API name.
566 __PACKAGE__->register_method(
567 api_name => "open-ils.ingest.full.authority.record.readonly",
568 method => "ro_authority_ingest_single_record",
# Streaming read-only ingest of authority records.
# Repeatedly receives one message from $client (5 second timeout), passes its
# content to open-ils.ingest.full.authority.record.readonly, stamps the
# result's full_rec rows with that content and sends the result back through
# $client->respond.  The loop ends when recv returns an empty list or a
# message's content is undefined.
# NOTE(review): $ses is created but not used in the lines visible here.
573 sub ro_authority_ingest_stream_record {
577 OpenILS::Application::Ingest->post_init();
579 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
581 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
583 my $rec = $resp->content;
584 last unless (defined $rec);
586 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
587 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
589 $_->record($rec) for (@{$res->{full_rec}});
591 $client->respond( $res );
# Publish the sub under its API name.
596 __PACKAGE__->register_method(
597 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
598 method => "ro_authority_ingest_stream_record",
# Streaming read-only ingest of authority MARC XML.
# Repeatedly receives one message from $client (5 second timeout), runs
# open-ils.ingest.full.authority.xml.readonly on its content and sends the
# result back through $client->respond.  The loop ends when recv returns an
# empty list or a message's content is undefined.
# NOTE(review): $ses is created but not used in the lines visible here.
603 sub ro_authority_ingest_stream_xml {
607 OpenILS::Application::Ingest->post_init();
609 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
611 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
613 my $xml = $resp->content;
614 last unless (defined $xml);
616 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
617 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
619 $client->respond( $res );
# Publish the sub under its API name.
624 __PACKAGE__->register_method(
625 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
626 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects.
# Repeatedly receives one message from $client (5 second timeout), treats its
# content as a record object (held in $bib), runs
# open-ils.ingest.full.authority.xml.readonly on the object's MARC, stamps the
# result's full_rec rows with the object's id and sends the result back
# through $client->respond.
# NOTE(review): despite the rw_/import naming, no database write appears in
# the lines visible here, and $ses is created but not visibly used -- confirm.
631 sub rw_authority_ingest_stream_import {
635 OpenILS::Application::Ingest->post_init();
637 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
639 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
641 my $bib = $resp->content;
642 last unless (defined $bib);
644 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
645 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
647 $_->record($bib->id) for (@{$res->{full_rec}});
649 $client->respond( $res );
# Publish the sub under its API name.
654 __PACKAGE__->register_method(
655 api_name => "open-ils.ingest.full.authority.bib_stream.import",
656 method => "rw_authority_ingest_stream_import",
662 # --------------------------------------------------------------------------------
663 # MARC index extraction
665 package OpenILS::Application::Ingest::XPATH;
666 use base qw/OpenILS::Application::Ingest/;
667 use Unicode::Normalize;
669 # give this an XML documentElement and an XPATH expression
# Gathers the text of the nodes matched by the XPath expression into one
# string, each piece followed by a single space.  When both a namespace URI
# and a prefix are supplied, the namespace is first set on the element so the
# expression can use that prefix.
# NOTE(review): the argument unpacking (apart from $ns_prefix), the condition
# selecting between the per-child and whole-node appends, and the return are
# not visible here.
670 sub xpath_to_string {
674 my $ns_prefix = shift;
677 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
681 # grab the set of matching nodes
682 my @nodes = $xml->findnodes( $xpath );
683 for my $value (@nodes) {
685 # grab all children of the node
686 my @children = $value->childNodes();
687 for my $child (@children) {
689 # add the childs content to the growing buffer
# With $unique set, a piece is skipped when its literal text already occurs
# anywhere in the buffer; this is a substring match, not a whole-value
# comparison.
690 my $content = quotemeta($child->textContent);
691 next if ($unique && $string =~ /$content/); # uniquify the values
692 $string .= $child->textContent . " ";
695 $string .= $value->textContent . " ";
# Produces metabib field entries for the requested index classes from a MARC
# document.  $xml may be a string (it is then entity-encoded and parsed) or an
# already parsed reference.  For every field definition of every class in
# $xpathset, the document is taken from a per-format transform cache or
# produced by that format's stylesheet, the definition's XPath is evaluated,
# the text is normalised, and a Fieldmapper::metabib::<class>_field_entry
# carrying the value and the definition's id is sent to the client.
# NOTE(review): the argument unpacking and any conditions around the
# transform and the respond call are not visible here.
701 sub class_index_string_xml {
707 OpenILS::Application::Ingest->post_init();
708 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
712 for my $class (@classes) {
713 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
714 for my $type ( keys %{ $xpathset->{$class} } ) {
716 my $def = $xpathset->{$class}->{$type};
717 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse an earlier transform of this document into the same format if there is one.
722 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
723 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix passed to xpath_to_string.
726 my $value = xpath_to_string(
727 $document->documentElement => $def->{xpath},
728 $sf->{ns} => $def->{format},
# Normalise the text: canonical decomposition (NFD), then remove combining
# marks (\pM) and characters in Unicode category C (\pC), trim trailing
# non-word characters and delete runs of dots that sit between word
# boundaries.
734 $value = NFD($value);
735 $value =~ s/\pM+//sgo;
736 $value =~ s/\pC+//sgo;
737 $value =~ s/\W+$//sgo;
739 $value =~ s/\b\.+\b//sgo;
742 my $fm = $class_constructor->new;
743 $fm->value( $value );
744 $fm->field( $xpathset->{$class}->{$type}->{id} );
745 $client->respond($fm);
# Publish the sub under its API name.
750 __PACKAGE__->register_method(
751 api_name => "open-ils.ingest.field_entry.class.xml",
752 method => "class_index_string_xml",
# Produces field entries for the requested index classes of a stored record
# identified by $rec: the record is retrieved from cstore and its MARC is
# passed, together with @classes, to open-ils.ingest.field_entry.class.xml;
# each resulting entry is relayed to the client.
# Returns undef if nothing was retrieved.
# NOTE(review): this retrieves from authority.record_entry, whereas
# all_index_string_record in this file reads biblio.record_entry -- confirm
# which table is intended.
758 sub class_index_string_record {
764 OpenILS::Application::Ingest->post_init();
765 my $r = OpenSRF::AppSession
766 ->create('open-ils.cstore')
767 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
770 return undef unless ($r and @$r);
772 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
774 $client->respond($fm);
# Publish the sub under its API name.
778 __PACKAGE__->register_method(
779 api_name => "open-ils.ingest.field_entry.class.record",
780 method => "class_index_string_record",
# Produces field entries for every index class known to $xpathset: delegates
# to open-ils.ingest.field_entry.class.xml with all class names and relays
# each resulting entry to the client.
786 sub all_index_string_xml {
791 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
792 $client->respond($fm);
# Publish the sub under its API name.
796 __PACKAGE__->register_method(
797 api_name => "open-ils.ingest.extract.field_entry.all.xml",
798 method => "all_index_string_xml",
# Produces field entries for every index class of a stored bibliographic
# record identified by $rec: the record_entry is retrieved from cstore and its
# MARC is passed, with all class names from $xpathset, to
# open-ils.ingest.field_entry.class.xml; each resulting entry is relayed to
# the client.
# Returns undef if nothing was retrieved.
804 sub all_index_string_record {
809 OpenILS::Application::Ingest->post_init();
810 my $r = OpenSRF::AppSession
811 ->create('open-ils.cstore')
812 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
815 return undef unless ($r and @$r);
817 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
819 $client->respond($fm);
# Publish the sub under its API name.
823 __PACKAGE__->register_method(
824 api_name => "open-ils.ingest.extract.field_entry.all.record",
825 method => "all_index_string_record",
831 # --------------------------------------------------------------------------------
834 package OpenILS::Application::Ingest::FlatMARC;
835 use base qw/OpenILS::Application::Ingest/;
836 use Unicode::Normalize;
# Flattens a MARCXML document into Fieldmapper::<xmltype>::full_rec rows,
# where $xmltype defaults to 'metabib'.  Works on the first element whose
# local name is "record" and walks its leader, controlfield and datafield
# children; for each subfield of a datafield the row receives the subfield
# code and a lower-cased copy of the text.
# NOTE(review): row construction, the remaining field assignments and the
# return statement are not visible here; the closing log line counts the rows
# collected in @ns_list.
839 sub _marcxml_to_full_rows {
842 my $xmltype = shift || 'metabib';
844 my $type = "Fieldmapper::${xmltype}::full_rec";
# local-name() makes the lookup independent of any namespace prefix.
848 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
850 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
851 next unless $tagline;
856 my $val = $tagline->textContent;
866 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
867 next unless $tagline;
871 $ns->tag( $tagline->getAttribute( "tag" ) );
872 my $val = $tagline->textContent;
# Data fields: tag and both indicators are read once per field, then one row
# is handled per subfield.
882 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
883 next unless $tagline;
885 my $tag = $tagline->getAttribute( "tag" );
886 my $ind1 = $tagline->getAttribute( "ind1" );
887 my $ind2 = $tagline->getAttribute( "ind2" );
889 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
897 $ns->subfield( $data->getAttribute( "code" ) );
898 my $val = $data->textContent;
903 $ns->value( lc($val) );
909 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Flattens MARC XML into full_rec rows and streams them to the client.
# $xml is entity-encoded and parsed unless it is already a reference; the row
# type is 'authority' when the invoked API name contains "authority" and
# 'metabib' otherwise; every row from _marcxml_to_full_rows is sent with
# $client->respond.
# NOTE(review): the sub header and argument unpacking are not visible here;
# both registrations below name this method flat_marc_xml.
918 $log->debug("processing [$xml]");
920 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
922 my $type = 'metabib';
923 $type = 'authority' if ($self->api_name =~ /authority/o);
925 OpenILS::Application::Ingest->post_init();
927 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is published under an authority and a biblio API name.
930 __PACKAGE__->register_method(
931 api_name => "open-ils.ingest.flat_marc.authority.xml",
932 method => "flat_marc_xml",
937 __PACKAGE__->register_method(
938 api_name => "open-ils.ingest.flat_marc.biblio.xml",
939 method => "flat_marc_xml",
# Flattens the MARC of a stored record identified by $rec into full_rec rows.
# $type becomes 'authority' when the invoked API name contains "authority";
# it selects both the cstore record_entry table that is read and the
# flat_marc.<type>.xml method that is run.  Each row is sent to the client and
# logged as JSON at debug level.
# Returns undef if no record, or no MARC, was retrieved.
# NOTE(review): the initial value of $type is not visible here.
945 sub flat_marc_record {
951 $type = 'authority' if ($self->api_name =~ /authority/o);
953 OpenILS::Application::Ingest->post_init();
954 my $r = OpenSRF::AppSession
955 ->create('open-ils.cstore')
956 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
960 return undef unless ($r and $r->marc);
962 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
963 for my $row (@rows) {
964 $client->respond($row);
965 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
# The same sub is published under a biblio and an authority API name.
969 __PACKAGE__->register_method(
970 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
971 method => "flat_marc_record",
976 __PACKAGE__->register_method(
977 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
978 method => "flat_marc_record",
984 # --------------------------------------------------------------------------------
987 package OpenILS::Application::Ingest::Biblio::Fingerprint;
988 use base qw/OpenILS::Application::Ingest/;
989 use Unicode::Normalize;
990 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of a stored bibliographic record identified by
# $rec: retrieves the record_entry from cstore, runs
# open-ils.ingest.fingerprint.xml on its MARC and truncates the result's
# quality to an integer.
# Returns undef if no record, or no MARC, was retrieved.
# NOTE(review): the final return is not visible here.
992 sub biblio_fingerprint_record {
997 OpenILS::Application::Ingest->post_init();
999 my $r = OpenSRF::AppSession
1000 ->create('open-ils.cstore')
1001 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1004 return undef unless ($r and $r->marc);
1006 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so this
# interpolation logs the reference's string form rather than the fingerprint.
1007 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
1008 $fp->{quality} = int($fp->{quality});
# Publish the sub under its API name.
1011 __PACKAGE__->register_method(
1012 api_name => "open-ils.ingest.fingerprint.record",
1013 method => "biblio_fingerprint_record",
# Computes a fingerprint for a MARC XML string by running the configured
# biblio_fingerprint script through OpenILS::Utils::ScriptRunner.  The script
# file and library paths come from the open-ils.ingest app_settings; the
# entity-encoded XML is placed in the script's environment under the key marc.
# NOTE(review): the conditions around the script construction and the final
# return are not visible here.
1019 sub biblio_fingerprint {
1022 my $xml = OpenILS::Application::Ingest::entityize(shift);
1024 $log->internal("Got MARC [$xml]");
1027 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1028 my $conf = OpenSRF::Utils::SettingsClient->new;
1030 my $libs = $conf->config_value(@pfx, 'script_path');
1031 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be configured as a single value or a list; always pass a list.
1032 my $script_libs = (ref($libs)) ? $libs : [$libs];
1034 $log->debug("Loading script $script_file for biblio fingerprinting...");
1036 $fp_script = new OpenILS::Utils::ScriptRunner
1037 ( file => $script_file,
1038 paths => $script_libs,
1039 reset_count => 100 );
1042 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): the "return undef" only executes if $log->error returns a true
# value; otherwise execution continues with a false $res -- confirm.
1044 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1045 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub under its API name.
1049 __PACKAGE__->register_method(
1050 api_name => "open-ils.ingest.fingerprint.xml",
1051 method => "biblio_fingerprint",
# Extracts a record descriptor from a MARC XML string by running the
# configured biblio_descriptor script through OpenILS::Utils::ScriptRunner.
# The script file and library paths come from the open-ils.ingest
# app_settings; the entity-encoded XML is inserted into the script under the
# path environment.marc.
# NOTE(review): the conditions around the script construction and the final
# return are not visible here.
1057 sub biblio_descriptor {
1060 my $xml = OpenILS::Application::Ingest::entityize(shift);
1062 $log->internal("Got MARC [$xml]");
1065 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1066 my $conf = OpenSRF::Utils::SettingsClient->new;
1068 my $libs = $conf->config_value(@pfx, 'script_path');
1069 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be configured as a single value or a list; always pass a list.
1070 my $script_libs = (ref($libs)) ? $libs : [$libs];
1072 $log->debug("Loading script $script_file for biblio descriptor extraction...");
1074 $rd_script = new OpenILS::Utils::ScriptRunner
1075 ( file => $script_file,
1076 paths => $script_libs,
1077 reset_count => 100 );
1080 $log->debug("Setting up environment for descriptor extraction script...");
1081 $rd_script->insert('environment.marc' => $xml => 1);
1082 $log->debug("Environment building complete...");
# NOTE(review): the "return undef" only executes if $log->error returns a true
# value; otherwise execution continues with a false $res -- confirm.
1084 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1085 $log->debug("Script for biblio descriptor extraction completed successfully");
# Publish the sub under its API name.
1089 __PACKAGE__->register_method(
1090 api_name => "open-ils.ingest.descriptor.xml",
1091 method => "biblio_descriptor",