1 package OpenILS::Application::Ingest;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
8 use OpenSRF::AppSession;
9 use OpenSRF::Utils::SettingsClient;
10 use OpenSRF::Utils::Logger qw/:level/;
12 use OpenILS::Utils::ScriptRunner;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenSRF::Utils::JSON;
16 use OpenILS::Utils::Fieldmapper;
20 use Time::HiRes qw(time);
# Metadata formats known to the ingest code, keyed by format name. Each entry
# holds the XML namespace URI under `ns`; elsewhere in this file a compiled
# stylesheet is stored under `xslt` for the mods, mods3 and mods32 entries.
# NOTE(review): this listing elides some lines, so further entries and the
# closing of the list are not visible here.
22 our %supported_formats = (
23 mods32 => {ns => 'http://www.loc.gov/mods/v3'},
24 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
25 mods => {ns => 'http://www.loc.gov/mods/'},
26 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
27 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
28 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
29 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
30 atom => {ns => 'http://www.w3.org/2005/Atom'},
31 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
35 rss10 => {ns => 'http://purl.org/rss/1.0/'},
36 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
41 my $log = 'OpenSRF::Utils::Logger';
43 my $parser = XML::LibXML->new();
44 my $xslt = XML::LibXSLT->new();
# Lazy setup: the guard below opens only while $xpathset has no keys. Inside
# it the MARC-to-MODS stylesheets are compiled and the indexed-field
# definitions are fetched from open-ils.cstore.
# NOTE(review): the enclosing sub header and the closing braces are not
# visible in this listing, so the exact extent of the guard cannot be confirmed.
54 unless (keys %$xpathset) {
55 $log->debug("Running post_init", DEBUG);
# Directory holding the XSL files, read from the settings client.
57 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Each stylesheet is parsed and compiled only if its slot is still empty.
59 unless ($supported_formats{mods}{xslt}) {
60 $log->debug("Loading MODS XSLT", DEBUG);
61 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
62 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
65 unless ($supported_formats{mods3}{xslt}) {
66 $log->debug("Loading MODS v3 XSLT", DEBUG);
67 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
68 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
71 unless ($supported_formats{mods32}{xslt}) {
72 $log->debug("Loading MODS v32 XSLT", DEBUG);
73 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl");
74 $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for the config.metabib_field rows whose search_field is 't'.
77 my $req = OpenSRF::AppSession
78 ->create('open-ils.cstore')
80 # XXX testing new metabib field use for faceting
81 #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
82 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } )
# Index every definition by field class and name, keeping its xpath, id and
# format. ($f is bound by a loop header that is not visible in this listing.)
86 if (ref $req and @$req) {
88 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
89 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
90 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
91 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# NOTE(review): both a decomposing (NFD) and a composing (NFC) normalization
# appear here; the condition that selects between them is not visible in this
# listing -- confirm before relying on either form.
102 $stuff = NFD($stuff);
104 $stuff = NFC($stuff);
# Replace each character in the range U+0080..U+FFFD with an uppercase-hex
# numeric character reference.
107 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
111 # --------------------------------------------------------------------------------
114 package OpenILS::Application::Ingest::Biblio;
115 use base qw/OpenILS::Application::Ingest/;
116 use Unicode::Normalize;
# Ingest one bib object and write the results through open-ils.cstore between
# a transaction.begin and a transaction.commit request: run the read-only
# ingest, then replace the record's full_rec rows, record descriptor, classed
# field entries and metarecord source map, and finally update the bib itself.
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value; other return paths are not visible here.
118 sub rw_biblio_ingest_single_object {
# Derive everything from the bib via the read-only ingest method.
123 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
124 return undef unless ($blob);
# Copy the computed fingerprint and quality onto the bib object.
126 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
127 $bib->quality( $blob->{fingerprint}->{quality} );
129 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
131 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
133 # update full_rec stuff ...
134 my $tmp = $cstore->request(
135 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
136 { record => $bib->id }
# Delete every existing row by id, then create the freshly extracted rows.
139 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
140 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
142 # update rec_descriptor stuff ...
143 $tmp = $cstore->request(
144 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
145 { record => $bib->id }
148 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
149 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
151 # deal with classed fields...
# First remove the existing entries of each field class for this bib ...
152 for my $class ( qw/title author subject keyword series/ ) {
153 $tmp = $cstore->request(
154 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
155 { source => $bib->id }
158 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# ... then create the new ones. The cstore method name is built from the
# object's class name with the leading "Fieldmapper::" removed.
160 for my $obj ( @{ $blob->{field_entries} } ) {
161 my $class = $obj->class_name;
162 $class =~ s/^Fieldmapper:://o;
164 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Look up and delete the existing metarecord source maps for this bib.
169 $tmp = $cstore->request(
170 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
171 { source => $bib->id }
174 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);
# Fetch the metarecords those maps pointed at.
# NOTE(review): this is `my ... if (...)`; perlsyn documents a `my` combined
# with a statement modifier as undefined behaviour. Consider declaring the
# variable first and assigning conditionally.
177 my $old_mrs = $cstore->request(
178 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
179 )->gather(1) if (@$tmp);
# Fall back to an empty list when nothing (or a non-reference) came back.
181 $old_mrs = [] if (!ref($old_mrs));
# Walk the previously mapped metarecords: compare fingerprints, list the
# source maps of a metarecord and issue a metarecord delete. The conditions
# joining these steps are not visible in this listing.
184 for my $m (@$old_mrs) {
185 if ($m->fingerprint eq $bib->fingerprint) {
188 my $others = $cstore->request(
189 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
194 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
204 # Get the matching MR, if any.
205 $mr = $cstore->request(
206 'open-ils.cstore.direct.metabib.metarecord.search',
207 { fingerprint => $bib->fingerprint }
# Collect hold requests of hold_type 'M' that target old metarecords for which
# isdeleted is true.
210 $holds = $cstore->request(
211 'open-ils.cstore.direct.action.hold_request.search.atomic',
212 { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
213 )->gather(1) if (@$old_mrs);
# Write each collected hold back (the change made to it is not visible here).
216 for my $h (@$holds) {
218 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Build a new metarecord for this fingerprint with this bib as master record.
# NOTE(review): presumably only when no matching metarecord was found --
# the guarding condition is not visible; confirm.
225 $mr = new Fieldmapper::metabib::metarecord;
226 $mr->fingerprint( $bib->fingerprint );
227 $mr->master_record( $bib->id );
230 "open-ils.cstore.direct.metabib.metarecord.create",
231 $mr => { quiet => 'true' }
# Update the holds for which ischanged is false.
235 for my $h (grep { !$_->ischanged } @$holds) {
237 $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
# Find the source maps of the chosen metarecord and fetch id and quality of
# their bibs, ordered by quality descending.
240 my $mrm = $cstore->request(
241 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
242 { metarecord => $mr->id }
246 my $best = $cstore->request(
247 "open-ils.cstore.direct.biblio.record_entry.search",
248 { id => [ map { $_->source } @$mrm ] },
249 { 'select' => { bre => [ qw/id quality/ ] },
250 order_by => { bre => "quality desc" },
# Keep the fetched bib as master record when its quality exceeds this bib's;
# the other visible assignments make this bib the master record.
255 if ($best->quality > $bib->quality) {
256 $mr->master_record($best->id);
258 $mr->master_record($bib->id);
261 $mr->master_record($bib->id);
266 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the metarecord and store the updated bib.
269 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
270 $mrm->source($bib->id);
271 $mrm->metarecord($mr->id);
273 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
274 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# A false commit result makes the sub return undef. The doubled semicolon is
# an empty statement and has no effect.
276 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
281 __PACKAGE__->register_method(
282 api_name => "open-ils.ingest.full.biblio.object",
283 method => "rw_biblio_ingest_single_object",
# Fetch the biblio.record_entry identified by $rec from open-ils.cstore and
# pass it to open-ils.ingest.full.biblio.object, returning that method's first
# result. Returns undef when nothing usable was retrieved.
288 sub rw_biblio_ingest_single_record {
293 OpenILS::Application::Ingest->post_init();
294 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
# The read is bracketed by transaction.begin and transaction.rollback requests.
295 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
297 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
299 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# $r is dereferenced as an array here, so an empty result also counts as
# "nothing retrieved".
302 return undef unless ($r and @$r);
304 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
306 __PACKAGE__->register_method(
307 api_name => "open-ils.ingest.full.biblio.record",
308 method => "rw_biblio_ingest_single_record",
# Ingest several bib records: fetch them from open-ils.cstore with a single
# search, run open-ils.ingest.full.biblio.object on each one and add the first
# value of every call to $count. Returns undef when the search yields nothing.
313 sub rw_biblio_ingest_record_list {
# Accept the ids either as one array reference or as a flat argument list.
316 my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;
318 OpenILS::Application::Ingest->post_init();
319 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
320 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
# NOTE(review): the ids were collected in @rec, but this search filters on the
# scalar $rec. Unless $rec is assigned on a line not visible here, this looks
# like it should pass a reference to @rec -- confirm.
322 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => $rec } )->gather(1);
324 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
327 return undef unless ($r and @$r);
330 $count += ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0] for (@$r);
334 __PACKAGE__->register_method(
335 api_name => "open-ils.ingest.full.biblio.record_list",
336 method => "rw_biblio_ingest_record_list",
# Read-only ingest of a bib object: extract flat MARC rows, field entries,
# fingerprint and record descriptor from the bib's MARC and return them in a
# hash reference with the keys full_rec, field_entries, fingerprint and
# descriptor. Nothing is written to storage on the lines visible here.
341 sub ro_biblio_ingest_single_object {
# Entity-encode the MARC XML, then parse it once for the document-based calls.
345 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
347 my $document = $parser->parse_string($xml);
# The first two methods receive the parsed document, the last two the XML string.
349 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
350 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
351 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
352 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Stamp the bib id onto every extracted object; the descriptor may be absent.
354 $_->source($bib->id) for (@mXfe);
355 $_->record($bib->id) for (@mfr);
356 $rd->record($bib->id) if ($rd);
358 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
360 __PACKAGE__->register_method(
361 api_name => "open-ils.ingest.full.biblio.object.readonly",
362 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a MARC XML string: same extraction as the object
# variant, but no record id is stamped onto the results. Returns a hash
# reference with the keys full_rec, field_entries, fingerprint and descriptor.
367 sub ro_biblio_ingest_single_xml {
# The XML is taken from the next argument and entity-encoded before parsing.
370 my $xml = OpenILS::Application::Ingest::entityize(shift);
372 my $document = $parser->parse_string($xml);
374 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
375 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
376 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
377 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
379 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
381 __PACKAGE__->register_method(
382 api_name => "open-ils.ingest.full.biblio.xml.readonly",
383 method => "ro_biblio_ingest_single_xml",
# Read-only ingest by record id: retrieve the biblio.record_entry for $rec,
# run the XML ingest on its MARC and stamp $rec onto the extracted objects.
# Returns undef when the record could not be retrieved.
388 sub ro_biblio_ingest_single_record {
393 OpenILS::Application::Ingest->post_init();
394 my $r = OpenSRF::AppSession
395 ->create('open-ils.cstore')
396 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
399 return undef unless ($r and @$r);
401 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
403 $_->source($rec) for (@{$res->{field_entries}});
404 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_biblio_ingest_single_object guards this call with
# `if ($rd)`; here a missing descriptor would be a method call on undef.
405 $res->{descriptor}->record($rec);
409 __PACKAGE__->register_method(
410 api_name => "open-ils.ingest.full.biblio.record.readonly",
411 method => "ro_biblio_ingest_single_record",
# Streaming variant: repeatedly receive one message from the client
# (count => 1, timeout => 5), treat its content as a record id, run
# open-ils.ingest.full.biblio.record.readonly on it and respond with the
# result. The loop ends at the first message whose content is undefined.
416 sub ro_biblio_ingest_stream_record {
420 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
422 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
424 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
426 my $rec = $resp->content;
427 last unless (defined $rec);
429 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
430 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id onto the extracted field entries and flat MARC rows.
432 $_->source($rec) for (@{$res->{field_entries}});
433 $_->record($rec) for (@{$res->{full_rec}});
435 $client->respond( $res );
440 __PACKAGE__->register_method(
441 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
442 method => "ro_biblio_ingest_stream_record",
# Streaming variant for raw XML: repeatedly receive one message from the
# client (count => 1, timeout => 5), run
# open-ils.ingest.full.biblio.xml.readonly on its content and respond with the
# result. The loop ends at the first message whose content is undefined.
447 sub ro_biblio_ingest_stream_xml {
451 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
453 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
455 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
457 my $xml = $resp->content;
458 last unless (defined $xml);
460 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
461 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
463 $client->respond( $res );
468 __PACKAGE__->register_method(
469 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
470 method => "ro_biblio_ingest_stream_xml",
# Streaming variant for bib objects: repeatedly receive one message from the
# client (count => 1, timeout => 5), run the read-only XML ingest on the
# received object's MARC, stamp the object's id onto the results and respond
# with them. The loop ends at the first message whose content is undefined.
# NOTE(review): despite the "rw"/"import" naming, no write to storage appears
# on the lines visible in this listing -- confirm against the full file.
475 sub rw_biblio_ingest_stream_import {
479 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
481 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
483 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
485 my $bib = $resp->content;
486 last unless (defined $bib);
488 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
489 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
491 $_->source($bib->id) for (@{$res->{field_entries}});
492 $_->record($bib->id) for (@{$res->{full_rec}});
494 $client->respond( $res );
499 __PACKAGE__->register_method(
500 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
501 method => "rw_biblio_ingest_stream_import",
507 # --------------------------------------------------------------------------------
510 package OpenILS::Application::Ingest::Authority;
511 use base qw/OpenILS::Application::Ingest/;
512 use Unicode::Normalize;
# Read-only ingest of an authority object: extract the flat MARC rows from the
# object's MARC, stamp the object's id onto each row and return them in a hash
# reference under the single key full_rec.
514 sub ro_authority_ingest_single_object {
# Entity-encode the MARC XML, then parse it.
518 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
520 my $document = $parser->parse_string($xml);
522 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
524 $_->record($bib->id) for (@mfr);
526 return { full_rec => \@mfr };
528 __PACKAGE__->register_method(
529 api_name => "open-ils.ingest.full.authority.object.readonly",
530 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority MARC XML string: extract the flat MARC rows
# and return them in a hash reference under the single key full_rec. No record
# id is stamped onto the rows here.
535 sub ro_authority_ingest_single_xml {
# The XML is taken from the next argument and entity-encoded before parsing.
538 my $xml = OpenILS::Application::Ingest::entityize(shift);
540 my $document = $parser->parse_string($xml);
542 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
544 return { full_rec => \@mfr };
546 __PACKAGE__->register_method(
547 api_name => "open-ils.ingest.full.authority.xml.readonly",
548 method => "ro_authority_ingest_single_xml",
# Read-only authority ingest by record id: retrieve the authority.record_entry
# for $rec, run the authority XML ingest on its MARC and stamp $rec onto the
# resulting rows. Returns undef when the record could not be retrieved.
553 sub ro_authority_ingest_single_record {
558 OpenILS::Application::Ingest->post_init();
559 my $r = OpenSRF::AppSession
560 ->create('open-ils.cstore')
561 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
564 return undef unless ($r and @$r);
566 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
568 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): ro_authority_ingest_single_xml returns only a full_rec key, so
# $res->{descriptor} appears to be undefined here and this would be a method
# call on undef -- confirm and remove or guard.
569 $res->{descriptor}->record($rec);
573 __PACKAGE__->register_method(
574 api_name => "open-ils.ingest.full.authority.record.readonly",
575 method => "ro_authority_ingest_single_record",
# Streaming variant: repeatedly receive one message from the client
# (count => 1, timeout => 5), treat its content as an authority record id, run
# open-ils.ingest.full.authority.record.readonly on it and respond with the
# result. The loop ends at the first message whose content is undefined.
580 sub ro_authority_ingest_stream_record {
584 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
586 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
588 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
590 my $rec = $resp->content;
591 last unless (defined $rec);
593 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
594 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
# Stamp the record id onto the extracted flat MARC rows.
596 $_->record($rec) for (@{$res->{full_rec}});
598 $client->respond( $res );
603 __PACKAGE__->register_method(
604 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
605 method => "ro_authority_ingest_stream_record",
# Streaming variant for raw authority XML: repeatedly receive one message from
# the client (count => 1, timeout => 5), run
# open-ils.ingest.full.authority.xml.readonly on its content and respond with
# the result. The loop ends at the first message whose content is undefined.
610 sub ro_authority_ingest_stream_xml {
614 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
616 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
618 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
620 my $xml = $resp->content;
621 last unless (defined $xml);
623 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
624 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
626 $client->respond( $res );
631 __PACKAGE__->register_method(
632 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
633 method => "ro_authority_ingest_stream_xml",
# Streaming variant for authority objects: repeatedly receive one message from
# the client (count => 1, timeout => 5), run the read-only authority XML
# ingest on the received object's MARC, stamp the object's id onto the rows
# and respond with the result. The loop ends at the first message whose
# content is undefined.
# NOTE(review): despite the "rw"/"import" naming, no write to storage appears
# on the lines visible in this listing -- confirm against the full file.
638 sub rw_authority_ingest_stream_import {
642 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used on any line visible in this listing.
644 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
646 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
648 my $bib = $resp->content;
649 last unless (defined $bib);
651 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
652 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
654 $_->record($bib->id) for (@{$res->{full_rec}});
656 $client->respond( $res );
661 __PACKAGE__->register_method(
662 api_name => "open-ils.ingest.full.authority.bib_stream.import",
663 method => "rw_authority_ingest_stream_import",
669 # --------------------------------------------------------------------------------
670 # MARC index extraction
672 package OpenILS::Application::Ingest::XPATH;
673 use base qw/OpenILS::Application::Ingest/;
674 use Unicode::Normalize;
676 # give this an XML documentElement and an XPATH expression
# Collects the text found under the nodes matching the XPath into one
# space-separated string. The argument order is not fully visible here; the
# call in class_index_string_xml passes element, xpath, namespace URI and
# namespace prefix.
677 sub xpath_to_string {
681 my $ns_prefix = shift;
# Declare the namespace on the element only when both URI and prefix are given.
684 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
688 # grab the set of matching nodes
689 my @nodes = $xml->findnodes( $xpath );
690 for my $value (@nodes) {
692 # grab all children of the node
693 my @children = $value->childNodes();
694 for my $child (@children) {
696 # add the child's content to the growing buffer
# quotemeta makes the child's text safe to use as a literal regex below.
697 my $content = quotemeta($child->textContent);
698 next if ($unique && $string =~ /$content/); # uniquify the values
699 $string .= $child->textContent . " ";
# NOTE(review): presumably the fallback for a node without children; the
# surrounding condition is not visible in this listing -- confirm.
702 $string .= $value->textContent . " ";
# Turn a hyphen between two four-digit groups into a space.
706 $string =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
# For every requested field class and every field definition of that class in
# $xpathset, extract the matching text from the record, normalize it and
# respond with one "<class>_field_entry" Fieldmapper object per definition.
711 sub class_index_string_xml {
717 OpenILS::Application::Ingest->post_init();
# Accept either an already parsed document (a reference) or an XML string.
718 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
722 for my $class (@classes) {
723 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
724 for my $type ( keys %{ $xpathset->{$class} } ) {
726 my $def = $xpathset->{$class}->{$type};
727 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse the transformed document cached for this format, otherwise apply the
# format's stylesheet; either way the result is stored in the cache.
732 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
733 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix passed to xpath_to_string.
736 my $value = xpath_to_string(
737 $document->documentElement => $def->{xpath},
738 $sf->{ns} => $def->{format},
# Normalize: decompose (NFD), drop \pM (mark) and \pC (other) characters,
# trim trailing non-word characters, and remove runs of dots that sit between
# word boundaries.
744 $value = NFD($value);
745 $value =~ s/\pM+//sgo;
746 $value =~ s/\pC+//sgo;
747 $value =~ s/\W+$//sgo;
749 $value =~ s/\b\.+\b//sgo;
# Build the field entry carrying the value and the definition's id.
752 my $fm = $class_constructor->new;
753 $fm->value( $value );
754 $fm->field( $xpathset->{$class}->{$type}->{id} );
755 $client->respond($fm);
760 __PACKAGE__->register_method(
761 api_name => "open-ils.ingest.field_entry.class.xml",
762 method => "class_index_string_xml",
# Record-id front end to open-ils.ingest.field_entry.class.xml: retrieve the
# record for $rec, run the extraction on its MARC for the given classes and
# respond with each resulting field entry. Returns undef when the record could
# not be retrieved.
768 sub class_index_string_record {
774 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, whereas the
# sibling all_index_string_record reads biblio.record_entry -- confirm which
# table is intended here.
775 my $r = OpenSRF::AppSession
776 ->create('open-ils.cstore')
777 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
780 return undef unless ($r and @$r);
782 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
784 $client->respond($fm);
788 __PACKAGE__->register_method(
789 api_name => "open-ils.ingest.field_entry.class.record",
790 method => "class_index_string_record",
# Run open-ils.ingest.field_entry.class.xml on $xml for every class present in
# $xpathset and respond with each resulting field entry.
796 sub all_index_string_xml {
801 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
802 $client->respond($fm);
806 __PACKAGE__->register_method(
807 api_name => "open-ils.ingest.extract.field_entry.all.xml",
808 method => "all_index_string_xml",
# Record-id front end: retrieve the biblio.record_entry for $rec, run
# open-ils.ingest.field_entry.class.xml on its MARC for every class present in
# $xpathset and respond with each resulting field entry. Returns undef when
# the record could not be retrieved.
814 sub all_index_string_record {
819 OpenILS::Application::Ingest->post_init();
820 my $r = OpenSRF::AppSession
821 ->create('open-ils.cstore')
822 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
825 return undef unless ($r and @$r);
827 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
829 $client->respond($fm);
833 __PACKAGE__->register_method(
834 api_name => "open-ils.ingest.extract.field_entry.all.record",
835 method => "all_index_string_record",
841 # --------------------------------------------------------------------------------
844 package OpenILS::Application::Ingest::FlatMARC;
845 use base qw/OpenILS::Application::Ingest/;
846 use Unicode::Normalize;
# Flatten a parsed MARCXML document into Fieldmapper "full_rec" objects
# collected in @ns_list: the visible loops walk the leader, the controlfields
# and the subfields of every datafield.
# ($ns, the object being filled in, is created on lines not visible here.)
849 sub _marcxml_to_full_rows {
# The second argument selects the Fieldmapper namespace; it defaults to 'metabib'.
852 my $xmltype = shift || 'metabib';
854 my $type = "Fieldmapper::${xmltype}::full_rec";
# Use the first element whose local name is "record", whatever its namespace.
858 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
860 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
861 next unless $tagline;
866 my $val = $tagline->textContent;
876 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
877 next unless $tagline;
881 $ns->tag( $tagline->getAttribute( "tag" ) );
882 my $val = $tagline->textContent;
892 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
893 next unless $tagline;
895 my $tag = $tagline->getAttribute( "tag" );
896 my $ind1 = $tagline->getAttribute( "ind1" );
897 my $ind2 = $tagline->getAttribute( "ind2" );
899 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
907 $ns->subfield( $data->getAttribute( "code" ) );
908 my $val = $data->textContent;
# Turn a hyphen between two four-digit groups into a space, then store the
# value lowercased.
913 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
914 $ns->value( lc($val) );
# For metabib tag 245, subfield "a" is processed again with the first $ind2
# characters of its text skipped.
# NOTE(review): presumably ind2 is the MARC non-filing character count -- confirm.
919 if ($xmltype eq 'metabib' and $tag eq '245') {
922 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
923 next unless ($data and $data->getAttribute( "code" ) eq 'a');
930 $ns->subfield( $data->getAttribute( "code" ) );
931 my $val = substr( $data->textContent, $ind2 );
936 $val =~ s/(\d{4})-(\d{4})/$1 $2/sgo;
937 $ns->value( lc($val) );
944 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the handler registered below as flat_marc_xml (its sub header is not
# visible in this listing): flatten the given MARC XML and respond with each
# full_rec row.
953 $log->debug("processing [$xml]");
# Accept either an already parsed document (a reference) or an XML string.
955 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row type follows the API name the method was called under: 'authority'
# when the name contains "authority", otherwise 'metabib'.
957 my $type = 'metabib';
958 $type = 'authority' if ($self->api_name =~ /authority/o);
960 OpenILS::Application::Ingest->post_init();
962 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
965 __PACKAGE__->register_method(
966 api_name => "open-ils.ingest.flat_marc.authority.xml",
967 method => "flat_marc_xml",
972 __PACKAGE__->register_method(
973 api_name => "open-ils.ingest.flat_marc.biblio.xml",
974 method => "flat_marc_xml",
# Record-id front end to the flat MARC extraction: retrieve the record for
# $rec, run the matching open-ils.ingest.flat_marc.<type>.xml method on its
# MARC and respond with every row. Returns undef when no record with MARC
# data was retrieved.
980 sub flat_marc_record {
# $type becomes 'authority' when the API name contains "authority"; its
# initial value is set on a line not visible in this listing.
986 $type = 'authority' if ($self->api_name =~ /authority/o);
988 OpenILS::Application::Ingest->post_init();
989 my $r = OpenSRF::AppSession
990 ->create('open-ils.cstore')
991 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
995 return undef unless ($r and $r->marc);
997 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
998 for my $row (@rows) {
999 $client->respond($row);
# Each row is also logged as JSON.
1000 $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG);
1004 __PACKAGE__->register_method(
1005 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
1006 method => "flat_marc_record",
1011 __PACKAGE__->register_method(
1012 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
1013 method => "flat_marc_record",
1019 # --------------------------------------------------------------------------------
1022 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1023 use base qw/OpenILS::Application::Ingest/;
1024 use Unicode::Normalize;
1025 use OpenSRF::EX qw/:try/;
# Record-id front end to open-ils.ingest.fingerprint.xml: retrieve the
# biblio.record_entry for $rec and fingerprint its MARC. Returns undef when no
# record with MARC data was retrieved.
1027 sub biblio_fingerprint_record {
1032 OpenILS::Application::Ingest->post_init();
1034 my $r = OpenSRF::AppSession
1035 ->create('open-ils.cstore')
1036 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
1039 return undef unless ($r and $r->marc);
1041 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so
# interpolating it here logs the reference itself rather than the fingerprint
# text -- confirm whether a field of $fp was meant.
1042 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# Truncate the quality score to an integer.
1043 $fp->{quality} = int($fp->{quality});
1046 __PACKAGE__->register_method(
1047 api_name => "open-ils.ingest.fingerprint.record",
1048 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARC XML string by running the configured
# biblio_fingerprint script through OpenILS::Utils::ScriptRunner with the XML
# placed in the script's environment.
1054 sub biblio_fingerprint {
# The XML is taken from the next argument and entity-encoded.
1057 my $xml = OpenILS::Application::Ingest::entityize(shift);
1059 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest app_settings.
1062 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1063 my $conf = OpenSRF::Utils::SettingsClient->new;
1065 my $libs = $conf->config_value(@pfx, 'script_path');
1066 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# Normalize the library path setting to an array reference.
1067 my $script_libs = (ref($libs)) ? $libs : [$libs];
1069 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is assigned without `my`, so it is declared
# elsewhere; presumably the runner is kept between calls -- the guarding
# condition is not visible in this listing.
1071 $fp_script = new OpenILS::Utils::ScriptRunner
1072 ( file => $script_file,
1073 paths => $script_libs,
1074 reset_count => 100 );
1077 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): on a false run() result the `return undef` executes only if
# $log->error returns a true value; otherwise $res simply holds a false value.
1079 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
1080 $log->debug("Script for biblio fingerprinting completed successfully...");
1084 __PACKAGE__->register_method(
1085 api_name => "open-ils.ingest.fingerprint.xml",
1086 method => "biblio_fingerprint",
# Extract a record descriptor from a MARC XML string by running the configured
# biblio_descriptor script through OpenILS::Utils::ScriptRunner, then inspect
# the date1 and date2 values of the result.
1092 sub biblio_descriptor {
# The XML is taken from the next argument and entity-encoded.
1095 my $xml = OpenILS::Application::Ingest::entityize(shift);
1097 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest app_settings.
1100 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
1101 my $conf = OpenSRF::Utils::SettingsClient->new;
1103 my $libs = $conf->config_value(@pfx, 'script_path');
1104 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# Normalize the library path setting to an array reference.
1105 my $script_libs = (ref($libs)) ? $libs : [$libs];
1107 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# NOTE(review): $rd_script is assigned without `my`, so it is declared
# elsewhere; presumably the runner is kept between calls -- the guarding
# condition is not visible in this listing.
1109 $rd_script = new OpenILS::Utils::ScriptRunner
1110 ( file => $script_file,
1111 paths => $script_libs,
1112 reset_count => 100 );
1115 $log->debug("Setting up environment for descriptor extraction script...");
# Unlike biblio_fingerprint, the XML is inserted under the dotted key
# 'environment.marc' rather than as a hash under 'environment'.
1116 $rd_script->insert('environment.marc' => $xml => 1);
1117 $log->debug("Environment building complete...");
# NOTE(review): on a false run() result the `return undef` executes only if
# $log->error returns a true value; otherwise $res simply holds a false value.
1119 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1120 $log->debug("Script for biblio descriptor extraction completed successfully");
# Each date is handled only when it is true and differs from the blank
# placeholder string; what is done with it is not visible in this listing.
1122 my $d1 = $res->date1;
1123 if ($d1 && $d1 ne ' ') {
1128 my $d2 = $res->date2;
1129 if ($d2 && $d2 ne ' ') {
1136 __PACKAGE__->register_method(
1137 api_name => "open-ils.ingest.descriptor.xml",
1138 method => "biblio_descriptor",