1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Registry of the XML formats this module knows about, keyed by a short
# format name. Each entry records the format's namespace URI under `ns`.
# Elsewhere in this file an `xslt` slot is added to the `mods` and `mods3`
# entries once their stylesheets have been parsed.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger is used as a class: log calls in this file are class-method calls on
# the name stored here.
39 my $log = 'OpenSRF::Utils::Logger';
# One XML parser and one XSLT processor, created at load time and shared by
# the subs below.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# Lazy one-time setup; skipped as soon as $xpathset has any keys.
# NOTE(review): presumably the body of post_init() (see the log message and
# the many OpenILS::Application::Ingest->post_init() callers); the sub header
# and closing braces are not visible in this listing — confirm.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
# Stylesheet directory, looked up via SettingsClient config_value(dirs => 'xsl').
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Parse each MARC21slim-to-MODS stylesheet only when it is not cached yet, and
# keep the compiled stylesheet in the format registry.
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for config.metabib_field rows whose id is not undef
# (presumably every row — confirm against cstore query semantics).
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
72 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Only proceed when a non-empty array reference came back.
75 if (ref $req and @$req) {
# Index each field definition $f by field class and name, keeping its XPath,
# id and source format. (The loop header declaring $f is not visible here.)
77 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with an uppercase
# hexadecimal numeric character reference (&#x...;).
# NOTE(review): presumably part of entityize(), which callers in this file
# apply to MARC XML before parsing; the sub header is not visible — confirm.
96 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
100 # --------------------------------------------------------------------------------
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
# Read-write ingest of one bib record object ($bib).
#
# Runs the read-only ingest method to obtain the derived data, copies the
# resulting fingerprint and quality onto $bib, and then, over a connected
# cstore session on which a transaction has been begun, replaces the record's
# full_rec, record_descriptor, classed field_entry and metarecord_source_map
# rows, picks a master record for the matching metarecord, maps the bib to
# it, updates the bib and commits.
#
# Returns undef when the read-only ingest yields nothing or when the commit
# request returns a false value.
# NOTE(review): argument unpacking, several conditionals and all closing
# braces are not visible in this listing.
107 sub rw_biblio_ingest_single_object {
# Derive the data for $bib via the read-only ingest method.
112 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113 return undef unless ($blob);
115 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116 $bib->quality( $blob->{fingerprint}->{quality} );
118 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
120 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
122 # update full_rec stuff ...
# List the ids of the existing rows for this record, delete each one, then
# create the freshly derived rows.
123 my $tmp = $cstore->request(
124 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125 { record => $bib->id }
128 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
131 # update rec_descriptor stuff ...
132 $tmp = $cstore->request(
133 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134 { record => $bib->id }
137 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
140 # deal with classed fields...
# Remove the old field entries of every index class for this source record.
141 for my $class ( qw/title author subject keyword series/ ) {
142 $tmp = $cstore->request(
143 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144 { source => $bib->id }
147 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Create the new field entries; the cstore method name is built from each
# object's class name with the leading "Fieldmapper::" removed.
149 for my $obj ( @{ $blob->{field_entries} } ) {
150 my $class = $obj->class_name;
151 $class =~ s/^Fieldmapper:://o;
153 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Drop the existing metarecord mappings whose source is this bib.
158 $tmp = $cstore->request(
159 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
160 { source => $bib->id }
163 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);
166 # Get the matching MR, if any.
167 my $mr = $cstore->request(
168 'open-ils.cstore.direct.metabib.metarecord.search',
169 { fingerprint => $bib->fingerprint }
# Build a new metarecord for this fingerprint with this bib as its master.
# (The condition guarding this branch is not visible in this listing.)
173 $mr = new Fieldmapper::metabib::metarecord;
174 $mr->fingerprint( $bib->fingerprint );
175 $mr->master_record( $bib->id );
178 "open-ils.cstore.direct.metabib.metarecord.create",
179 $mr => { quiet => 'true' }
# Fetch the source maps already attached to the metarecord.
183 my $mrm = $cstore->request(
184 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
185 { metarecord => $mr->id }
# Search the mapped bib records, selecting id and quality ordered by quality
# descending.
189 my $best = $cstore->request(
190 "open-ils.cstore.direct.biblio.record_entry.search",
191 { id => [ map { $_->source } @$mrm ] },
192 { 'select' => { bre => [ qw/id quality/ ] },
193 order_by => { bre => "quality desc" },
# The existing record stays master only when its quality is strictly higher
# than this bib's; otherwise this bib becomes the master.
198 if ($best->quality > $bib->quality) {
199 $mr->master_record($best->id);
201 $mr->master_record($bib->id);
204 $mr->master_record($bib->id);
209 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the metarecord and persist the updated bib itself.
212 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
213 $mrm->source($bib->id);
214 $mrm->metarecord($mr->id);
216 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
217 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# NOTE(review): stray double semicolon at the end of this statement (harmless).
219 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Publish the sub under its OpenSRF API name.
223 __PACKAGE__->register_method(
224 api_name => "open-ils.ingest.full.biblio.object",
225 method => "rw_biblio_ingest_single_object",
# Read-write ingest of the bib record identified by $rec.
#
# Retrieves the biblio.record_entry over a connected cstore session (a
# transaction is begun for the read and a rollback is issued afterwards),
# then hands the retrieved object to open-ils.ingest.full.biblio.object and
# returns that method's first result. Returns undef when nothing usable was
# retrieved.
230 sub rw_biblio_ingest_single_record {
235 OpenILS::Application::Ingest->post_init();
236 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
237 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
239 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
241 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# $r is dereferenced as an array, so an empty result also counts as missing.
244 return undef unless ($r and @$r);
246 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Publish the sub under its OpenSRF API name.
248 __PACKAGE__->register_method(
249 api_name => "open-ils.ingest.full.biblio.record",
250 method => "rw_biblio_ingest_single_record",
# Read-only ingest of a bib record object ($bib).
#
# Entity-escapes and parses $bib->marc, gathers the flat MARC rows, the field
# entries, the fingerprint and the descriptor through the corresponding
# open-ils.ingest.* methods, stamps the rows with $bib->id and returns a hash
# with keys full_rec, field_entries, fingerprint and descriptor.
255 sub ro_biblio_ingest_single_object {
259 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
261 my $document = $parser->parse_string($xml);
# The first two methods receive the parsed document; the fingerprint and
# descriptor methods receive the XML string.
263 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
264 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
265 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
266 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Tie every derived row back to the bib; the descriptor may be absent.
268 $_->source($bib->id) for (@mXfe);
269 $_->record($bib->id) for (@mfr);
270 $rd->record($bib->id) if ($rd);
272 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its OpenSRF API name.
274 __PACKAGE__->register_method(
275 api_name => "open-ils.ingest.full.biblio.object.readonly",
276 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a raw MARC XML string (the next argument on @_).
#
# Same pipeline as the object variant, but no record id is stamped on the
# results because only XML is supplied. Returns a hash with keys full_rec,
# field_entries, fingerprint and descriptor.
281 sub ro_biblio_ingest_single_xml {
284 my $xml = OpenILS::Application::Ingest::entityize(shift);
286 my $document = $parser->parse_string($xml);
# Parsed document for row/field extraction, XML string for the scripts.
288 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
289 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
290 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
291 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
293 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub under its OpenSRF API name.
295 __PACKAGE__->register_method(
296 api_name => "open-ils.ingest.full.biblio.xml.readonly",
297 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of the bib record identified by $rec.
#
# Retrieves the biblio.record_entry from cstore, runs the XML read-only
# ingest on its MARC and stamps the resulting rows with $rec. Returns undef
# when nothing usable was retrieved.
302 sub ro_biblio_ingest_single_record {
307 OpenILS::Application::Ingest->post_init();
308 my $r = OpenSRF::AppSession
309 ->create('open-ils.cstore')
310 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
313 return undef unless ($r and @$r);
315 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
317 $_->source($rec) for (@{$res->{field_entries}});
318 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unguarded, unlike ro_biblio_ingest_single_object which only
# calls ->record when a descriptor came back; this dies if the descriptor
# is undef — confirm whether a guard is wanted.
319 $res->{descriptor}->record($rec);
# Publish the sub under its OpenSRF API name.
323 __PACKAGE__->register_method(
324 api_name => "open-ils.ingest.full.biblio.record.readonly",
325 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest keyed by record id.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as a record id, runs the single-record read-only ingest and
# responds with the result. Stops on the first undefined content.
330 sub ro_biblio_ingest_stream_record {
334 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
336 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
338 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
340 my $rec = $resp->content;
341 last unless (defined $rec);
343 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
344 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the rows with the id that was received.
346 $_->source($rec) for (@{$res->{field_entries}});
347 $_->record($rec) for (@{$res->{full_rec}});
349 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
354 __PACKAGE__->register_method(
355 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
356 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of raw MARC XML.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as MARC XML, runs the XML read-only ingest and responds with
# the result. Stops on the first undefined content.
361 sub ro_biblio_ingest_stream_xml {
365 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
367 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
369 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
371 my $xml = $resp->content;
372 last unless (defined $xml);
374 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
375 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
377 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
382 __PACKAGE__->register_method(
383 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
384 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib record objects.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as a bib object, runs the XML read-only ingest on its MARC,
# stamps the rows with the object's id and responds with the result.
# NOTE(review): despite the "rw"/"import" naming, the lines visible in this
# listing only derive and return data; no create/update request is visible.
389 sub rw_biblio_ingest_stream_import {
393 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
395 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
397 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
399 my $bib = $resp->content;
400 last unless (defined $bib);
402 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
403 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
405 $_->source($bib->id) for (@{$res->{field_entries}});
406 $_->record($bib->id) for (@{$res->{full_rec}});
408 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
413 __PACKAGE__->register_method(
414 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
415 method => "rw_biblio_ingest_stream_import",
421 # --------------------------------------------------------------------------------
424 package OpenILS::Application::Ingest::Authority;
425 use base qw/OpenILS::Application::Ingest/;
426 use Unicode::Normalize;
# Read-only ingest of an authority record object (held in $bib).
#
# Entity-escapes and parses the object's MARC, extracts the flat MARC rows
# through the authority flat_marc method, stamps them with the object's id
# and returns a hash whose only key is full_rec.
428 sub ro_authority_ingest_single_object {
432 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
434 my $document = $parser->parse_string($xml);
436 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
438 $_->record($bib->id) for (@mfr);
440 return { full_rec => \@mfr };
# Publish the sub under its OpenSRF API name.
442 __PACKAGE__->register_method(
443 api_name => "open-ils.ingest.full.authority.object.readonly",
444 method => "ro_authority_ingest_single_object",
# Read-only ingest of raw authority MARC XML (the next argument on @_).
#
# Returns a hash whose only key is full_rec, holding the flat MARC rows; no
# record id is stamped and no descriptor is produced.
449 sub ro_authority_ingest_single_xml {
452 my $xml = OpenILS::Application::Ingest::entityize(shift);
454 my $document = $parser->parse_string($xml);
456 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
458 return { full_rec => \@mfr };
# Publish the sub under its OpenSRF API name.
460 __PACKAGE__->register_method(
461 api_name => "open-ils.ingest.full.authority.xml.readonly",
462 method => "ro_authority_ingest_single_xml",
# Read-only ingest of the authority record identified by $rec.
#
# Retrieves the authority.record_entry from cstore, runs the authority XML
# read-only ingest on its MARC and stamps the resulting rows with $rec.
# Returns undef when nothing usable was retrieved.
467 sub ro_authority_ingest_single_record {
472 OpenILS::Application::Ingest->post_init();
473 my $r = OpenSRF::AppSession
474 ->create('open-ils.cstore')
475 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
478 return undef unless ($r and @$r);
480 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
482 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority XML ingest in this file returns only
# { full_rec => ... }, so $res->{descriptor} looks undefined here and this
# method call would die. Likely copied from the biblio variant — confirm and
# guard or remove.
483 $res->{descriptor}->record($rec);
# Publish the sub under its OpenSRF API name.
487 __PACKAGE__->register_method(
488 api_name => "open-ils.ingest.full.authority.record.readonly",
489 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest keyed by record id.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as an authority record id, runs the single-record read-only
# ingest and responds with the result. Stops on the first undefined content.
494 sub ro_authority_ingest_stream_record {
498 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
500 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
502 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
504 my $rec = $resp->content;
505 last unless (defined $rec);
507 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
508 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
510 $_->record($rec) for (@{$res->{full_rec}});
512 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
517 __PACKAGE__->register_method(
518 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
519 method => "ro_authority_ingest_stream_record",
# Streaming read-only ingest of raw authority MARC XML.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as MARC XML, runs the authority XML read-only ingest and
# responds with the result. Stops on the first undefined content.
524 sub ro_authority_ingest_stream_xml {
528 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
530 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
532 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
534 my $xml = $resp->content;
535 last unless (defined $xml);
537 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
538 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
540 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
545 __PACKAGE__->register_method(
546 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
547 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects.
#
# Repeatedly receives one message from $client (5 second timeout), treats
# its content as a record object, runs the authority XML read-only ingest on
# its MARC, stamps the rows with the object's id and responds with the result.
# NOTE(review): despite the "rw"/"import" naming, no create/update request is
# visible in this listing.
552 sub rw_authority_ingest_stream_import {
556 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in the lines visible in this listing.
558 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
560 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
562 my $bib = $resp->content;
563 last unless (defined $bib);
565 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
566 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
568 $_->record($bib->id) for (@{$res->{full_rec}});
570 $client->respond( $res );
# Publish the sub under its OpenSRF API name.
575 __PACKAGE__->register_method(
576 api_name => "open-ils.ingest.full.authority.bib_stream.import",
577 method => "rw_authority_ingest_stream_import",
583 # --------------------------------------------------------------------------------
584 # MARC index extraction
586 package OpenILS::Application::Ingest::XPATH;
587 use base qw/OpenILS::Application::Ingest/;
588 use Unicode::Normalize;
590 # give this an XML documentElement and an XPATH expression
# Concatenates the text found at the nodes matching $xpath into $string, one
# space after each piece. When both a namespace URI and prefix are supplied
# they are registered on the element first. With $unique set, a child's text
# is skipped if it already occurs anywhere in the accumulated string.
# NOTE(review): the unpacking of the other arguments and the return statement
# are not visible in this listing.
591 sub xpath_to_string {
595 my $ns_prefix = shift;
598 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
602 # grab the set of matching nodes
603 my @nodes = $xml->findnodes( $xpath );
604 for my $value (@nodes) {
606 # grab all children of the node
607 my @children = $value->childNodes();
608 for my $child (@children) {
610 # add the childs content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern in the match below.
611 my $content = quotemeta($child->textContent);
612 next if ($unique && $string =~ /$content/); # uniquify the values
613 $string .= $child->textContent . " ";
# Appends the matched node's own text. (The condition choosing between the
# per-child loop above and this line is not visible in this listing.)
616 $string .= $value->textContent . " ";
# Extracts index strings from a MARC XML record for the requested classes.
#
# $xml may be a string (it is entity-escaped and parsed) or an already parsed
# document. For every class in @classes and every field definition loaded
# into $xpathset for that class, the record is transformed with the
# stylesheet of the definition's format, the definition's XPath is evaluated,
# the text is normalised, and a ${class}_field_entry object carrying the
# value and the field id is sent to $client.
622 sub class_index_string_xml {
628 OpenILS::Application::Ingest->post_init();
629 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
633 for my $class (@classes) {
634 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
635 for my $type ( keys %{ $xpathset->{$class} } ) {
637 my $def = $xpathset->{$class}->{$type};
638 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse a previously transformed document for this format when there is one;
# otherwise run the XSLT and remember the result.
643 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
644 $transform_cache{$def->{format}} = $document;
# The format's namespace URI is passed with the format name as its prefix.
647 my $value = xpath_to_string(
648 $document->documentElement => $def->{xpath},
649 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), drop mark characters (\pM) and "Other" category
# characters (\pC), trim trailing non-word characters, and remove dots that
# sit between two word characters.
655 $value = NFD($value);
656 $value =~ s/\pM+//sgo;
657 $value =~ s/\pC+//sgo;
658 $value =~ s/\W+$//sgo;
660 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
663 my $fm = $class_constructor->new;
664 $fm->value( $value );
665 $fm->field( $xpathset->{$class}->{$type}->{id} );
666 $client->respond($fm);
# Publish the sub under its OpenSRF API name.
671 __PACKAGE__->register_method(
672 api_name => "open-ils.ingest.field_entry.class.xml",
673 method => "class_index_string_xml",
# Extracts index strings for the requested classes from a stored record.
#
# Retrieves the record identified by $rec from cstore, runs the class-level
# XML extraction on its MARC for @classes and forwards every resulting field
# entry to $client. Returns undef when nothing usable was retrieved.
679 sub class_index_string_record {
685 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves from authority.record_entry, whereas the
# sibling all_index_string_record retrieves from biblio.record_entry and both
# feed the same field-entry extraction — confirm which table is intended.
686 my $r = OpenSRF::AppSession
687 ->create('open-ils.cstore')
688 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
691 return undef unless ($r and @$r);
693 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
695 $client->respond($fm);
# Publish the sub under its OpenSRF API name.
699 __PACKAGE__->register_method(
700 api_name => "open-ils.ingest.field_entry.class.record",
701 method => "class_index_string_record",
# Extracts index strings for every class present in $xpathset from the given
# XML, forwarding each resulting field entry to $client.
707 sub all_index_string_xml {
712 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
713 $client->respond($fm);
# Publish the sub under its OpenSRF API name.
717 __PACKAGE__->register_method(
718 api_name => "open-ils.ingest.extract.field_entry.all.xml",
719 method => "all_index_string_xml",
# Extracts index strings for every class present in $xpathset from the bib
# record identified by $rec.
#
# Retrieves the biblio.record_entry from cstore, runs the class-level XML
# extraction on its MARC and forwards each resulting field entry to $client.
# Returns undef when nothing usable was retrieved.
725 sub all_index_string_record {
730 OpenILS::Application::Ingest->post_init();
731 my $r = OpenSRF::AppSession
732 ->create('open-ils.cstore')
733 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
736 return undef unless ($r and @$r);
738 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
740 $client->respond($fm);
# Publish the sub under its OpenSRF API name.
744 __PACKAGE__->register_method(
745 api_name => "open-ils.ingest.extract.field_entry.all.record",
746 method => "all_index_string_record",
752 # --------------------------------------------------------------------------------
755 package OpenILS::Application::Ingest::FlatMARC;
756 use base qw/OpenILS::Application::Ingest/;
757 use Unicode::Normalize;
# Flattens a parsed MARCXML document into full_rec row objects.
#
# $xmltype (default 'metabib') selects the row class
# Fieldmapper::<xmltype>::full_rec. The first element whose local name is
# "record" is walked in three passes: leader, controlfield, and datafield
# with its subfields.
# NOTE(review): the construction of each row object ($ns), most of its
# setters and the return statement are not visible in this listing.
760 sub _marcxml_to_full_rows {
763 my $xmltype = shift || 'metabib';
765 my $type = "Fieldmapper::${xmltype}::full_rec";
# local-name() makes the lookup independent of the namespace prefix.
769 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Pass 1: the leader.
771 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
772 next unless $tagline;
777 my $val = $tagline->textContent;
# Pass 2: control fields, keyed by their tag attribute.
787 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
788 next unless $tagline;
792 $ns->tag( $tagline->getAttribute( "tag" ) );
793 my $val = $tagline->textContent;
# Pass 3: data fields; tag and both indicators are read once per field and
# each subfield is then visited.
803 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
804 next unless $tagline;
806 my $tag = $tagline->getAttribute( "tag" );
807 my $ind1 = $tagline->getAttribute( "ind1" );
808 my $ind2 = $tagline->getAttribute( "ind2" );
810 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
818 $ns->subfield( $data->getAttribute( "code" ) );
819 my $val = $data->textContent;
# Subfield values are stored lower-cased.
824 $ns->value( lc($val) );
830 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Body of the handler registered below as "flat_marc_xml" (the sub header is
# not visible in this listing). Accepts MARC XML as a string or a parsed
# document, picks the row type from the API name it was invoked under, and
# sends every flattened row to $client.
839 $log->debug("processing [$xml]");
# Strings are entity-escaped and parsed; references are used as they are.
841 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# One sub serves both registrations; "authority" in the API name switches
# the row type from the default 'metabib'.
843 my $type = 'metabib';
844 $type = 'authority' if ($self->api_name =~ /authority/o);
846 OpenILS::Application::Ingest->post_init();
848 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Publish the same sub under the authority and the biblio API names.
851 __PACKAGE__->register_method(
852 api_name => "open-ils.ingest.flat_marc.authority.xml",
853 method => "flat_marc_xml",
858 __PACKAGE__->register_method(
859 api_name => "open-ils.ingest.flat_marc.biblio.xml",
860 method => "flat_marc_xml",
# Flattens the MARC of a stored record into full_rec rows.
#
# Retrieves the record identified by $rec from the <type>.record_entry table
# in cstore, runs the matching flat_marc XML method on its MARC and sends each
# row to $client. Returns undef when the record or its MARC is missing.
# NOTE(review): the default value of $type is not visible in this listing.
866 sub flat_marc_record {
# "authority" in the API name selects the authority table and method.
872 $type = 'authority' if ($self->api_name =~ /authority/o);
874 OpenILS::Application::Ingest->post_init();
875 my $r = OpenSRF::AppSession
876 ->create('open-ils.cstore')
877 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
881 return undef unless ($r and $r->marc);
883 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
884 for my $row (@rows) {
885 $client->respond($row);
# Each row is also serialised to JSON for the debug log.
886 $log->debug(JSON->perl2JSON($row), DEBUG);
# Publish the same sub under the biblio and the authority API names.
890 __PACKAGE__->register_method(
891 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
892 method => "flat_marc_record",
897 __PACKAGE__->register_method(
898 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
899 method => "flat_marc_record",
905 # --------------------------------------------------------------------------------
908 package OpenILS::Application::Ingest::Biblio::Fingerprint;
909 use base qw/OpenILS::Application::Ingest/;
910 use Unicode::Normalize;
911 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of the bib record identified by $rec.
#
# Retrieves the biblio.record_entry from cstore, runs the XML fingerprint
# method on its MARC and truncates the resulting quality to an integer.
# Returns undef when the record or its MARC is missing.
913 sub biblio_fingerprint_record {
918 OpenILS::Application::Ingest->post_init();
920 my $r = OpenSRF::AppSession
921 ->create('open-ils.cstore')
922 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
925 return undef unless ($r and $r->marc);
927 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash reference on the next line, so this
# message interpolates the reference itself rather than the fingerprint
# string — confirm whether $fp->{fingerprint} was intended.
928 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
929 $fp->{quality} = int($fp->{quality});
# Publish the sub under its OpenSRF API name.
932 __PACKAGE__->register_method(
933 api_name => "open-ils.ingest.fingerprint.record",
934 method => "biblio_fingerprint_record",
# Runs the configured fingerprinting script over a MARC XML string.
#
# The XML (next argument on @_) is entity-escaped, the script path and script
# file are read from the open-ils.ingest app_settings, a ScriptRunner is
# built for the script, the XML is placed in the script environment under
# `marc`, and the script is run. Returns undef after logging an error when
# the run yields a false value.
# NOTE(review): $fp_script is not declared with `my` here; presumably it is a
# cached runner guarded by a condition not visible in this listing — confirm.
940 sub biblio_fingerprint {
943 my $xml = OpenILS::Application::Ingest::entityize(shift);
945 $log->internal("Got MARC [$xml]");
# Settings path prefix shared by the two lookups below.
948 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
949 my $conf = OpenSRF::Utils::SettingsClient->new;
951 my $libs = $conf->config_value(@pfx, 'script_path');
952 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; always pass an array reference.
953 my $script_libs = (ref($libs)) ? $libs : [$libs];
955 $log->debug("Loading script $script_file for biblio fingerprinting...");
957 $fp_script = new OpenILS::Utils::ScriptRunner
958 ( file => $script_file,
959 paths => $script_libs,
960 reset_count => 100 );
963 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): the early return only fires when $log->error(...) returns a
# true value — confirm the logger's return value.
965 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
966 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub under its OpenSRF API name.
970 __PACKAGE__->register_method(
971 api_name => "open-ils.ingest.fingerprint.xml",
972 method => "biblio_fingerprint",
# Runs the configured descriptor-extraction script over a MARC XML string.
#
# Mirrors biblio_fingerprint: the XML (next argument on @_) is
# entity-escaped, the script path and script file are read from the
# open-ils.ingest app_settings, a ScriptRunner is built, the XML is inserted
# under 'environment.marc', and the script is run. Returns undef after
# logging an error when the run yields a false value.
# NOTE(review): $rd_script is not declared with `my` here; presumably it is a
# cached runner guarded by a condition not visible in this listing — confirm.
978 sub biblio_descriptor {
981 my $xml = OpenILS::Application::Ingest::entityize(shift);
983 $log->internal("Got MARC [$xml]");
# Settings path prefix shared by the two lookups below.
986 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
987 my $conf = OpenSRF::Utils::SettingsClient->new;
989 my $libs = $conf->config_value(@pfx, 'script_path');
990 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; always pass an array reference.
991 my $script_libs = (ref($libs)) ? $libs : [$libs];
993 $log->debug("Loading script $script_file for biblio descriptor extraction...");
995 $rd_script = new OpenILS::Utils::ScriptRunner
996 ( file => $script_file,
997 paths => $script_libs,
998 reset_count => 100 );
1001 $log->debug("Setting up environment for descriptor extraction script...");
1002 $rd_script->insert('environment.marc' => $xml => 1);
1003 $log->debug("Environment building complete...");
# NOTE(review): the early return only fires when $log->error(...) returns a
# true value — confirm the logger's return value.
1005 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1006 $log->debug("Script for biblio descriptor extraction completed successfully");
# Publish the sub under its OpenSRF API name.
1010 __PACKAGE__->register_method(
1011 api_name => "open-ils.ingest.descriptor.xml",
1012 method => "biblio_descriptor",
# Returns whatever open-ils.storage.transaction.current reports, which
# callers in this file use as a boolean "already inside a transaction" test.
1022 sub in_transaction {
1023 OpenILS::Application::Ingest->post_init();
1024 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction for the ingest code.
#
# Asks storage for the current transaction, then issues
# open-ils.storage.transaction.begin with $client. If that returns a false
# value a rollback is requested and OpenSRF::EX::PANIC is thrown. Returns the
# current transaction as reported by storage.
# NOTE(review): the test on $outer_xact and the surrounding try/catch are not
# visible in this listing.
1027 sub begin_transaction {
1031 OpenILS::Application::Ingest->post_init();
1032 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1036 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1037 #__PACKAGE__->st_sess->connect;
1038 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A false or undefined result counts as failure: undo and raise.
1039 unless (defined $r and $r) {
1040 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1041 #__PACKAGE__->st_sess->disconnect;
1042 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1046 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1049 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the current storage transaction.
#
# Asks storage for the current transaction and requests a rollback; any
# error caught by the handler is re-raised as OpenSRF::EX::PANIC.
# NOTE(review): the test on $outer_xact that selects between the rollback and
# the "isn't inside a transaction" log line is not visible in this listing.
1052 sub rollback_transaction {
1056 OpenILS::Application::Ingest->post_init();
1057 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1061 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1063 $log->debug("Ingest isn't inside a transaction.", INFO);
1065 } catch Error with {
1066 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the current storage transaction.
#
# Asks storage for the current transaction and requests a commit. A false or
# undefined commit result triggers a rollback request and an
# OpenSRF::EX::PANIC; any error caught by the handler is likewise re-raised
# as OpenSRF::EX::PANIC.
# NOTE(review): the active test on $outer_xact is not visible in this listing
# (only its commented-out predecessor is).
1072 sub commit_transaction {
1076 OpenILS::Application::Ingest->post_init();
1077 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1080 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1082 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
1083 unless (defined $r and $r) {
1084 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1085 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1087 #__PACKAGE__->st_sess->disconnect;
1089 $log->debug("Ingest isn't inside a transaction.", INFO);
1091 } catch Error with {
1092 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Look up $method, run it with the remaining arguments and return only the
# first element of the result list.
# NOTE(review): presumably the body of storage_req(), the helper used
# throughout this file; the sub header is not visible here — confirm.
1101 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1102 return shift( @res );
# Removes the derived rows of authority record $rec.
#
# Starts a transaction when none is active, sets a savepoint, mass-deletes
# the record's authority full_rec and record_descriptor rows and releases the
# savepoint. On failure the error is logged and the savepoint is rolled back.
# When $commit is set, the transaction is finally committed on success or
# rolled back on failure.
# NOTE(review): the try/catch framing and the assignments to $commit and
# $success are not visible in this listing.
1105 sub scrub_authority_record {
1111 if (!OpenILS::Application::Ingest->in_transaction) {
1112 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
# The savepoint lets a failed scrub be undone without aborting an outer
# transaction.
1118 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1120 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1121 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1123 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the first handler argument and undo to the savepoint.
1125 $log->debug('Scrubbing failed : '.shift(), ERROR);
1126 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
1130 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1131 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its legacy "worm" API name.
1134 __PACKAGE__->register_method(
1135 api_name => "open-ils.worm.scrub.authority",
1136 method => "scrub_authority_record",
# Removes the derived metabib rows of bib record(s) $rec and repairs the
# metarecords that used $rec as master.
#
# $rec may be a hash of search criteria, in which case it is first resolved
# through the record_entry search_where id_list method. Inside a savepoint
# the full_rec, metarecord_source_map, record_descriptor and the five classed
# field_entry tables are mass-deleted for $rec. Each metarecord whose master
# is $rec is then either re-pointed at another mapped source or deleted. On
# failure the error is logged and the savepoint is rolled back. When $commit
# is set, the transaction is finally committed on success or rolled back on
# failure.
# NOTE(review): the try/catch framing, the test that chooses between
# re-pointing and deleting a metarecord, and the assignments to $commit and
# $success are not visible in this listing.
1142 sub scrub_metabib_record {
# Resolve search criteria through the id_list search.
1147 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1148 $rec = OpenILS::Application::Ingest->storage_req(
1149 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1154 if (!OpenILS::Application::Ingest->in_transaction) {
1155 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1161 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# full_rec and record_descriptor are keyed by `record`; the source map and
# the field entries are keyed by `source`.
1163 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1164 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1165 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1166 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1167 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1168 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1169 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1170 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1172 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1173 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1175 for my $mr (@$masters) {
1176 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Remaining source maps for this metarecord (the maps for $rec itself were
# deleted above).
1177 my $others = OpenILS::Application::Ingest->storage_req(
1178 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the stored mods.
1181 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1182 $mr->master_record($others->[0]->source);
1183 OpenILS::Application::Ingest->storage_req(
1184 'open-ils.storage.direct.metabib.metarecord.remote_update',
1186 { master_record => $others->[0]->source, mods => undef }
# Otherwise the metarecord is removed.
# NOTE(review): the warn calls duplicate the debug log lines on STDERR.
1189 warn "Removing metarecord whose master is $rec";
1190 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1191 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1192 warn "Metarecord removed";
1193 $log->debug( "Metarecord removed", DEBUG);
1197 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the first handler argument and undo to the savepoint.
1200 $log->debug('Scrubbing failed : '.shift(), ERROR);
1201 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1205 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1206 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub under its legacy "worm" API name.
1209 __PACKAGE__->register_method(
1210 api_name => "open-ils.worm.scrub.biblio",
1211 method => "scrub_metabib_record",
# Re-ingests every bib record mapped to metarecord $mrec.
#
# Looks up the metarecord's source maps through storage and calls
# wormize_biblio_record for each mapped source, passing $self and $client
# along. A hash holding the record, the metarecord and the success flag is
# built on both the normal and the error path.
# NOTE(review): what is done with those hashes (presumably a respond call),
# the try framing and the argument unpacking are not visible in this listing.
1216 sub wormize_biblio_metarecord {
1221 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1224 for my $r (@$recs) {
# Called as a plain function with $self passed explicitly.
1227 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): `$rec` is not declared in any visible line of this sub; the
# loop variable is $r and the argument is $mrec. If $rec is indeed undefined,
# `$rec->metarecord` dies here and again in the catch block below. Likely
# meant `$r->metarecord` — confirm.
1229 { record => $r->source,
1230 metarecord => $rec->metarecord,
1231 success => $success,
1234 } catch Error with {
1237 { record => $r->source,
1238 metarecord => $rec->metarecord,
1239 success => $success,
# Publish the same sub under four API names: plain, nomap, noscrub and
# nomap.noscrub.
1247 __PACKAGE__->register_method(
1248 api_name => "open-ils.worm.wormize.metarecord",
1249 method => "wormize_biblio_metarecord",
1254 __PACKAGE__->register_method(
1255 api_name => "open-ils.worm.wormize.metarecord.nomap",
1256 method => "wormize_biblio_metarecord",
1261 __PACKAGE__->register_method(
1262 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1263 method => "wormize_biblio_metarecord",
1268 __PACKAGE__->register_method(
1269 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1270 method => "wormize_biblio_metarecord",
# Ingest ("wormize") one or more bibliographic records.
#
# $rec may be a hash, in which case it is first resolved through the
# id_list search_where storage call.  A transaction is begun when none is
# active.  Unless the api_name contains "noscrub", the scrub method is run
# first.  For every record found, the MARC is parsed and the fingerprint,
# flattened full_rec rows, record descriptor and per-class field entries are
# collected; unless the api_name contains "nomap", a metarecord and a source
# map entry are prepared as well.  The collected rows are then written with
# batch.create calls inside the 'wormize_record' savepoint.
1277 sub wormize_biblio_record {
1282 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1283 $rec = OpenILS::Application::Ingest->storage_req(
1284 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1290 if (!OpenILS::Application::Ingest->in_transaction) {
1291 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1297 # clean up the cruft
1298 unless ($self->api_name =~ /noscrub/o) {
1299 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1303 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1306 my @rec_descriptor = ();
1314 my %metarecord = ();
1315 my @source_map = ();
1316 for my $r (@$bibs) {
# Each record gets its own savepoint, named after the record id, so a failed
# extraction can be rolled back on its own (see the rollback call below).
1318 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1320 my $xml = $parser->parse_string($r->marc);
1322 #update the fingerprint
1323 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1324 OpenILS::Application::Ingest->storage_req(
1325 'open-ils.storage.direct.biblio.record_entry.remote_update',
1327 { fingerprint => $fp->{fingerprint},
1328 quality => int($fp->{quality}) }
# NOTE(review): quality is compared with the string operator "ne" here,
# while refingerprint_bibrec compares it with "!=" -- confirm which is meant.
1329 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1331 # the full_rec stuff
1332 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1333 $fr->record( $r->id );
1334 push @full_rec, $fr;
1337 # the rec_descriptor stuff
1338 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1339 $rd->record( $r->id );
1340 push @rec_descriptor, $rd;
1342 # the indexing field entry stuff
1343 for my $class ( qw/title author subject keyword series/ ) {
1344 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1345 $fe->source( $r->id );
1346 push @{$field_entry{$class}}, $fe;
1350 unless ($self->api_name =~ /nomap/o) {
# Look for an existing metarecord with this fingerprint; the lines below
# build a new one with this record as master.
1351 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1354 $mr = Fieldmapper::metabib::metarecord->new;
1355 $mr->fingerprint( $fp->{fingerprint} );
1356 $mr->master_record( $r->id );
1357 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1360 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1361 $mr_map->metarecord( $mr->id );
1362 $mr_map->source( $r->id );
1363 push @source_map, $mr_map;
1365 $metarecord{$mr->id} = $mr;
1367 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
1369 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1370 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Write phase: only entered when at least one record descriptor was collected.
1375 if (@rec_descriptor) {
1376 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1378 OpenILS::Application::Ingest->storage_req(
1379 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For every metarecord touched above, re-pick the master record: the mapped
# bib with the highest quality (descending numeric sort, first element).
1383 for my $mr ( values %metarecord ) {
1384 my $sources = OpenILS::Application::Ingest->storage_req(
1385 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1389 my $bibs = OpenILS::Application::Ingest->storage_req(
1390 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1391 [ map { $_->source } @$sources ]
1394 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1396 OpenILS::Application::Ingest->storage_req(
1397 'open-ils.storage.direct.metabib.metarecord.remote_update',
1399 { master_record => $master->id, mods => undef }
1403 OpenILS::Application::Ingest->storage_req(
1404 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1406 ) if (@rec_descriptor);
1408 OpenILS::Application::Ingest->storage_req(
1409 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch.create per index class, each skipped when that class collected
# no field entries.
1413 OpenILS::Application::Ingest->storage_req(
1414 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1415 @{ $field_entry{title} }
1416 ) if (@{ $field_entry{title} });
1418 OpenILS::Application::Ingest->storage_req(
1419 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1420 @{ $field_entry{author} }
1421 ) if (@{ $field_entry{author} });
1423 OpenILS::Application::Ingest->storage_req(
1424 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1425 @{ $field_entry{subject} }
1426 ) if (@{ $field_entry{subject} });
1428 OpenILS::Application::Ingest->storage_req(
1429 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1430 @{ $field_entry{keyword} }
1431 ) if (@{ $field_entry{keyword} });
1433 OpenILS::Application::Ingest->storage_req(
1434 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1435 @{ $field_entry{series} }
1436 ) if (@{ $field_entry{series} });
1438 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
1444 $log->debug('Wormization failed : '.shift(), ERROR);
1445 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# The transaction is committed or rolled back here only when $commit is set.
1449 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1450 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1453 __PACKAGE__->register_method(
1454 api_name => "open-ils.worm.wormize.biblio",
1455 method => "wormize_biblio_record",
1459 __PACKAGE__->register_method(
1460 api_name => "open-ils.worm.wormize.biblio.nomap",
1461 method => "wormize_biblio_record",
1465 __PACKAGE__->register_method(
1466 api_name => "open-ils.worm.wormize.biblio.noscrub",
1467 method => "wormize_biblio_record",
1471 __PACKAGE__->register_method(
1472 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1473 method => "wormize_biblio_record",
# Ingest one or more authority records.
#
# Begins a transaction when none is active, runs the authority scrub method
# unless the api_name contains "noscrub", then flattens each record's MARC
# into full_rec rows and writes them with a single batch.create inside the
# 'wormize_authority_record' savepoint.  Record-descriptor handling is
# commented out for authority records.
1478 sub wormize_authority_record {
1484 if (!OpenILS::Application::Ingest->in_transaction) {
1485 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1491 # clean up the cruft
1492 unless ($self->api_name =~ /noscrub/o) {
1493 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite its name, $bibs holds authority record entries here.
1497 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
1500 my @rec_descriptor = ();
1501 for my $r (@$bibs) {
1502 my $xml = $parser->parse_string($r->marc);
1504 # the full_rec stuff
1505 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1506 $fr->record( $r->id );
1507 push @full_rec, $fr;
1510 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1511 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1512 #$rd->record( $r->id );
1513 #push @rec_descriptor, $rd;
1517 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1519 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1520 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1522 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
1525 $log->debug('Wormization failed : '.shift(), ERROR);
1526 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# The transaction is committed or rolled back here only when $commit is set.
1530 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1531 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
1534 __PACKAGE__->register_method(
1535 api_name => "open-ils.worm.wormize.authority",
1536 method => "wormize_authority_record",
1540 __PACKAGE__->register_method(
1541 api_name => "open-ils.worm.wormize.authority.noscrub",
1542 method => "wormize_authority_record",
1548 # --------------------------------------------------------------------------------
1549 # MARC index extraction
1551 package OpenILS::Application::Ingest::XPATH;
1552 use base qw/OpenILS::Application::Ingest/;
1553 use Unicode::Normalize;
1555 # give this a MODS documentElement and an XPATH expression
# Concatenates the text found at $xpath under $xml into one space-separated,
# NFD-normalized string.  When both $ns_uri and $ns_prefix are given, the
# prefix is bound on $xml first.  With $unique set, a child's text is skipped
# when it already occurs in the accumulated string.
1556 sub _xpath_to_string {
1560 my $ns_prefix = shift;
1563 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1567 # grab the set of matching nodes
1568 my @nodes = $xml->findnodes( $xpath );
1569 for my $value (@nodes) {
1571 # grab all children of the node
1572 my @children = $value->childNodes();
1573 for my $child (@children) {
1575 # add the child's content to the growing buffer
# Plain substring test instead of a regex built from the text: an empty
# text node would otherwise yield an empty pattern, which Perl treats as
# "reuse the last successful pattern".
1576 my $content = $child->textContent;
1577 next if ($unique && index($string, $content) >= 0); # uniquify the values
1578 $string .= $content . " ";
1581 $string .= $value->textContent . " ";
1584 return NFD($string);
# Produce one field-entry object per configured index type of $class.
#
# $xml is parsed when it is not already a reference.  For each type listed
# under $xpathset->{$class}, the MARC is transformed with $mods_sheet, the
# type's XPath is evaluated through _xpath_to_string, the value is
# normalized, and a Fieldmapper::metabib::<class>_field_entry carrying the
# value and the field id is sent to the client.
1587 sub class_all_index_string_xml {
1593 OpenILS::Application::Ingest->post_init();
1594 $xml = $parser->parse_string($xml) unless (ref $xml);
1596 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1597 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the transform is re-run for every type although $xml does
# not change inside the loop -- confirm whether it can be hoisted.
1598 my $value = _xpath_to_string(
1599 $mods_sheet->transform($xml)->documentElement,
1600 $xpathset->{$class}->{$type}->{xpath},
1601 "http://www.loc.gov/mods/",
# Normalization: decompose, drop combining marks (\pM) and \pC characters,
# trim trailing non-word characters, remove a period that directly follows
# a word character, then lowercase.
1608 $value = NFD($value);
1609 $value =~ s/\pM+//sgo;
1610 $value =~ s/\pC+//sgo;
1611 $value =~ s/\W+$//sgo;
1613 $value =~ s/(\w)\./$1/sgo;
1614 $value = lc($value);
1616 my $fm = $class_constructor->new;
1617 $fm->value( $value );
1618 $fm->field( $xpathset->{$class}->{$type}->{id} );
1619 $client->respond($fm);
1623 __PACKAGE__->register_method(
1624 api_name => "open-ils.worm.field_entry.class.xml",
1625 method => "class_all_index_string_xml",
# Record-id front end for open-ils.worm.field_entry.class.xml: retrieves bib
# record $rec from storage, runs the XML-based method on its MARC for $class,
# and forwards each resulting field entry to the client.
1631 sub class_all_index_string_record {
1637 OpenILS::Application::Ingest->post_init();
1638 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1640 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1642 $client->respond($fm);
1646 __PACKAGE__->register_method(
1647 api_name => "open-ils.worm.field_entry.class.record",
1648 method => "class_all_index_string_record",
# Returns the index string for a single $class/$type pair: parses $xml when
# it is not already a reference, transforms it with $mods_sheet, and
# evaluates the configured XPath with the "mods" prefix bound and the
# uniqueness flag set to 1.
1655 sub class_index_string_xml {
1662 OpenILS::Application::Ingest->post_init();
1663 $xml = $parser->parse_string($xml) unless (ref $xml);
1664 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1666 __PACKAGE__->register_method(
1667 api_name => "open-ils.worm.class.type.xml",
1668 method => "class_index_string_xml",
# Record-id front end for open-ils.worm.class.type.xml: retrieves bib record
# $rec from storage, runs the XML-based method on its MARC for the given
# $class and $type, and logs the resulting string at DEBUG level.
1673 sub class_index_string_record {
1680 OpenILS::Application::Ingest->post_init();
1681 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1683 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1684 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1687 __PACKAGE__->register_method(
1688 api_name => "open-ils.worm.class.type.record",
1689 method => "class_index_string_record",
# Body of the sub registered below as "xml_xpath" (its header line is not
# part of this listing): parses $xml when needed and evaluates an arbitrary
# $xpath against the document element, optionally binding $prefix to $uri.
1703 OpenILS::Application::Ingest->post_init();
1704 $xml = $parser->parse_string($xml) unless (ref $xml);
1705 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1707 __PACKAGE__->register_method(
1708 api_name => "open-ils.worm.xpath.xml",
1709 method => "xml_xpath",
# Body of the sub registered below as "record_xpath" (its header line is not
# part of this listing): retrieves bib record $rec, runs
# open-ils.worm.xpath.xml on its MARC, and logs the result at DEBUG level.
1723 OpenILS::Application::Ingest->post_init();
1724 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1726 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1727 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1730 __PACKAGE__->register_method(
1731 api_name => "open-ils.worm.xpath.record",
1732 method => "record_xpath",
1738 # --------------------------------------------------------------------------------
1741 package OpenILS::Application::Ingest::Biblio::Leader;
1742 use base qw/OpenILS::Application::Ingest/;
1743 use Unicode::Normalize;
# Named groups of record-type codes; each value is a regex fragment that is
# joined into a full pattern by the _type_re code below.
1745 our %marc_type_groups = (
1748 VIS => q/[gkro]{1}/,
# Build an anchored alternation from the patterns of the named type groups.
# A hash slice (@h{...}) is required to fetch one pattern per name; the
# alternation is grouped so that ^ and $ apply to every alternative.
1757 my $re = '^(?:'. join('|', @marc_type_groups{@_}) .')$';
# Maps record_descriptor field names to closures that compute the value.
# The closures read $ldr and $oo8, which _extract_biblio_descriptors sets
# with "local" (from the leader and the 008 controlfield) before calling
# them.  Several entries pick their 008 offset by matching leader position 6
# against a type group via _type_re.
1761 our %biblio_descriptor_code = (
1762 item_type => sub { substr($ldr,6,1); },
1765 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1766 return substr($oo8,29,1);
1767 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1768 return substr($oo8,23,1);
1772 bib_level => sub { substr($ldr,7,1); },
1773 control_type => sub { substr($ldr,8,1); },
1774 char_encoding => sub { substr($ldr,9,1); },
1775 enc_level => sub { substr($ldr,17,1); },
1776 cat_form => sub { substr($ldr,18,1); },
1777 pub_status => sub { substr($ldr,5,1); },
1778 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat yield undef when the record type is outside the
# BKS / VIS group respectively.
1779 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1780 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1781 audience => sub { substr($oo8,22,1); },
# Fills a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.  The leader and the 008/007 controlfields are read with
# namespace-agnostic XPath (local-name()) into localized $ldr/$oo8/$oo7, then
# every entry of %biblio_descriptor_code is called and its result stored
# through the accessor of the same name.
1784 sub _extract_biblio_descriptors {
1787 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1788 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1789 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1791 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
1792 for my $rd_field ( keys %biblio_descriptor_code ) {
1793 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# API wrapper around _extract_biblio_descriptors: accepts MARCXML either as
# a string (parsed here) or as an already-parsed reference.
1799 sub extract_biblio_desc_xml {
1804 $xml = $parser->parse_string($xml) unless (ref $xml);
1806 return _extract_biblio_descriptors( $xml );
1808 __PACKAGE__->register_method(
1809 api_name => "open-ils.worm.biblio_leader.xml",
1810 method => "extract_biblio_desc_xml",
# Record-id front end for open-ils.worm.biblio_leader.xml: retrieves bib
# record $rec, runs the XML-based method on its MARC, and logs the resulting
# descriptor as JSON at DEBUG level.
1815 sub extract_biblio_desc_record {
1820 OpenILS::Application::Ingest->post_init();
1821 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1823 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1824 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1827 __PACKAGE__->register_method(
1828 api_name => "open-ils.worm.biblio_leader.record",
1829 method => "extract_biblio_desc_record",
1834 # --------------------------------------------------------------------------------
1837 package OpenILS::Application::Ingest::FlatMARC;
1838 use base qw/OpenILS::Application::Ingest/;
1839 use Unicode::Normalize;
# Flattens a parsed MARCXML document into Fieldmapper full_rec rows.
#
# $marcxml : parsed document; the first element whose local name is "record"
#            is used as the root.
# $xmltype : Fieldmapper namespace segment, defaults to 'metabib'; the row
#            class is Fieldmapper::<xmltype>::full_rec.
#
# Three passes are made over the root's children: leader, controlfield, and
# datafield (one row per subfield).  Every value has combining marks (\pM),
# \pC characters and trailing non-word characters removed.
1842 sub _marcxml_to_full_rows {
1844 my $marcxml = shift;
1845 my $xmltype = shift || 'metabib';
1847 my $type = "Fieldmapper::${xmltype}::full_rec";
# NOTE(review): $root is dereferenced below without a check -- presumably the
# input always contains a record element; confirm against callers.
1851 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
1853 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1854 next unless $tagline;
1856 my $ns = $type->new;
1859 my $val = $tagline->textContent;
1861 $val =~ s/\pM+//sgo;
1862 $val =~ s/\pC+//sgo;
1863 $val =~ s/\W+$//sgo;
1869 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1870 next unless $tagline;
1872 my $ns = $type->new;
1874 $ns->tag( $tagline->getAttribute( "tag" ) );
1875 my $val = $tagline->textContent;
1877 $val =~ s/\pM+//sgo;
1878 $val =~ s/\pC+//sgo;
1879 $val =~ s/\W+$//sgo;
1885 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1886 next unless $tagline;
1888 my $tag = $tagline->getAttribute( "tag" );
1889 my $ind1 = $tagline->getAttribute( "ind1" );
1890 my $ind2 = $tagline->getAttribute( "ind2" );
1892 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1895 my $ns = $type->new;
1900 $ns->subfield( $data->getAttribute( "code" ) );
1901 my $val = $data->textContent;
1903 $val =~ s/\pM+//sgo;
1904 $val =~ s/\pC+//sgo;
1905 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1906 $ns->value( lc($val) );
1912 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of the sub registered below as "flat_marc_xml" (its header line is
# not part of this listing): parses $xml when needed, selects the
# 'authority' row type when the api_name contains "authority" ('metabib'
# otherwise), and sends every flattened row to the client.
1921 $xml = $parser->parse_string($xml) unless (ref $xml);
1923 my $type = 'metabib';
1924 $type = 'authority' if ($self->api_name =~ /authority/o);
1926 OpenILS::Application::Ingest->post_init();
1928 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
1931 __PACKAGE__->register_method(
1932 api_name => "open-ils.worm.flat_marc.authority.xml",
1933 method => "flat_marc_xml",
1938 __PACKAGE__->register_method(
1939 api_name => "open-ils.worm.flat_marc.biblio.xml",
1940 method => "flat_marc_xml",
# Record-id front end for the flat_marc XML methods.  $type is 'authority'
# when the api_name contains "authority" and 'biblio' otherwise; it selects
# both the storage retrieve call and the open-ils.worm.flat_marc.<type>.xml
# method whose rows are forwarded to the client.
1946 sub flat_marc_record {
1951 my $type = 'biblio';
1952 $type = 'authority' if ($self->api_name =~ /authority/o);
1954 OpenILS::Application::Ingest->post_init();
1955 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1957 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1960 __PACKAGE__->register_method(
1961 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1962 method => "flat_marc_record",
1967 __PACKAGE__->register_method(
1968 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1969 method => "flat_marc_record",
1976 # --------------------------------------------------------------------------------
1979 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1980 use base qw/OpenILS::Application::Ingest/;
1981 use Unicode::Normalize;
1982 use OpenSRF::EX qw/:try/;
# Fingerprint recipe table, consumed by the _fp_mods code further down.
#
# The list alternates a "match" XPath with a block (array ref).  Each block
# in turn alternates a part name with a part spec; the consumer reads the
# spec's {xpath} list, and the substitutions below are the text fixups that
# clean the value found.  Title fixups strip marks, collapse whitespace,
# drop the words the/a/an, bracketed text and trailing ; / . characters;
# name fixups keep only the text before the first whitespace.
1984 my @fp_mods_xpath = (
1985 '//mods:mods/mods:typeOfResource[text()="text"]' => [
1988 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1989 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1990 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1991 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
1994 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1996 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1997 $text =~ s/\pM+//gso;
1998 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2000 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2001 $text =~ s/\s+/ /sgo;
2002 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# NOTE(review): (.+) is greedy and, under /s, also consumes trailing
# whitespace, so this substitution (repeated in each fixup below) only trims
# the leading side -- confirm whether that is intended before changing it,
# since fingerprints are derived from this text.
2003 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2004 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2005 $text =~ s/\b(?:the|an?)\b//sgo;
2006 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2007 $text =~ s/\[.[^\]]+\]//sgo;
2008 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2009 $text =~ s/\s*[;\/\.]*$//sgo;
2010 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2015 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2016 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2019 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2021 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2022 $text =~ s/\pM+//gso;
2023 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2025 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2026 $text =~ s/\s+/ /sgo;
2027 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2028 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2029 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2030 $text =~ s/,?\s+.*$//sgo;
2031 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Second recipe: records with a relatedItem that is neither host nor series.
# relatedItem titles/names are listed ahead of the record's own.
2036 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
2039 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2040 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2041 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2042 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2043 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2044 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2045 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2046 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
2049 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2051 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2052 $text =~ s/\pM+//gso;
2053 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2055 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2056 $text =~ s/\s+/ /sgo;
2057 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2058 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2059 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2060 $text =~ s/\b(?:the|an?)\b//sgo;
2061 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2062 $text =~ s/\[.[^\]]+\]//sgo;
2063 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2064 $text =~ s/\s*[;\/\.]*$//sgo;
2065 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2070 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2071 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2072 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2073 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2076 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2078 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2079 $text =~ s/\pM+//gso;
2080 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2082 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2083 $text =~ s/\s+/ /sgo;
2084 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2085 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2086 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2087 $text =~ s/,?\s+.*$//sgo;
2088 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Catch-all recipe: any record with a titleInfo reuses the first block.
2095 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Fingerprint builder body -- presumably the _fp_mods sub called by
# fingerprint_mods and fingerprint_marc; its header line is not part of this
# listing.
#
# Walks @fp_mods_xpath in (match XPath, block) pairs.  For a match XPath that
# finds nodes in $mods, each part of the block is tried: the part's XPaths
# are evaluated with findvalue and the resulting text is appended to
# $fp_string.  Finally every non-word character is removed from the
# fingerprint.
2099 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# Even slots of @fp_mods_xpath hold match XPaths, odd slots hold blocks.
2103 my $match_index = 0;
2104 my $block_index = 1;
2105 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2106 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Within a block, even slots hold part names and odd slots part specs.
2108 my $block_name_index = 0;
2109 my $block_value_index = 1;
2110 my $block = $fp_mods_xpath[$block_index];
2111 while ( my $part = $$block[$block_value_index] ) {
2113 for my $xpath ( @{ $part->{xpath} } ) {
2114 $text = $mods->findvalue( $xpath );
2118 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2122 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2123 $fp_string .= $text;
2126 $block_name_index += 2;
2127 $block_value_index += 2;
2131 $fp_string =~ s/\W+//gso;
2132 $log->debug("Fingerprint is [$fp_string]", INFO);;
# Recompute the fingerprint and quality of bib record(s) $rec and, unless
# the api_name contains "nomap", rebuild their metarecord mapping.
#
# A transaction is begun when none is active.  For each record the
# fingerprint method is run on its MARC; when fingerprint or quality
# differs from the stored values the record is updated and (in mapping
# mode) its old source-map rows are deleted, a metarecord left without any
# map rows is deleted, a metarecord for the new fingerprint is looked up
# (the lines below build a new one), and a fresh source-map row is written.
# Each processed record id is sent to the client.
2142 sub refingerprint_bibrec {
2148 if (!OpenILS::Application::Ingest->in_transaction) {
2149 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2155 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2156 for my $b (@$bibs) {
2157 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
2159 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2161 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2163 OpenILS::Application::Ingest->storage_req(
2164 'open-ils.storage.direct.biblio.record_entry.remote_update',
2166 { fingerprint => $fp->{fingerprint},
2167 quality => $fp->{quality} }
2170 if ($self->api_name !~ /nomap/o) {
2171 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2172 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2177 if (ref($old_source_map) and @$old_source_map) {
2178 for my $m (@$old_source_map) {
# $old_mrid keeps the metarecord of the last map row seen in this loop.
2179 $old_mrid = $m->metarecord;
2180 OpenILS::Application::Ingest->storage_req(
2181 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
2187 my $old_sm = OpenILS::Application::Ingest->storage_req(
2188 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2189 { metarecord => $old_mrid }
# No remaining map rows: the old metarecord is orphaned and is deleted.
2192 if (ref($old_sm) and @$old_sm == 0) {
2193 OpenILS::Application::Ingest->storage_req(
2194 'open-ils.storage.direct.metabib.metarecord.delete',
2199 my $mr = OpenILS::Application::Ingest->storage_req(
2200 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2201 { fingerprint => $fp->{fingerprint} }
2205 $mr = Fieldmapper::metabib::metarecord->new;
2206 $mr->fingerprint( $fp->{fingerprint} );
2207 $mr->master_record( $b->id );
2208 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2211 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2212 $mr_map->metarecord( $mr->id );
2213 $mr_map->source( $b->id );
2214 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2218 $client->respond($b->id);
2222 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# The transaction is committed or rolled back here only when $commit is set.
2226 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2227 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
2230 __PACKAGE__->register_method(
2231 api_name => "open-ils.worm.fingerprint.record.update",
2232 method => "refingerprint_bibrec",
2238 __PACKAGE__->register_method(
2239 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2240 method => "refingerprint_bibrec",
# Record-id front end for open-ils.worm.fingerprint.marc: retrieves bib
# record $rec, fingerprints its MARC, and logs the result at INFO level.
# NOTE(review): other callers in this file read $fp->{fingerprint} from the
# same method, so "[$fp]" in the log line may print a reference rather than
# the fingerprint text -- confirm.
2247 sub fingerprint_bibrec {
2252 OpenILS::Application::Ingest->post_init();
2253 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
2255 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2256 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2260 __PACKAGE__->register_method(
2261 api_name => "open-ils.worm.fingerprint.record",
2262 method => "fingerprint_bibrec",
# Fingerprints a MODS document given as an XML string: parses it and passes
# the document element to _fp_mods.
2268 sub fingerprint_mods {
2273 OpenILS::Application::Ingest->post_init();
2274 my $mods = $parser->parse_string($xml)->documentElement;
2276 return _fp_mods( $mods );
2278 __PACKAGE__->register_method(
2279 api_name => "open-ils.worm.fingerprint.mods",
2280 method => "fingerprint_mods",
# Fingerprints a MARCXML record: parses $xml when it is not already a
# reference, transforms it with $mods_sheet, and passes the resulting
# document element to _fp_mods.
# NOTE(review): the api_name this sub is registered under,
# "open-ils.worm.fingerprint.marc", is registered again further down for
# biblio_fingerprint -- confirm which registration is meant to be active.
2285 sub fingerprint_marc {
2290 $xml = $parser->parse_string($xml) unless (ref $xml);
2292 OpenILS::Application::Ingest->post_init();
2293 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2294 $log->debug("Returning [$fp] as fingerprint", INFO);
2297 __PACKAGE__->register_method(
2298 api_name => "open-ils.worm.fingerprint.marc",
2299 method => "fingerprint_marc",
# Record-id front end for open-ils.worm.fingerprint.marc: retrieves bib
# record $rec from storage, fingerprints the retrieved value, and logs the
# result at INFO level.
# NOTE(review): the api_name this sub is registered under,
# "open-ils.worm.fingerprint.record", is also registered above for
# fingerprint_bibrec -- confirm which registration is meant to be active.
2307 sub biblio_fingerprint_record {
2312 OpenILS::Application::Ingest->post_init();
2314 my $marc = OpenILS::Application::Ingest
2315 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
2318 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2319 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2322 __PACKAGE__->register_method(
2323 api_name => "open-ils.worm.fingerprint.record",
2324 method => "biblio_fingerprint_record",
# Script-driven fingerprinting of a MARC record.
#
# $marc is parsed when it is not already a reference.  The MARC and its MODS
# transform are passed through entityize, the fingerprint script and its
# library path(s) are read from the open-ils.storage app_settings, and the
# script is run with { marc, mods } inserted as its 'environment'.
# Returns 0 when the script run yields a false value.
# NOTE(review): this sub is registered as "open-ils.worm.fingerprint.marc",
# an api_name that fingerprint_marc above is registered under as well --
# confirm which registration is meant to be active.
2330 sub biblio_fingerprint {
2335 OpenILS::Application::Ingest->post_init();
2337 $marc = $parser->parse_string($marc) unless (ref $marc);
2339 my $mods = OpenILS::Application::Ingest::entityize(
2341 ->transform( $marc )
2347 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2350 $log->internal("Got MARC [$marc]");
2351 $log->internal("Created MODS [$mods]");
2354 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2355 my $conf = OpenSRF::Utils::SettingsClient->new;
2357 my $libs = $conf->config_value(@pfx, 'script_path');
2358 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be configured as a single value or a list; normalize to a list.
2359 my $script_libs = (ref($libs)) ? $libs : [$libs];
2361 $log->debug("Loading script $script_file for biblio fingerprinting...");
# Direct method call instead of indirect-object "new Class (...)".
2363 $fp_script = OpenILS::Utils::ScriptRunner->new
2364 ( file => $script_file,
2365 paths => $script_libs,
2366 reset_count => 1000 );
2369 $log->debug("Applying environment for biblio fingerprinting...");
2371 my $env = {marc => $marc, mods => $mods};
2372 #my $res = {fingerprint => '', quality => '0'};
2374 $fp_script->insert('environment' => $env);
2375 #$fp_script->insert('result' => $res);
2377 $log->debug("Running script for biblio fingerprinting...");
# Bail out on a failed run regardless of what $log->error returns; the
# previous "error(...) && return 0" only returned when error() was true.
2379 my $res = $fp_script->run or do { $log->error( "Fingerprint script died! $@" ); return 0; };
2381 $log->debug("Script for biblio fingerprinting completed successfully...");
2385 __PACKAGE__->register_method(
2386 api_name => "open-ils.worm.fingerprint.marc",
2387 method => "biblio_fingerprint",
2392 # --------------------------------------------------------------------------------
2406 my $create_source_map;
2421 my %descriptor_code = (
2422 item_type => 'substr($ldr,6,1)',
2423 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2424 bib_level => 'substr($ldr,7,1)',
2425 control_type => 'substr($ldr,8,1)',
2426 char_encoding => 'substr($ldr,9,1)',
2427 enc_level => 'substr($ldr,17,1)',
2428 cat_form => 'substr($ldr,18,1)',
2429 pub_status => 'substr($ldr,5,1)',
2430 item_lang => 'substr($oo8,35,3)',
2431 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2432 audience => 'substr($oo8,22,1)',
2442 if ($self->api_name =~ /no_map/o) {
2446 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2448 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2450 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2452 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2454 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2455 unless ($sm_lookup);
2456 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2457 unless ($mr_lookup);
2458 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2459 unless ($mr_update);
2460 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2462 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2463 unless ($update_entry);
2464 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2465 unless ($rm_old_sm);
2466 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2467 unless ($rm_old_rd);
2468 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2469 unless ($rm_old_fr);
2470 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2471 unless ($rm_old_tr);
2472 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2473 unless ($rm_old_ar);
2474 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2475 unless ($rm_old_sr);
2476 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2477 unless ($rm_old_kr);
2478 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2479 unless ($rm_old_ser);
2480 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2481 unless ($mr_create);
2482 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2483 unless ($create_source_map);
2484 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2485 unless ($rd_create);
2486 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2487 unless ($fr_create);
2488 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2489 unless ($$create{title});
2490 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2491 unless ($$create{author});
2492 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2493 unless ($$create{subject});
2494 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2495 unless ($$create{keyword});
2496 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2497 unless ($$create{series});
2500 my ($outer_xact) = $in_xact->run;
2502 unless ($outer_xact) {
2503 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2504 my ($r) = $begin->run($client);
2505 unless (defined $r and $r) {
2507 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2510 } catch Error with {
2511 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2521 for my $entry ( $lookup->run(@docids) ) {
2522 # step -1: grab the doc from storage
2523 next unless ($entry);
2526 my $xslt_doc = $parser->parse_file(
2527 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2528 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2531 my $xml = $entry->marc;
2532 my $docid = $entry->id;
2533 my $marcdoc = $parser->parse_string($xml);
2534 my $modsdoc = $mods_sheet->transform($marcdoc);
2536 my $mods = $modsdoc->documentElement;
2537 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2539 $entry->fingerprint( fingerprint_mods( $mods ) );
2540 push @entry_list, $entry;
2542 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2545 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2546 if (!$mr || !@$mr) {
2547 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2548 $mr = new Fieldmapper::metabib::metarecord;
2549 $mr->fingerprint( $entry->fingerprint );
2550 $mr->master_record( $entry->id );
2551 my ($new_mr) = $mr_create->run($mr);
2553 unless (defined $mr) {
2554 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2557 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2562 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2563 $sm->metarecord( $mr->id );
2564 $sm->source( $entry->id );
2565 push @source_maps, $sm;
2568 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2569 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2571 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2572 for my $rd_field ( keys %descriptor_code ) {
2573 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2575 $rd_obj->record( $docid );
2576 push @rd_list, $rd_obj;
2578 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2580 # step 2: build the KOHA rows
2581 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2582 $_->record( $docid ) for (@tmp_list);
2583 push @ns_list, @tmp_list;
2587 last unless ($self->api_name =~ /batch$/o);
2590 $rm_old_rd->run( { record => \@docids } );
2591 $rm_old_fr->run( { record => \@docids } );
2592 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2593 $rm_old_tr->run( { source => \@docids } );
2594 $rm_old_ar->run( { source => \@docids } );
2595 $rm_old_sr->run( { source => \@docids } );
2596 $rm_old_kr->run( { source => \@docids } );
2597 $rm_old_ser->run( { source => \@docids } );
2600 my ($sm) = $create_source_map->run(@source_maps);
2601 unless (defined $sm) {
2602 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2604 my ($mr) = $mr_update->run(@mr_list);
2605 unless (defined $mr) {
2606 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2610 my ($re) = $update_entry->run(@entry_list);
2611 unless (defined $re) {
2612 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2615 my ($rd) = $rd_create->run(@rd_list);
2616 unless (defined $rd) {
2617 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2620 my ($fr) = $fr_create->run(@ns_list);
2621 unless (defined $fr) {
2622 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2625 # step 5: insert the new metadata
2626 for my $class ( qw/title author subject keyword series/ ) {
2628 for my $doc ( @mods_data ) {
2629 my ($did) = keys %$doc;
2630 my ($data) = values %$doc;
2632 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2633 for my $row ( keys %{ $$data{$class} } ) {
2634 next unless (exists $$data{$class}{$row});
2635 next unless ($$data{$class}{$row}{value});
2636 my $fm_obj = $fm_constructor->new;
2637 $fm_obj->value( $$data{$class}{$row}{value} );
2638 $fm_obj->field( $$data{$class}{$row}{field_id} );
2639 $fm_obj->source( $did );
2640 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2642 push @md_list, $fm_obj;
2646 my ($cr) = $$create{$class}->run(@md_list);
2647 unless (defined $cr) {
2648 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2652 unless ($outer_xact) {
2653 $log->debug("Commiting transaction started by the Ingest.", INFO);
2654 my ($c) = $commit->run;
2655 unless (defined $c and $c) {
2657 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# API registrations for the "wormize" method.  All four names dispatch to the
# same method; the method inspects its own api_name to vary behaviour.

# Plain variant.
2663 __PACKAGE__->register_method(
2664 api_name => "open-ils.worm.wormize",
2665 method => "wormize",
# "no_map" variant: the ingest code skips the old metarecord_source_map
# mass_delete when $no_map is set (presumably derived from this api_name --
# the assignment is not visible here; confirm).
2669 __PACKAGE__->register_method(
2670 api_name => "open-ils.worm.wormize.no_map",
2671 method => "wormize",
# "batch" variant: the ingest loop only continues past the first record when
# the api_name ends in "batch".
2675 __PACKAGE__->register_method(
2676 api_name => "open-ils.worm.wormize.batch",
2677 method => "wormize",
# Combined "no_map" + "batch" variant.
2681 __PACKAGE__->register_method(
2682 api_name => "open-ils.worm.wormize.no_map.batch",
2683 method => "wormize",
# File-scoped lexical; it is not referenced by the authority ingest code
# visible below.  NOTE(review): possibly unused -- confirm before removing.
2698 my $acreate_source_map;
# authority_wormize -- ingest one or more authority records.
#
# Visible steps, in order:
#   1. resolve the storage methods used below (each authority-specific handle
#      is only looked up when it is not already set);
#   2. begin a storage transaction unless one is already current;
#   3. for every retrieved record entry: parse its MARCXML, build a
#      Fieldmapper::authority::record_descriptor from %descriptor_code and
#      build the full_rec rows with _marcxml_to_full_rows;
#   4. mass-delete the old record_descriptor / full_rec rows for the requested
#      ids and batch-create the new ones;
#   5. commit, but only when this method started the transaction itself.
#
# Throws OpenSRF::EX::PANIC when BEGIN, either batch create, or COMMIT fails.
#
# NOTE(review): argument unpacking is not visible here; $self, $client and
# @docids are presumably taken from @_ -- confirm.
2713 sub authority_wormize {
2720 if ($self->api_name =~ /no_map/o) {
2724 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2726 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2728 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2730 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2732 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2734 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2735 unless ($aupdate_entry);
2736 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2737 unless ($arm_old_rd);
2738 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2739 unless ($arm_old_fr);
2740 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2741 unless ($ard_create);
2742 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2743 unless ($afr_create);
# Only open (and later commit) a transaction when the caller has not already
# opened one.
2746 my ($outer_xact) = $in_xact->run;
2748 unless ($outer_xact) {
2749 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2750 my ($r) = $begin->run($client);
2751 unless (defined $r and $r) {
2753 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2756 } catch Error with {
2757 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Fetch the records through the authority retriever resolved above.  The
# previous code used $lookup, which is the bibliographic record_entry
# retriever and is unrelated to authority records.
2767 for my $entry ( $alookup->run(@docids) ) {
2768 # step -1: grab the doc from storage
2769 next unless ($entry);
2772 # my $xslt_doc = $parser->parse_file(
2773 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2774 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2777 my $xml = $entry->marc;
2778 my $docid = $entry->id;
2779 my $marcdoc = $parser->parse_string($xml);
2780 #my $madsdoc = $mads_sheet->transform($marcdoc);
2782 #my $mads = $madsdoc->documentElement;
2783 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
2785 push @entry_list, $entry;
# Leader text and 008 control field value; presumably consumed by the code
# snippets in %descriptor_code that are string-eval'ed below -- confirm.
2787 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2788 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2790 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
# Each %descriptor_code value is evaluated as Perl source and the result is
# stored through the accessor named by its key.
2791 for my $rd_field ( keys %descriptor_code ) {
2792 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2794 $rd_obj->record( $docid );
2795 push @rd_list, $rd_obj;
2797 # step 2: build the KOHA rows
2798 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2799 $_->record( $docid ) for (@tmp_list);
2800 push @ns_list, @tmp_list;
# Non-batch API names stop after the first record.
2804 last unless ($self->api_name =~ /batch$/o);
# Remove the previously ingested rows for all requested ids before re-creating.
2807 $arm_old_rd->run( { record => \@docids } );
2808 $arm_old_fr->run( { record => \@docids } );
2810 my ($rd) = $ard_create->run(@rd_list);
2811 unless (defined $rd) {
2812 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Create the rows with the authority full_rec creator resolved above.  The
# previous code called $fr_create, the metabib full_rec creator, although the
# PANIC message below names the authority method.
2815 my ($fr) = $afr_create->run(@ns_list);
2816 unless (defined $fr) {
2817 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only a transaction that was started by this method.
2820 unless ($outer_xact) {
2821 $log->debug("Commiting transaction started by Ingest.", INFO);
2822 my ($c) = $commit->run;
2823 unless (defined $c and $c) {
2825 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# API registrations for authority ingest.
#
# NOTE(review): the first api_name is spelled "authortiy" (the batch variant
# below spells "authority"); looks like a typo, but existing callers may rely
# on the registered name -- confirm before changing.
# NOTE(review): both registrations dispatch to "wormize" rather than
# "authority_wormize"; presumably a copy/paste slip -- confirm.
2831 __PACKAGE__->register_method(
2832 api_name => "open-ils.worm.authortiy.wormize",
2833 method => "wormize",
# Batch variant of the authority API name.
2837 __PACKAGE__->register_method(
2838 api_name => "open-ils.worm.authority.wormize.batch",
2839 method => "wormize",
2845 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Breaks a parsed MARCXML document into "full_rec" row objects: rows for the
# leader, for each controlfield, and for each child of each datafield.
#
#   $marcxml - parsed XML document; its documentElement is walked.
#   $type    - Fieldmapper class used for the row objects; defaults to
#              'Fieldmapper::metabib::full_rec'.
#
# Every row is now built with $type->new.  Previously the leader and
# controlfield rows were always Fieldmapper::metabib::full_rec objects, even
# when the caller asked for another class, so such callers received a mix of
# classes.  Callers relying on the default see no change.
#
# Text values are NFD-normalised and stripped of combining marks (\pM).
# NOTE(review): the return statement is not visible here; callers assign the
# result to an array and call ->record on each element.
2848 sub _marcxml_to_full_rows {
2850 my $marcxml = shift;
2851 my $type = shift || 'Fieldmapper::metabib::full_rec';
2855 my $root = $marcxml->documentElement;
# Leader row(s).
2857 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2858 next unless $tagline;
2860 my $ns = $type->new;
2863 my $val = NFD($tagline->textContent);
2864 $val =~ s/(\pM+)//gso;
# One row per controlfield, tagged with the field's "tag" attribute.
2870 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2871 next unless $tagline;
2873 my $ns = $type->new;
2875 $ns->tag( $tagline->getAttribute( "tag" ) );
2876 my $val = NFD($tagline->textContent);
2877 $val =~ s/(\pM+)//gso;
# One row per child node of each datafield; the row's subfield comes from the
# child's "code" attribute and its value is stored lower-cased.
2883 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2884 next unless $tagline;
2886 my $tag = $tagline->getAttribute( "tag" );
2887 my $ind1 = $tagline->getAttribute( "ind1" );
2888 my $ind2 = $tagline->getAttribute( "ind2" );
2890 for my $data ( $tagline->childNodes ) {
2893 my $ns = $type->new;
2898 $ns->subfield( $data->getAttribute( "code" ) );
2899 my $val = NFD($data->textContent);
2900 $val =~ s/(\pM+)//gso;
2901 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Collects the text content of the nodes matched by $xpath under $root into a
# single space-separated string, then NFD-normalises it and strips combining
# marks (\pM).
# NOTE(review): the declaration of $string and the return statement are not
# visible here; presumably the accumulated string is returned -- confirm.
2909 sub _get_field_value {
2911 my( $root, $xpath ) = @_;
2915 # grab the set of matching nodes
2916 my @nodes = $root->findnodes( $xpath );
2917 for my $value (@nodes) {
2919 # grab all children of the node
2920 my @children = $value->childNodes();
2921 for my $child (@children) {
2923 # add the childs content to the growing buffer
# quotemeta makes the child's text safe to use as a literal pattern; a child
# whose text already occurs anywhere in the buffer (substring match) is skipped.
2924 my $content = quotemeta($child->textContent);
2925 next if ($string =~ /$content/); # uniquify the values
2926 $string .= $child->textContent . " ";
# NOTE(review): presumably the branch taken for nodes without children; the
# enclosing conditional is not visible here -- confirm.
2929 $string .= $value->textContent . " ";
# Decompose, then drop the combining marks.
2932 $string = NFD($string);
2933 $string =~ s/(\pM)//gso;
2938 sub modsdoc_to_values {
2939 my( $self, $mods ) = @_;
2941 for my $class (keys %$xpathset) {
2942 $data->{$class} = {};
2943 for my $type (keys %{$xpathset->{$class}}) {
2944 $data->{$class}->{$type} = {};
2945 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};