1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Registry of record formats known to the ingest code, keyed by format name.
# Each entry holds the XML namespace URI for that format under `ns`; the
# `mods` and `mods3` entries also receive an `xslt` key (a compiled
# stylesheet) during one-time initialisation elsewhere in this file.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger is used as a class: every $log->debug/error call is a class-method call.
39 my $log = 'OpenSRF::Utils::Logger';
# File-scoped XML parser and XSLT processor shared by all packages below.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# NOTE(review): the enclosing sub header is not visible in this listing; the
# debug message and the ->post_init() callers suggest this is post_init — confirm.
# One-time setup: the whole body is skipped once %$xpathset has any keys.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
# Directory containing the XSL stylesheets, looked up via config_value(dirs => 'xsl').
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
# Compile the MARC21slim -> MODS stylesheet once and cache it in the format registry.
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Same for the MODS v3 stylesheet.
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Ask cstore for every config.metabib_field row whose id is not NULL.
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
72 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
# Index each field definition as $xpathset->{field_class}{name}, keeping its
# xpath, id and format for the index-extraction code.
75 if (ref $req and @$req) {
77 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace every character in the range U+0080..U+FFFD with an upper-case
# hexadecimal numeric character reference (&#xNNNN;).
# NOTE(review): the enclosing sub header is not visible in this listing;
# presumably this is the body of entityize, which callers in this file use — confirm.
96 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
100 # --------------------------------------------------------------------------------
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
# Read/write ingest of one bib record object ($bib).
# Runs the read-only ingest to build the derived data, then, between a cstore
# transaction.begin and transaction.commit, replaces the record's full_rec,
# record_descriptor, classed field-entry and metarecord_source_map rows,
# finds or creates the metarecord matching the new fingerprint, and updates
# the bib itself. Returns undef when the read-only ingest yields nothing or
# when the commit request returns a false value.
107 sub rw_biblio_ingest_single_object {
# The read-only ingest returns a hash with full_rec, field_entries, fingerprint and descriptor.
112 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113 return undef unless ($blob);
# Copy the computed fingerprint and quality onto the bib object.
115 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116 $bib->quality( $blob->{fingerprint}->{quality} );
# All database work below goes through this one connected cstore session.
118 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
120 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
122 # update full_rec stuff ...
123 my $tmp = $cstore->request(
124 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125 { record => $bib->id }
# Delete the existing rows one by one, then create the freshly extracted ones.
128 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
131 # update rec_descriptor stuff ...
132 $tmp = $cstore->request(
133 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134 { record => $bib->id }
137 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
140 # deal with classed fields...
# First clear the old entries for each of the five index classes.
141 for my $class ( qw/title author subject keyword series/ ) {
142 $tmp = $cstore->request(
143 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144 { source => $bib->id }
147 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# Then create the new entries; the cstore method name is derived from each
# object's class name with the leading "Fieldmapper::" removed.
149 for my $obj ( @{ $blob->{field_entries} } ) {
150 my $class = $obj->class_name;
151 $class =~ s/^Fieldmapper:://o;
153 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
# Drop this bib's existing metarecord source-map rows.
158 $tmp = $cstore->request(
159 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
160 { source => $bib->id }
163 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);
166 # Get the matching MR, if any.
167 my $mr = $cstore->request(
168 'open-ils.cstore.direct.metabib.metarecord.search',
169 { fingerprint => $bib->fingerprint }
# Build a new metarecord with this bib as master (the guarding condition is
# on a line not shown in this listing).
173 $mr = new Fieldmapper::metabib::metarecord;
174 $mr->fingerprint( $bib->fingerprint );
175 $mr->master_record( $bib->id );
178 "open-ils.cstore.direct.metabib.metarecord.create",
179 $mr => { quiet => 'true' }
# Look up the records already mapped to this metarecord ...
183 my $mrm = $cstore->request(
184 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
185 { metarecord => $mr->id }
# ... and fetch id/quality for them, ordered by quality descending.
189 my $best = $cstore->request(
190 "open-ils.cstore.direct.biblio.record_entry.search",
191 { id => [ map { $_->source } @$mrm ] },
192 { 'select' => { bre => [ qw/id quality/ ] },
193 order_by => { bre => "quality desc" },
# The master record is $best only when its quality is strictly higher than
# this bib's; the other visible assignments fall back to this bib.
198 if ($best->quality > $bib->quality) {
199 $mr->master_record($best->id);
201 $mr->master_record($bib->id);
204 $mr->master_record($bib->id);
207 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# Map this bib to the chosen metarecord and persist the updated bib.
210 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
211 $mrm->source($bib->id);
212 $mrm->metarecord($mr->id);
214 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
215 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
# NOTE(review): the second ';' below is a stray empty statement.
217 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Publish the sub above as open-ils.ingest.full.biblio.object.
221 __PACKAGE__->register_method(
222 api_name => "open-ils.ingest.full.biblio.object",
223 method => "rw_biblio_ingest_single_object",
# Read/write ingest by record id: retrieves the biblio.record_entry for $rec
# from cstore and hands it to open-ils.ingest.full.biblio.object, returning
# that method's first result. Returns undef when nothing is retrieved.
228 sub rw_biblio_ingest_single_record {
233 OpenILS::Application::Ingest->post_init();
# The retrieve is wrapped in a transaction that is rolled back straight after the read.
234 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
235 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
237 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
239 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
# The @$r test assumes $r is array-based — TODO confirm.
242 return undef unless ($r and @$r);
244 return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0];
# Publish the sub above as open-ils.ingest.full.biblio.record.
246 __PACKAGE__->register_method(
247 api_name => "open-ils.ingest.full.biblio.record",
248 method => "rw_biblio_ingest_single_record",
# Read-only ingest of a bib object: derives all index data from $bib->marc
# without writing anything. Returns a hash ref with keys full_rec (flat MARC
# rows), field_entries (classed index entries), fingerprint and descriptor,
# each stamped with $bib->id.
253 sub ro_biblio_ingest_single_object {
# Escape non-ASCII characters as numeric entities before parsing.
257 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
259 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry extractors get the parsed document; the
# fingerprint and descriptor extractors get the XML string.
261 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
262 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
263 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
264 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
# Attach the owning record id; the descriptor may be missing, hence the guard.
266 $_->source($bib->id) for (@mXfe);
267 $_->record($bib->id) for (@mfr);
268 $rd->record($bib->id) if ($rd);
270 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub above as open-ils.ingest.full.biblio.object.readonly.
272 __PACKAGE__->register_method(
273 api_name => "open-ils.ingest.full.biblio.object.readonly",
274 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a raw MARCXML string. Same extraction as the object
# variant, but no record id is attached to the results because none is known.
# Returns a hash ref with keys full_rec, field_entries, fingerprint and descriptor.
279 sub ro_biblio_ingest_single_xml {
# The XML is the next argument; non-ASCII characters are entity-escaped first.
282 my $xml = OpenILS::Application::Ingest::entityize(shift);
284 my $document = $parser->parse_string($xml);
286 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
287 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
288 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
289 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
291 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Publish the sub above as open-ils.ingest.full.biblio.xml.readonly.
293 __PACKAGE__->register_method(
294 api_name => "open-ils.ingest.full.biblio.xml.readonly",
295 method => "ro_biblio_ingest_single_xml",
# Read-only ingest by record id: retrieves the biblio.record_entry for $rec,
# runs the XML read-only ingest on its MARC, and stamps $rec onto every
# resulting row. Returns undef when the record cannot be retrieved.
300 sub ro_biblio_ingest_single_record {
305 OpenILS::Application::Ingest->post_init();
306 my $r = OpenSRF::AppSession
307 ->create('open-ils.cstore')
308 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
311 return undef unless ($r and @$r);
313 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
315 $_->source($rec) for (@{$res->{field_entries}});
316 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): unlike the object variant, this call is not guarded against a
# missing descriptor; biblio_descriptor can return undef when its script fails — confirm.
317 $res->{descriptor}->record($rec);
# Publish the sub above as open-ils.ingest.full.biblio.record.readonly.
321 __PACKAGE__->register_method(
322 api_name => "open-ils.ingest.full.biblio.record.readonly",
323 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest by record id: reads record ids from the client
# one message at a time, ingests each via
# open-ils.ingest.full.biblio.record.readonly, and responds with the result.
328 sub ro_biblio_ingest_stream_record {
332 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
334 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
336 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
338 my $rec = $resp->content;
339 last unless (defined $rec);
341 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
342 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
# Stamp the record id onto the rows (the single-record method does the same).
344 $_->source($rec) for (@{$res->{field_entries}});
345 $_->record($rec) for (@{$res->{full_rec}});
347 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.biblio.record_stream.readonly.
352 __PACKAGE__->register_method(
353 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
354 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of raw MARCXML: reads XML strings from the
# client one message at a time, ingests each via
# open-ils.ingest.full.biblio.xml.readonly, and responds with the result.
359 sub ro_biblio_ingest_stream_xml {
363 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
365 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
367 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
369 my $xml = $resp->content;
370 last unless (defined $xml);
372 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
373 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
375 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.biblio.xml_stream.readonly.
380 __PACKAGE__->register_method(
381 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
382 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib objects: reads objects from the client one message
# at a time, runs the XML read-only ingest on each object's MARC, stamps the
# object's id onto the resulting rows, and responds with the result.
# NOTE(review): despite the "rw" name, no write is made in the lines visible
# in this listing — confirm against the full file.
387 sub rw_biblio_ingest_stream_import {
391 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
393 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
395 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
397 my $bib = $resp->content;
398 last unless (defined $bib);
400 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
401 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
403 $_->source($bib->id) for (@{$res->{field_entries}});
404 $_->record($bib->id) for (@{$res->{full_rec}});
406 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.biblio.bib_stream.import.
411 __PACKAGE__->register_method(
412 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
413 method => "rw_biblio_ingest_stream_import",
419 # --------------------------------------------------------------------------------
422 package OpenILS::Application::Ingest::Authority;
423 use base qw/OpenILS::Application::Ingest/;
424 use Unicode::Normalize;
# Read-only ingest of an authority record object: flattens $bib->marc into
# full_rec rows stamped with $bib->id. Returns { full_rec => [...] } only;
# no field entries, fingerprint or descriptor are produced for authorities.
426 sub ro_authority_ingest_single_object {
# Escape non-ASCII characters as numeric entities before parsing.
430 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
432 my $document = $parser->parse_string($xml);
434 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
436 $_->record($bib->id) for (@mfr);
438 return { full_rec => \@mfr };
# Publish the sub above as open-ils.ingest.full.authority.object.readonly.
440 __PACKAGE__->register_method(
441 api_name => "open-ils.ingest.full.authority.object.readonly",
442 method => "ro_authority_ingest_single_object",
# Read-only ingest of raw authority MARCXML: flattens the document into
# full_rec rows with no record id attached. Returns { full_rec => [...] }.
447 sub ro_authority_ingest_single_xml {
# The XML is the next argument; non-ASCII characters are entity-escaped first.
450 my $xml = OpenILS::Application::Ingest::entityize(shift);
452 my $document = $parser->parse_string($xml);
454 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
456 return { full_rec => \@mfr };
# Publish the sub above as open-ils.ingest.full.authority.xml.readonly.
458 __PACKAGE__->register_method(
459 api_name => "open-ils.ingest.full.authority.xml.readonly",
460 method => "ro_authority_ingest_single_xml",
# Read-only ingest by authority record id: retrieves the
# authority.record_entry for $rec, runs the authority XML read-only ingest on
# its MARC, and stamps $rec onto the resulting full_rec rows. Returns undef
# when the record cannot be retrieved.
465 sub ro_authority_ingest_single_record {
470 OpenILS::Application::Ingest->post_init();
471 my $r = OpenSRF::AppSession
472 ->create('open-ils.cstore')
473 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
476 return undef unless ($r and @$r);
478 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
480 $_->record($rec) for (@{$res->{full_rec}});
# NOTE(review): the authority xml.readonly method returns only
# { full_rec => ... }, so $res->{descriptor} looks undefined here and this
# call would die — likely copied from the biblio variant; confirm and remove or guard.
481 $res->{descriptor}->record($rec);
# Publish the sub above as open-ils.ingest.full.authority.record.readonly.
485 __PACKAGE__->register_method(
486 api_name => "open-ils.ingest.full.authority.record.readonly",
487 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest by record id: reads ids from the
# client one message at a time, ingests each via
# open-ils.ingest.full.authority.record.readonly, and responds with the result.
492 sub ro_authority_ingest_stream_record {
496 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
498 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
500 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
502 my $rec = $resp->content;
503 last unless (defined $rec);
505 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
506 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
508 $_->record($rec) for (@{$res->{full_rec}});
510 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.authority.record_stream.readonly.
515 __PACKAGE__->register_method(
516 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
517 method => "ro_authority_ingest_stream_record",
# Streaming read-only ingest of raw authority MARCXML: reads XML strings from
# the client one message at a time, ingests each via
# open-ils.ingest.full.authority.xml.readonly, and responds with the result.
522 sub ro_authority_ingest_stream_xml {
526 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
528 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
530 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
532 my $xml = $resp->content;
533 last unless (defined $xml);
535 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
536 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
538 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.authority.xml_stream.readonly.
543 __PACKAGE__->register_method(
544 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
545 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects: reads objects from the client
# one message at a time, runs the authority XML read-only ingest on each
# object's MARC, stamps the object's id onto the full_rec rows, and responds.
# NOTE(review): despite the "rw" name, no write is made in the lines visible
# in this listing — confirm against the full file.
550 sub rw_authority_ingest_stream_import {
554 OpenILS::Application::Ingest->post_init();
# NOTE(review): $ses is not used in any line visible in this listing — confirm it is needed.
556 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
# Pull one message per iteration (timeout => 5); undefined content ends the stream.
558 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
560 my $bib = $resp->content;
561 last unless (defined $bib);
563 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
564 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
566 $_->record($bib->id) for (@{$res->{full_rec}});
568 $client->respond( $res );
# Publish the sub above as open-ils.ingest.full.authority.bib_stream.import.
573 __PACKAGE__->register_method(
574 api_name => "open-ils.ingest.full.authority.bib_stream.import",
575 method => "rw_authority_ingest_stream_import",
581 # --------------------------------------------------------------------------------
582 # MARC index extraction
584 package OpenILS::Application::Ingest::XPATH;
585 use base qw/OpenILS::Application::Ingest/;
586 use Unicode::Normalize;
588 # give this an XML documentElement and an XPATH expression
# Concatenates the text content of every node matched by the XPath expression
# into one space-separated string. When both a namespace URI and a prefix are
# supplied, the namespace is declared on the element first so that prefixed
# XPath steps resolve. With $unique set, a child's text is skipped when it
# already occurs anywhere in the string built so far.
589 sub xpath_to_string {
593 my $ns_prefix = shift;
596 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
600 # grab the set of matching nodes
601 my @nodes = $xml->findnodes( $xpath );
602 for my $value (@nodes) {
604 # grab all children of the node
605 my @children = $value->childNodes();
606 for my $child (@children) {
608 # add the child's content to the growing buffer
# quotemeta makes the text safe to use as a literal pattern in the match below.
609 my $content = quotemeta($child->textContent);
# This is a substring test, so a value contained in a longer earlier value is also dropped.
610 next if ($unique && $string =~ /$content/); # uniquify the values
611 $string .= $child->textContent . " ";
# Appends the matched node's own text; presumably the branch taken when the
# node has no children (the condition is on a line not shown) — confirm.
614 $string .= $value->textContent . " ";
# Extracts index field entries from a MARCXML record for the requested field
# classes. For each class and each configured field in $xpathset, the record
# is transformed to the field's format when needed, the field's XPath is
# evaluated, the text is normalised, and a Fieldmapper
# metabib::<class>_field_entry carrying the value and the field id is sent
# to the client.
620 sub class_index_string_xml {
626 OpenILS::Application::Ingest->post_init();
# Accept either a parsed document or a raw XML string.
627 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
631 for my $class (@classes) {
632 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
633 for my $type ( keys %{ $xpathset->{$class} } ) {
635 my $def = $xpathset->{$class}->{$type};
636 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Run the format's stylesheet at most once per format; later fields reuse the cached result.
641 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
642 $transform_cache{$def->{format}} = $document;
# The format name doubles as the namespace prefix used in the XPath.
645 my $value = xpath_to_string(
646 $document->documentElement => $def->{xpath},
647 $sf->{ns} => $def->{format},
# Normalise: decompose (NFD), strip combining marks (\pM) and control/other
# characters (\pC), trim trailing non-word characters, and drop dots that sit
# between two word characters.
653 $value = NFD($value);
654 $value =~ s/\pM+//sgo;
655 $value =~ s/\pC+//sgo;
656 $value =~ s/\W+$//sgo;
658 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
661 my $fm = $class_constructor->new;
662 $fm->value( $value );
663 $fm->field( $xpathset->{$class}->{$type}->{id} );
664 $client->respond($fm);
# Publish the sub above as open-ils.ingest.field_entry.class.xml.
669 __PACKAGE__->register_method(
670 api_name => "open-ils.ingest.field_entry.class.xml",
671 method => "class_index_string_xml",
# Extracts index field entries for the requested classes from a stored
# record: retrieves the record by id, runs
# open-ils.ingest.field_entry.class.xml on its MARC, and relays each entry to
# the client. Returns undef when the record cannot be retrieved.
677 sub class_index_string_record {
683 OpenILS::Application::Ingest->post_init();
# NOTE(review): this retrieves an authority.record_entry, whereas the sibling
# all_index_string_record retrieves a biblio.record_entry for the same
# metabib field-entry extraction — possibly a copy/paste slip; confirm.
684 my $r = OpenSRF::AppSession
685 ->create('open-ils.cstore')
686 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
689 return undef unless ($r and @$r);
691 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
693 $client->respond($fm);
# Publish the sub above as open-ils.ingest.field_entry.class.record.
697 __PACKAGE__->register_method(
698 api_name => "open-ils.ingest.field_entry.class.record",
699 method => "class_index_string_record",
# Extracts index field entries for every class configured in $xpathset from
# the given XML, relaying each entry produced by
# open-ils.ingest.field_entry.class.xml to the client.
705 sub all_index_string_xml {
710 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
711 $client->respond($fm);
# Publish the sub above as open-ils.ingest.extract.field_entry.all.xml.
715 __PACKAGE__->register_method(
716 api_name => "open-ils.ingest.extract.field_entry.all.xml",
717 method => "all_index_string_xml",
# Extracts index field entries for every configured class from a stored bib
# record: retrieves the biblio.record_entry for $rec, runs
# open-ils.ingest.field_entry.class.xml on its MARC with all class names, and
# relays each entry to the client. Returns undef when the record cannot be retrieved.
723 sub all_index_string_record {
728 OpenILS::Application::Ingest->post_init();
729 my $r = OpenSRF::AppSession
730 ->create('open-ils.cstore')
731 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
734 return undef unless ($r and @$r);
736 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
738 $client->respond($fm);
# Publish the sub above as open-ils.ingest.extract.field_entry.all.record.
742 __PACKAGE__->register_method(
743 api_name => "open-ils.ingest.extract.field_entry.all.record",
744 method => "all_index_string_record",
750 # --------------------------------------------------------------------------------
753 package OpenILS::Application::Ingest::FlatMARC;
754 use base qw/OpenILS::Application::Ingest/;
755 use Unicode::Normalize;
# Flattens a parsed MARCXML document into a list of full_rec row objects.
# $xmltype selects the Fieldmapper namespace of the row class and defaults to
# 'metabib'. The leader, each controlfield and each datafield subfield are
# visited in turn.
758 sub _marcxml_to_full_rows {
761 my $xmltype = shift || 'metabib';
763 my $type = "Fieldmapper::${xmltype}::full_rec";
# Take the first element named "record" regardless of namespace.
767 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
769 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
770 next unless $tagline;
775 my $val = $tagline->textContent;
# Control fields: the tag comes from the element's "tag" attribute.
785 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
786 next unless $tagline;
790 $ns->tag( $tagline->getAttribute( "tag" ) );
791 my $val = $tagline->textContent;
# Data fields: tag and both indicators are read once, then each subfield is visited.
801 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
802 next unless $tagline;
804 my $tag = $tagline->getAttribute( "tag" );
805 my $ind1 = $tagline->getAttribute( "ind1" );
806 my $ind2 = $tagline->getAttribute( "ind2" );
808 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
816 $ns->subfield( $data->getAttribute( "code" ) );
817 my $val = $data->textContent;
# Subfield values are stored lower-cased.
822 $ns->value( lc($val) );
828 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# NOTE(review): the enclosing sub header is not visible in this listing; the
# registrations below name it flat_marc_xml.
# Flattens MARCXML (a raw string or an already parsed document) into full_rec
# rows and sends each row to the client.
837 $log->debug("processing [$xml]");
# Parse only when given a string; an already parsed document is used as is.
839 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
# The row class namespace follows the API name this method was invoked under.
841 my $type = 'metabib';
842 $type = 'authority' if ($self->api_name =~ /authority/o);
844 OpenILS::Application::Ingest->post_init();
846 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is published under both the authority and the biblio API names.
849 __PACKAGE__->register_method(
850 api_name => "open-ils.ingest.flat_marc.authority.xml",
851 method => "flat_marc_xml",
856 __PACKAGE__->register_method(
857 api_name => "open-ils.ingest.flat_marc.biblio.xml",
858 method => "flat_marc_xml",
# Flattens a stored record into full_rec rows: retrieves the record_entry of
# the selected type for $rec, runs the matching flat_marc .xml method on its
# MARC, and sends each row to the client. Returns undef when the record or
# its MARC is missing.
864 sub flat_marc_record {
# Switch to authority when invoked under an authority API name (the default
# value of $type is set on a line not shown in this listing).
870 $type = 'authority' if ($self->api_name =~ /authority/o);
872 OpenILS::Application::Ingest->post_init();
873 my $r = OpenSRF::AppSession
874 ->create('open-ils.cstore')
875 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
879 return undef unless ($r and $r->marc);
881 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
882 for my $row (@rows) {
883 $client->respond($row);
# NOTE(review): the row is JSON-serialised even when debug logging is off, and
# no import of JSON is visible in this listing — confirm.
884 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same sub is published under both the biblio and the authority API names.
888 __PACKAGE__->register_method(
889 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
890 method => "flat_marc_record",
895 __PACKAGE__->register_method(
896 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
897 method => "flat_marc_record",
903 # --------------------------------------------------------------------------------
906 package OpenILS::Application::Ingest::Biblio::Fingerprint;
907 use base qw/OpenILS::Application::Ingest/;
908 use Unicode::Normalize;
909 use OpenSRF::EX qw/:try/;
# Computes the fingerprint of a stored bib record: retrieves the
# biblio.record_entry for $rec and runs open-ils.ingest.fingerprint.xml on
# its MARC. The quality value in the result is truncated to an integer.
# Returns undef when the record or its MARC is missing.
911 sub biblio_fingerprint_record {
916 OpenILS::Application::Ingest->post_init();
918 my $r = OpenSRF::AppSession
919 ->create('open-ils.cstore')
920 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
923 return undef unless ($r and $r->marc);
925 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
# NOTE(review): $fp is used as a hash ref on the next line, so interpolating
# it here would log a reference address rather than the fingerprint — confirm.
926 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
927 $fp->{quality} = int($fp->{quality});
# Publish the sub above as open-ils.ingest.fingerprint.record.
930 __PACKAGE__->register_method(
931 api_name => "open-ils.ingest.fingerprint.record",
932 method => "biblio_fingerprint_record",
# Computes a fingerprint for a MARCXML string by running the configured
# biblio_fingerprint script through OpenILS::Utils::ScriptRunner with the XML
# exposed as environment.marc.
938 sub biblio_fingerprint {
# The XML is the next argument; non-ASCII characters are entity-escaped first.
941 my $xml = OpenILS::Application::Ingest::entityize(shift);
943 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest app settings.
946 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
947 my $conf = OpenSRF::Utils::SettingsClient->new;
949 my $libs = $conf->config_value(@pfx, 'script_path');
950 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise to an array ref.
951 my $script_libs = (ref($libs)) ? $libs : [$libs];
953 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared in the visible lines; presumably a cached runner
# shared across calls — confirm.
955 $fp_script = new OpenILS::Utils::ScriptRunner
956 ( file => $script_file,
957 paths => $script_libs,
958 reset_count => 100 );
961 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): on failure this returns undef only if $log->error(...) yields
# a true value; otherwise execution continues with a false $res — confirm.
963 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
964 $log->debug("Script for biblio fingerprinting completed successfully...");
# Publish the sub above as open-ils.ingest.fingerprint.xml.
968 __PACKAGE__->register_method(
969 api_name => "open-ils.ingest.fingerprint.xml",
970 method => "biblio_fingerprint",
# Extracts a record descriptor from a MARCXML string by running the
# configured biblio_descriptor script through OpenILS::Utils::ScriptRunner
# with the XML exposed as environment.marc.
976 sub biblio_descriptor {
# The XML is the next argument; non-ASCII characters are entity-escaped first.
979 my $xml = OpenILS::Application::Ingest::entityize(shift);
981 $log->internal("Got MARC [$xml]");
# Script location and library paths come from the open-ils.ingest app settings.
984 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
985 my $conf = OpenSRF::Utils::SettingsClient->new;
987 my $libs = $conf->config_value(@pfx, 'script_path');
988 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalise to an array ref.
989 my $script_libs = (ref($libs)) ? $libs : [$libs];
991 $log->debug("Loading script $script_file for biblio descriptor extraction...");
# $rd_script is not declared in the visible lines; presumably a cached runner
# shared across calls — confirm.
993 $rd_script = new OpenILS::Utils::ScriptRunner
994 ( file => $script_file,
995 paths => $script_libs,
996 reset_count => 100 );
999 $log->debug("Setting up environment for descriptor extraction script...");
# Unlike biblio_fingerprint, the XML is inserted directly at the
# 'environment.marc' path rather than as a hash under 'environment'.
1000 $rd_script->insert('environment.marc' => $xml => 1);
1001 $log->debug("Environment building complete...");
# NOTE(review): on failure this returns undef only if $log->error(...) yields
# a true value; otherwise execution continues with a false $res — confirm.
1003 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1004 $log->debug("Script for biblio descriptor extraction completed successfully");
# Publish the sub above as open-ils.ingest.descriptor.xml.
1008 __PACKAGE__->register_method(
1009 api_name => "open-ils.ingest.descriptor.xml",
1010 method => "biblio_descriptor",
# Returns whatever open-ils.storage.transaction.current reports, which
# callers in this file use as a truth test for "already inside a transaction".
1020 sub in_transaction {
1021 OpenILS::Application::Ingest->post_init();
1022 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Starts a storage transaction and returns the result of
# open-ils.storage.transaction.current. Throws OpenSRF::EX::PANIC when the
# BEGIN request returns an undefined or false value.
1025 sub begin_transaction {
1029 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is already open.
1030 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1034 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1035 #__PACKAGE__->st_sess->connect;
1036 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
# A failed BEGIN is rolled back before the exception is raised.
1037 unless (defined $r and $r) {
1038 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1039 #__PACKAGE__->st_sess->disconnect;
1040 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1044 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1047 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Rolls back the current storage transaction; when none is open, only a
# debug message is logged. Any error caught in the catch block is re-raised
# as OpenSRF::EX::PANIC.
1050 sub rollback_transaction {
1054 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is open.
1055 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1059 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1061 $log->debug("Ingest isn't inside a transaction.", INFO);
1063 } catch Error with {
1064 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commits the current storage transaction; when none is open, only a debug
# message is logged. A COMMIT that returns an undefined or false value is
# rolled back and raises OpenSRF::EX::PANIC, as does any error caught in the
# catch block.
1070 sub commit_transaction {
1074 OpenILS::Application::Ingest->post_init();
# Ask storage whether a transaction is open.
1075 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1078 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1080 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
1081 unless (defined $r and $r) {
1082 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1083 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1085 #__PACKAGE__->st_sess->disconnect;
1087 $log->debug("Ingest isn't inside a transaction.", INFO);
1089 } catch Error with {
1090 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# NOTE(review): the enclosing sub header is not visible in this listing;
# presumably this is storage_req, which the transaction helpers call — confirm.
# Looks up $method on this package, runs it with the remaining arguments, and
# returns only the first result.
1099 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1100 return shift( @res );
# Removes the derived rows (full_rec and record_descriptor) of authority
# record $rec. Starts a transaction when none is open; the deletes run
# between a savepoint set and release, and the savepoint is rolled back on
# failure. When $commit is set, the transaction is committed on success and
# rolled back on failure.
1103 sub scrub_authority_record {
1109 if (!OpenILS::Application::Ingest->in_transaction) {
1110 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1116 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1118 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1119 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1121 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
# Failure path: log the error and undo everything since the savepoint.
1123 $log->debug('Scrubbing failed : '.shift(), ERROR);
1124 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
# $commit and $success are set on lines not shown in this listing.
1128 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1129 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub above as open-ils.worm.scrub.authority.
1132 __PACKAGE__->register_method(
1133 api_name => "open-ils.worm.scrub.authority",
1134 method => "scrub_authority_record",
# Removes all derived metabib rows for bib record(s) $rec and repairs any
# metarecord whose master was $rec. $rec may be a hash ref search clause, in
# which case it is first resolved to record ids. Starts a transaction when
# none is open; the work runs between a savepoint set and release, and the
# savepoint is rolled back on failure. When $commit is set, the transaction
# is committed on success and rolled back on failure.
1140 sub scrub_metabib_record {
# A hash ref is treated as a search_where clause and replaced by the matching id list.
1145 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1146 $rec = OpenILS::Application::Ingest->storage_req(
1147 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1152 if (!OpenILS::Application::Ingest->in_transaction) {
1153 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1159 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
# Mass-delete every derived table keyed on this record.
1161 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1162 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1163 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1164 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1165 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1166 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1167 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1168 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1170 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1171 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
# For each metarecord led by $rec, fetch its remaining source-map rows.
1173 for my $mr (@$masters) {
1174 $log->debug( "Found metarecord whose master is $rec", DEBUG);
1175 my $others = OpenILS::Application::Ingest->storage_req(
1176 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
# Promote the first remaining source to master and clear the mods value
# (the condition choosing this branch is on a line not shown).
1179 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1180 $mr->master_record($others->[0]->source);
1181 OpenILS::Application::Ingest->storage_req(
1182 'open-ils.storage.direct.metabib.metarecord.remote_update',
1184 { master_record => $others->[0]->source, mods => undef }
# Otherwise the metarecord is deleted.
# NOTE(review): the warn calls duplicate the debug log lines and look like
# leftover diagnostics — confirm.
1187 warn "Removing metarecord whose master is $rec";
1188 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1189 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1190 warn "Metarecord removed";
1191 $log->debug( "Metarecord removed", DEBUG);
1195 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
# Failure path: log the error and undo everything since the savepoint.
1198 $log->debug('Scrubbing failed : '.shift(), ERROR);
1199 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
# $commit and $success are set on lines not shown in this listing.
1203 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1204 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Publish the sub above as open-ils.worm.scrub.biblio.
1207 __PACKAGE__->register_method(
1208 api_name => "open-ils.worm.scrub.biblio",
1209 method => "scrub_metabib_record",
# Re-ingests every bib record mapped to metarecord $mrec: looks up the
# metarecord's source-map rows and calls wormize_biblio_record for each
# source. A hash with record, metarecord and success keys is built per
# record, both on the normal path and in the catch block.
1214 sub wormize_biblio_metarecord {
1219 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1222 for my $r (@$recs) {
# Called as a plain function, passing $self and $client through explicitly.
1225 $success = wormize_biblio_record($self => $client => $r->source);
# NOTE(review): $rec is not declared in any visible line of this sub, while
# $r (the source-map row) and $mrec are — $rec->metarecord may be a typo for
# $r->metarecord; confirm against the full file.
1227 { record => $r->source,
1228 metarecord => $rec->metarecord,
1229 success => $success,
1232 } catch Error with {
1235 { record => $r->source,
1236 metarecord => $rec->metarecord,
1237 success => $success,
# The same sub is published under four API names. wormize_biblio_record
# tests its api_name for /nomap/ and /noscrub/.
# NOTE(review): it is called above as a plain function with this method's
# $self, so presumably these suffixes reach it that way — confirm.
1245 __PACKAGE__->register_method(
1246 api_name => "open-ils.worm.wormize.metarecord",
1247 method => "wormize_biblio_metarecord",
1252 __PACKAGE__->register_method(
1253 api_name => "open-ils.worm.wormize.metarecord.nomap",
1254 method => "wormize_biblio_metarecord",
1259 __PACKAGE__->register_method(
1260 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1261 method => "wormize_biblio_metarecord",
1266 __PACKAGE__->register_method(
1267 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1268 method => "wormize_biblio_metarecord",
# wormize_biblio_record: (re)build the derived metabib data for bib record(s).
# Phase 1 parses each record's MARC and collects fingerprint, flattened MARC rows,
# a record descriptor, per-class field entries and (unless the API name contains
# "nomap") metarecord mappings. Phase 2 writes the collected rows through the
# storage service in batches. Savepoints bracket both phases.
1275 sub wormize_biblio_record {
# A hash ref is treated as a search clause and resolved through the id_list search.
1280 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1281 $rec = OpenILS::Application::Ingest->storage_req(
1282 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Only open a transaction when the caller has not already done so.
# NOTE(review): $commit, tested at the end of the sub, is presumably set in this
# branch; the assignment is not visible here - confirm.
1288 if (!OpenILS::Application::Ingest->in_transaction) {
1289 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1295 # clean up the cruft
# Skipped when the API name contains "noscrub"; a false return from the scrub is fatal.
1296 unless ($self->api_name =~ /noscrub/o) {
1297 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1301 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# Accumulators filled in phase 1 and flushed in phase 2.
# NOTE(review): @full_rec and %field_entry are used below but their declarations
# are not visible here.
1304 my @rec_descriptor = ();
1312 my %metarecord = ();
1313 my @source_map = ();
1314 for my $r (@$bibs) {
# Per-record savepoint, named after the record id, so one bad record can be rolled back alone.
1316 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1318 my $xml = $parser->parse_string($r->marc);
1320 #update the fingerprint
1321 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1322 OpenILS::Application::Ingest->storage_req(
1323 'open-ils.storage.direct.biblio.record_entry.remote_update',
1325 { fingerprint => $fp->{fingerprint},
1326 quality => int($fp->{quality}) }
# The update is only sent when fingerprint or quality actually changed.
# NOTE(review): quality is compared with the string operator `ne`; refingerprint_bibrec
# compares the same field with `!=` - confirm which is intended.
1327 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1329 # the full_rec stuff
1330 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1331 $fr->record( $r->id );
1332 push @full_rec, $fr;
1335 # the rec_descriptor stuff
1336 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1337 $rd->record( $r->id );
1338 push @rec_descriptor, $rd;
1340 # the indexing field entry stuff
1341 for my $class ( qw/title author subject keyword series/ ) {
1342 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1343 $fe->source( $r->id );
1344 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: look up the metarecord with this fingerprint and map the record to it.
1348 unless ($self->api_name =~ /nomap/o) {
1349 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
# NOTE(review): the condition guarding this creation is not visible here;
# presumably it runs only when the lookup above found nothing - confirm.
1352 $mr = Fieldmapper::metabib::metarecord->new;
1353 $mr->fingerprint( $fp->{fingerprint} );
1354 $mr->master_record( $r->id );
1355 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1358 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1359 $mr_map->metarecord( $mr->id );
1360 $mr_map->source( $r->id );
1361 push @source_map, $mr_map;
# Keyed by id so each touched metarecord is re-evaluated once in phase 2.
1363 $metarecord{$mr->id} = $mr;
1365 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for this record: log the caught error (shift()) and undo its savepoint.
1367 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1368 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Phase 2: only entered when at least one record produced a descriptor.
1373 if (@rec_descriptor) {
1374 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1376 OpenILS::Application::Ingest->storage_req(
1377 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# For every touched metarecord, re-pick the master record from all mapped sources.
1381 for my $mr ( values %metarecord ) {
1382 my $sources = OpenILS::Application::Ingest->storage_req(
1383 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1387 my $bibs = OpenILS::Application::Ingest->storage_req(
1388 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1389 [ map { $_->source } @$sources ]
# Highest numeric quality wins.
1392 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1394 OpenILS::Application::Ingest->storage_req(
1395 'open-ils.storage.direct.metabib.metarecord.remote_update',
# mods is reset to undef together with the new master record.
1397 { master_record => $master->id, mods => undef }
1401 OpenILS::Application::Ingest->storage_req(
1402 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1404 ) if (@rec_descriptor);
1406 OpenILS::Application::Ingest->storage_req(
1407 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch create per index class, skipped when that class collected nothing.
1411 OpenILS::Application::Ingest->storage_req(
1412 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1413 @{ $field_entry{title} }
1414 ) if (@{ $field_entry{title} });
1416 OpenILS::Application::Ingest->storage_req(
1417 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1418 @{ $field_entry{author} }
1419 ) if (@{ $field_entry{author} });
1421 OpenILS::Application::Ingest->storage_req(
1422 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1423 @{ $field_entry{subject} }
1424 ) if (@{ $field_entry{subject} });
1426 OpenILS::Application::Ingest->storage_req(
1427 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1428 @{ $field_entry{keyword} }
1429 ) if (@{ $field_entry{keyword} });
1431 OpenILS::Application::Ingest->storage_req(
1432 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1433 @{ $field_entry{series} }
1434 ) if (@{ $field_entry{series} });
1436 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for phase 2: log the caught error and roll back to the savepoint.
1442 $log->debug('Wormization failed : '.shift(), ERROR);
1443 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# Commit or roll back only when $commit is set.
1447 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1448 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API names for wormize_biblio_record; the .nomap and .noscrub suffixes are what the
# api_name regex checks inside the sub react to.
1451 __PACKAGE__->register_method(
1452 api_name => "open-ils.worm.wormize.biblio",
1453 method => "wormize_biblio_record",
1457 __PACKAGE__->register_method(
1458 api_name => "open-ils.worm.wormize.biblio.nomap",
1459 method => "wormize_biblio_record",
1463 __PACKAGE__->register_method(
1464 api_name => "open-ils.worm.wormize.biblio.noscrub",
1465 method => "wormize_biblio_record",
1469 __PACKAGE__->register_method(
1470 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1471 method => "wormize_biblio_record",
# wormize_authority_record: rebuild the flattened MARC rows for authority record(s).
# Mirrors the biblio ingest but only produces authority full_rec rows; the record
# descriptor steps are commented out.
1476 sub wormize_authority_record {
# Only open a transaction when the caller has not already done so.
1482 if (!OpenILS::Application::Ingest->in_transaction) {
1483 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1489 # clean up the cruft
# Skipped when the API name contains "noscrub"; a false return from the scrub is fatal.
1490 unless ($self->api_name =~ /noscrub/o) {
1491 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the name, $bibs holds authority record entries here.
1495 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# @rec_descriptor is never filled in the visible code: the lines that would push to it are commented out.
1498 my @rec_descriptor = ();
1499 for my $r (@$bibs) {
1500 my $xml = $parser->parse_string($r->marc);
1502 # the full_rec stuff
1503 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1504 $fr->record( $r->id );
1505 push @full_rec, $fr;
1508 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1509 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1510 #$rd->record( $r->id );
1511 #push @rec_descriptor, $rd;
# Write phase, bracketed by a savepoint.
1515 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1517 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1518 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1520 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the caught error (shift()) and roll back to the savepoint.
1523 $log->debug('Wormization failed : '.shift(), ERROR);
1524 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Commit or roll back only when $commit is set.
1528 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1529 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API names for wormize_authority_record, with and without the scrub step.
1532 __PACKAGE__->register_method(
1533 api_name => "open-ils.worm.wormize.authority",
1534 method => "wormize_authority_record",
1538 __PACKAGE__->register_method(
1539 api_name => "open-ils.worm.wormize.authority.noscrub",
1540 method => "wormize_authority_record",
1546 # --------------------------------------------------------------------------------
1547 # MARC index extraction
1549 package OpenILS::Application::Ingest::XPATH;
1550 use base qw/OpenILS::Application::Ingest/;
1551 use Unicode::Normalize;
1553 # give this a MODS documentElement and an XPATH expression
# _xpath_to_string: evaluate an XPath against a node and flatten the matches into
# one space-separated string, returned in Unicode NFD form.
# Callers in this file pass (node, xpath, namespace URI, namespace prefix, unique flag).
1554 sub _xpath_to_string {
1558 my $ns_prefix = shift;
# Declare the namespace prefix on the node only when both URI and prefix were supplied.
1561 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1565 # grab the set of matching nodes
1566 my @nodes = $xml->findnodes( $xpath );
1567 for my $value (@nodes) {
1569 # grab all children of the node
1570 my @children = $value->childNodes();
1571 for my $child (@children) {
1573 # add the child's content to the growing buffer
# The text is quotemeta-escaped so it is matched literally in the uniqueness test.
1574 my $content = quotemeta($child->textContent);
# With $unique set, a child is skipped when its text already occurs anywhere in
# the accumulated string (substring match, not whole-value match).
1575 next if ($unique && $string =~ /$content/); # uniquify the values
1576 $string .= $child->textContent . " ";
# NOTE(review): the condition guarding this line is not visible here; presumably it
# covers matched nodes that have no children - confirm.
1579 $string .= $value->textContent . " ";
1582 return NFD($string);
# class_all_index_string_xml: for one index class, build a field-entry object for
# every index definition in $xpathset->{$class} from a MARC XML document and send
# each one to the client.
1585 sub class_all_index_string_xml {
1591 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
1592 $xml = $parser->parse_string($xml) unless (ref $xml);
# Fieldmapper class name is derived from the index class.
1594 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1595 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MODS transform does not depend on $type, yet it is re-run on
# every iteration; it could be hoisted above the loop.
1596 my $value = _xpath_to_string(
1597 $mods_sheet->transform($xml)->documentElement,
1598 $xpathset->{$class}->{$type}->{xpath},
1599 "http://www.loc.gov/mods/",
# Normalise the extracted text: decompose (NFD), drop characters in the Unicode
# Mark (\pM) and Other (\pC) categories, trim trailing non-word characters, remove
# a dot that directly follows a word character, then lowercase.
1606 $value = NFD($value);
1607 $value =~ s/\pM+//sgo;
1608 $value =~ s/\pC+//sgo;
1609 $value =~ s/\W+$//sgo;
1611 $value =~ s/(\w)\./$1/sgo;
1612 $value = lc($value);
1614 my $fm = $class_constructor->new;
1615 $fm->value( $value );
# The field is set to the id stored for this index definition.
1616 $fm->field( $xpathset->{$class}->{$type}->{id} );
1617 $client->respond($fm);
1621 __PACKAGE__->register_method(
1622 api_name => "open-ils.worm.field_entry.class.xml",
1623 method => "class_all_index_string_xml",
# class_all_index_string_record: record-id front end for the .class.xml method.
# Retrieves the bib record entry for $rec and forwards every field entry produced
# from its MARC by open-ils.worm.field_entry.class.xml to the client.
1629 sub class_all_index_string_record {
1635 OpenILS::Application::Ingest->post_init();
1636 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# The MARC is passed on as stored; the XML method parses it when it is not a reference.
1638 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1640 $client->respond($fm);
1644 __PACKAGE__->register_method(
1645 api_name => "open-ils.worm.field_entry.class.record",
1646 method => "class_all_index_string_record",
# class_index_string_xml: return the index string for a single class/type pair.
# Transforms the MARC XML to MODS and evaluates the XPath configured for
# $class/$type with the "mods" prefix bound and the unique flag set to 1.
1653 sub class_index_string_xml {
1660 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
1661 $xml = $parser->parse_string($xml) unless (ref $xml);
1662 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1664 __PACKAGE__->register_method(
1665 api_name => "open-ils.worm.class.type.xml",
1666 method => "class_index_string_xml",
# class_index_string_record: record-id front end for open-ils.worm.class.type.xml.
# Retrieves the bib record entry for $rec, runs the XML variant on its MARC and
# logs the resulting string at DEBUG level.
1671 sub class_index_string_record {
1678 OpenILS::Application::Ingest->post_init();
1679 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1681 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1682 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1685 __PACKAGE__->register_method(
1686 api_name => "open-ils.worm.class.type.record",
1687 method => "class_index_string_record",
# Body of the method registered below as "xml_xpath" (its `sub` line is not visible
# here): evaluates a caller-supplied XPath, with caller-supplied namespace URI,
# prefix and unique flag, directly against the document element. No MODS transform
# is applied on this path.
1701 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
1702 $xml = $parser->parse_string($xml) unless (ref $xml);
1703 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1705 __PACKAGE__->register_method(
1706 api_name => "open-ils.worm.xpath.xml",
1707 method => "xml_xpath",
# Body of the method registered below as "record_xpath" (its `sub` line is not
# visible here): record-id front end for open-ils.worm.xpath.xml. Retrieves the
# bib record entry for $rec, runs the XPath against its MARC and logs the result
# at DEBUG level.
1721 OpenILS::Application::Ingest->post_init();
1722 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1724 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1725 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1728 __PACKAGE__->register_method(
1729 api_name => "open-ils.worm.xpath.record",
1730 method => "record_xpath",
1736 # --------------------------------------------------------------------------------
1739 package OpenILS::Application::Ingest::Biblio::Leader;
1740 use base qw/OpenILS::Application::Ingest/;
1741 use Unicode::Normalize;
# %marc_type_groups maps a group name (e.g. VIS) to a regex fragment listing the
# single-character type codes in that group. The descriptor table in this package
# tests the leader character at offset 6 against these groups via _type_re.
1743 our %marc_type_groups = (
1746 VIS => q/[gkro]{1}/,
# Builds one anchored pattern from the fragments of the requested groups.
# NOTE(review): `$marc_type_groups{@_}` is a single-element lookup, so join() gets
# one value rather than one fragment per name in @_; a hash slice
# (`@marc_type_groups{@_}`) looks intended - confirm.
# NOTE(review): without a (?:...) group, '^' binds only to the first alternative
# and '$' only to the last once several fragments are joined with '|'.
1755 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# %biblio_descriptor_code: record_descriptor field name => closure that extracts
# the field's value. The closures read $ldr and $oo8, which
# _extract_biblio_descriptors sets with `local` before calling them.
1759 our %biblio_descriptor_code = (
1760 item_type => sub { substr($ldr,6,1); },
# NOTE(review): the key these branches belong to is not visible here. The offset
# into $oo8 (29 or 23) depends on which type group the character at leader offset 6
# falls into.
1763 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1764 return substr($oo8,29,1);
1765 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1766 return substr($oo8,23,1);
# Fixed single-character positions in the leader.
1770 bib_level => sub { substr($ldr,7,1); },
1771 control_type => sub { substr($ldr,8,1); },
1772 char_encoding => sub { substr($ldr,9,1); },
1773 enc_level => sub { substr($ldr,17,1); },
1774 cat_form => sub { substr($ldr,18,1); },
1775 pub_status => sub { substr($ldr,5,1); },
# Three characters starting at offset 35 of $oo8.
1776 item_lang => sub { substr($oo8,35,3); },
# Both read offset 33 of $oo8, but only for their own type group; otherwise undef.
1777 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1778 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1779 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: build a metabib record_descriptor from a parsed MARC
# document by running every extractor in %biblio_descriptor_code.
1782 sub _extract_biblio_descriptors {
# `local` gives these package variables a value for the duration of this call, so
# the extractor closures (which name $ldr and $oo8 directly) see this record's
# data. Elements are matched by local-name(), i.e. regardless of namespace.
1785 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1786 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
# $oo7 is set here but not read by any extractor visible in this section.
1787 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1789 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each table key doubles as the setter method name on the descriptor object.
1790 for my $rd_field ( keys %biblio_descriptor_code ) {
1791 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API wrapper that returns the record descriptor for a
# MARC XML document.
1797 sub extract_biblio_desc_xml {
# Accept either an already-parsed document (a reference) or a raw XML string.
1802 $xml = $parser->parse_string($xml) unless (ref $xml);
1804 return _extract_biblio_descriptors( $xml );
1806 __PACKAGE__->register_method(
1807 api_name => "open-ils.worm.biblio_leader.xml",
1808 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id front end for open-ils.worm.biblio_leader.xml.
# Retrieves the bib record entry for $rec, builds its descriptor from the stored
# MARC and logs it as JSON at DEBUG level.
1813 sub extract_biblio_desc_record {
1818 OpenILS::Application::Ingest->post_init();
1819 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1821 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1822 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1825 __PACKAGE__->register_method(
1826 api_name => "open-ils.worm.biblio_leader.record",
1827 method => "extract_biblio_desc_record",
1832 # --------------------------------------------------------------------------------
1835 package OpenILS::Application::Ingest::FlatMARC;
1836 use base qw/OpenILS::Application::Ingest/;
1837 use Unicode::Normalize;
# _marcxml_to_full_rows: flatten a parsed MARC XML document into full_rec
# Fieldmapper objects. The leader, every controlfield and every datafield subfield
# get their own object; the log line at the end reports how many are in @ns_list.
1840 sub _marcxml_to_full_rows {
1842 my $marcxml = shift;
# $xmltype selects the Fieldmapper namespace and defaults to 'metabib'.
1843 my $xmltype = shift || 'metabib';
1845 my $type = "Fieldmapper::${xmltype}::full_rec";
# Only the first element whose local name is "record" is processed.
1849 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# --- leader ---
1851 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1852 next unless $tagline;
1854 my $ns = $type->new;
1857 my $val = $tagline->textContent;
# Value clean-up used for every row type in this sub: drop Unicode Mark (\pM) and
# Other (\pC) characters, then trim trailing non-word characters.
1859 $val =~ s/\pM+//sgo;
1860 $val =~ s/\pC+//sgo;
1861 $val =~ s/\W+$//sgo;
# --- control fields ---
1867 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1868 next unless $tagline;
1870 my $ns = $type->new;
1872 $ns->tag( $tagline->getAttribute( "tag" ) );
1873 my $val = $tagline->textContent;
1875 $val =~ s/\pM+//sgo;
1876 $val =~ s/\pC+//sgo;
1877 $val =~ s/\W+$//sgo;
# --- data fields: one object per subfield ---
1883 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1884 next unless $tagline;
# Tag and indicators are read once per datafield, ahead of the subfield loop.
1886 my $tag = $tagline->getAttribute( "tag" );
1887 my $ind1 = $tagline->getAttribute( "ind1" );
1888 my $ind2 = $tagline->getAttribute( "ind2" );
1890 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1893 my $ns = $type->new;
1898 $ns->subfield( $data->getAttribute( "code" ) );
1899 my $val = $data->textContent;
1901 $val =~ s/\pM+//sgo;
1902 $val =~ s/\pC+//sgo;
1903 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1904 $ns->value( lc($val) );
1910 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of the method registered below as "flat_marc_xml" (its `sub` line is not
# visible here): flattens a MARC XML document and sends every resulting row to the
# client.
# Accept either an already-parsed document (a reference) or a raw XML string.
1919 $xml = $parser->parse_string($xml) unless (ref $xml);
# Row type follows the API name: 'authority' when the name contains it, otherwise 'metabib'.
1921 my $type = 'metabib';
1922 $type = 'authority' if ($self->api_name =~ /authority/o);
1924 OpenILS::Application::Ingest->post_init();
1926 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same implementation is registered for authority and biblio input.
1929 __PACKAGE__->register_method(
1930 api_name => "open-ils.worm.flat_marc.authority.xml",
1931 method => "flat_marc_xml",
1936 __PACKAGE__->register_method(
1937 api_name => "open-ils.worm.flat_marc.biblio.xml",
1938 method => "flat_marc_xml",
# flat_marc_record: record-id front end for the flat_marc.*.xml methods.
# Retrieves the record entry for $rec and sends the client every row the matching
# XML method produces from its MARC.
1944 sub flat_marc_record {
# $type is used both in the storage method name and in the worm method name below;
# it is 'authority' when the API name contains it, otherwise 'biblio'.
1949 my $type = 'biblio';
1950 $type = 'authority' if ($self->api_name =~ /authority/o);
1952 OpenILS::Application::Ingest->post_init();
1953 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1955 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
1958 __PACKAGE__->register_method(
1959 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1960 method => "flat_marc_record",
1965 __PACKAGE__->register_method(
1966 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1967 method => "flat_marc_record",
1974 # --------------------------------------------------------------------------------
1977 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1978 use base qw/OpenILS::Application::Ingest/;
1979 use Unicode::Normalize;
1980 use OpenSRF::EX qw/:try/;
# @fp_mods_xpath: flat list of (match XPath => extraction block) pairs used to
# build a fingerprint from a MODS document. The code that walks this list applies a
# block only when its match XPath finds nodes.
# NOTE(review): the part names and the `xpath` / fixup keys inside each block are
# not visible here; the grouping comments below are inferred from the XPaths.
1982 my @fp_mods_xpath = (
# Pair 1: records whose typeOfResource text is "text".
1983 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates: uniform, translated, alternative, then untyped titleInfo.
1986 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1987 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1988 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1989 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title clean-up; the current text is logged between steps at INTERNAL level.
1992 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1994 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1995 $text =~ s/\pM+//gso;
1996 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1998 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1999 $text =~ s/\s+/ /sgo;
2000 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# NOTE(review): (.+) is greedy and, under /s, also matches whitespace, so it
# swallows any trailing whitespace before \s*$ can; only leading whitespace is
# actually trimmed here.
2001 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2002 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Drops the standalone words "the", "a" and "an" wherever they occur.
2003 $text =~ s/\b(?:the|an?)\b//sgo;
2004 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Drops bracketed text; the pattern needs at least two characters between the brackets.
2005 $text =~ s/\[.[^\]]+\]//sgo;
2006 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Drops trailing whitespace followed by any run of ';', '/' or '.' at the end.
2007 $text =~ s/\s*[;\/\.]*$//sgo;
2008 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal creator names first, then any creator name.
2013 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2014 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2017 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2019 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2020 $text =~ s/\pM+//gso;
2021 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2023 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2024 $text =~ s/\s+/ /sgo;
2025 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2026 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2027 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Keeps only what precedes the first whitespace (and an optional comma before it).
2028 $text =~ s/,?\s+.*$//sgo;
2029 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Pair 2: records with a relatedItem whose type is neither "host" nor "series".
2034 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: relatedItem titles first, then the record's own titles.
2037 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2038 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2039 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2040 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2041 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2042 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2043 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2044 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Same title clean-up steps as in pair 1.
2047 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2050 $text =~ s/\pM+//gso;
2051 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2053 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2054 $text =~ s/\s+/ /sgo;
2055 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2056 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2057 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2058 $text =~ s/\b(?:the|an?)\b//sgo;
2059 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2060 $text =~ s/\[.[^\]]+\]//sgo;
2061 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2062 $text =~ s/\s*[;\/\.]*$//sgo;
2063 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: relatedItem creators first, then the record's own creators.
2068 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2069 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2070 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2071 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Same author clean-up steps as in pair 1.
2074 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2077 $text =~ s/\pM+//gso;
2078 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2080 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2081 $text =~ s/\s+/ /sgo;
2082 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2083 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2084 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2085 $text =~ s/,?\s+.*$//sgo;
2086 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Pair 3: any record with a titleInfo re-uses the block from pair 1 (list index 1).
2093 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Fingerprint builder that walks @fp_mods_xpath against a MODS element.
# NOTE(review): the `sub` line is not visible here; this is presumably the body of
# _fp_mods, which fingerprint_mods and fingerprint_marc call - confirm.
# Bind the "mods" prefix that every XPath in @fp_mods_xpath uses.
2097 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# The list is read in pairs: the match XPath at $match_index and its block at $block_index.
2101 my $match_index = 0;
2102 my $block_index = 1;
2103 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
# A block is only applied when its match XPath finds at least one node.
2104 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Inside a block, entries also come in pairs: a name followed by its part.
2106 my $block_name_index = 0;
2107 my $block_value_index = 1;
2108 my $block = $fp_mods_xpath[$block_index];
2109 while ( my $part = $$block[$block_value_index] ) {
# Evaluate the part's candidate XPaths in listed order.
2111 for my $xpath ( @{ $part->{xpath} } ) {
2112 $text = $mods->findvalue( $xpath );
2116 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2120 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2121 $fp_string .= $text;
# Step to the next name/part pair of the block.
2124 $block_name_index += 2;
2125 $block_value_index += 2;
# The final fingerprint has every non-word character removed.
2129 $fp_string =~ s/\W+//gso;
2130 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recompute fingerprint and quality for bib record(s), store
# them when they changed and, unless the API name contains "nomap", move the record
# to the metarecord that matches the new fingerprint. Each processed record id is
# sent back to the client.
2140 sub refingerprint_bibrec {
# Only open a transaction when the caller has not already done so.
2146 if (!OpenILS::Application::Ingest->in_transaction) {
2147 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2153 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
2154 for my $b (@$bibs) {
2155 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Nothing is written for a record whose fingerprint and quality are unchanged.
2157 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2159 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2161 OpenILS::Application::Ingest->storage_req(
2162 'open-ils.storage.direct.biblio.record_entry.remote_update',
2164 { fingerprint => $fp->{fingerprint},
2165 quality => $fp->{quality} }
# Remapping: drop the record's old source-map rows before creating the new one.
2168 if ($self->api_name !~ /nomap/o) {
2169 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2170 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2175 if (ref($old_source_map) and @$old_source_map) {
2176 for my $m (@$old_source_map) {
# $old_mrid is overwritten on each pass, so it ends up holding the last row's metarecord.
2177 $old_mrid = $m->metarecord;
2178 OpenILS::Application::Ingest->storage_req(
2179 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# Check whether the old metarecord still has any sources left.
2185 my $old_sm = OpenILS::Application::Ingest->storage_req(
2186 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2187 { metarecord => $old_mrid }
# An empty result means the old metarecord is orphaned, so it is deleted.
2190 if (ref($old_sm) and @$old_sm == 0) {
2191 OpenILS::Application::Ingest->storage_req(
2192 'open-ils.storage.direct.metabib.metarecord.delete',
# NOTE(review): the search argument here is a hash ref, while wormize_biblio_record
# passes the bare fingerprint string to the same storage method - confirm both
# forms are accepted.
2197 my $mr = OpenILS::Application::Ingest->storage_req(
2198 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2199 { fingerprint => $fp->{fingerprint} }
# NOTE(review): the condition guarding this creation is not visible here;
# presumably it runs only when no metarecord was found - confirm.
2203 $mr = Fieldmapper::metabib::metarecord->new;
2204 $mr->fingerprint( $fp->{fingerprint} );
2205 $mr->master_record( $b->id );
2206 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
2209 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2210 $mr_map->metarecord( $mr->id );
2211 $mr_map->source( $b->id );
2212 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2216 $client->respond($b->id);
# Failure path: log the caught error (shift()).
2220 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Commit or roll back only when $commit is set.
2224 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2225 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Registered with and without the metarecord remapping step (.nomap).
2228 __PACKAGE__->register_method(
2229 api_name => "open-ils.worm.fingerprint.record.update",
2230 method => "refingerprint_bibrec",
2236 __PACKAGE__->register_method(
2237 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2238 method => "refingerprint_bibrec",
# fingerprint_bibrec: record-id front end for open-ils.worm.fingerprint.marc.
# Retrieves the bib record entry for $rec, fingerprints its MARC and logs the
# result at INFO level.
2245 sub fingerprint_bibrec {
2250 OpenILS::Application::Ingest->post_init();
2251 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# Only the first value returned by the fingerprint method is kept.
2253 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2254 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2258 __PACKAGE__->register_method(
2259 api_name => "open-ils.worm.fingerprint.record",
2260 method => "fingerprint_bibrec",
# fingerprint_mods: fingerprint a MODS document supplied as an XML string.
2266 sub fingerprint_mods {
2271 OpenILS::Application::Ingest->post_init();
# Unlike the MARC variants, this always parses: $xml must be a string, not a parsed document.
2272 my $mods = $parser->parse_string($xml)->documentElement;
2274 return _fp_mods( $mods );
2276 __PACKAGE__->register_method(
2277 api_name => "open-ils.worm.fingerprint.mods",
2278 method => "fingerprint_mods",
# fingerprint_marc: fingerprint a MARC XML document by transforming it to MODS with
# $mods_sheet and handing the result to _fp_mods.
2283 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or a raw XML string.
2288 $xml = $parser->parse_string($xml) unless (ref $xml);
2290 OpenILS::Application::Ingest->post_init();
2291 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2292 $log->debug("Returning [$fp] as fingerprint", INFO);
# NOTE(review): "open-ils.worm.fingerprint.marc" is registered again further down
# for biblio_fingerprint; which one serves the name depends on register_method's
# handling of duplicates - confirm.
2295 __PACKAGE__->register_method(
2296 api_name => "open-ils.worm.fingerprint.marc",
2297 method => "fingerprint_marc",
# biblio_fingerprint_record: record-id front end for open-ils.worm.fingerprint.marc.
# Retrieves the stored record for $rec, fingerprints it and logs the result at
# INFO level.
2305 sub biblio_fingerprint_record {
2310 OpenILS::Application::Ingest->post_init();
# NOTE(review): the end of this call chain is not visible here; presumably it ends
# in ->marc, given the variable name - confirm.
2312 my $marc = OpenILS::Application::Ingest
2313 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value returned by the fingerprint method is kept.
2316 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2317 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
# NOTE(review): "open-ils.worm.fingerprint.record" is also registered earlier in
# this package for fingerprint_bibrec; which one serves the name depends on
# register_method's handling of duplicates - confirm.
2320 __PACKAGE__->register_method(
2321 api_name => "open-ils.worm.fingerprint.record",
2322 method => "biblio_fingerprint_record",
# biblio_fingerprint: script-driven fingerprinting. Builds a MODS string and a MARC
# string from the input, inserts both into a ScriptRunner as 'environment' and runs
# the configured biblio_fingerprint script.
2328 sub biblio_fingerprint {
2333 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or a raw XML string.
2335 $marc = $parser->parse_string($marc) unless (ref $marc);
# MODS is produced by a stylesheet transform of the MARC and passed through entityize.
2337 my $mods = OpenILS::Application::Ingest::entityize(
2339 ->transform( $marc )
# From here on $marc is the serialised, entityized string, no longer the parsed document.
2345 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2348 $log->internal("Got MARC [$marc]");
2349 $log->internal("Created MODS [$mods]");
# Script location and library paths come from the open-ils.storage app_settings.
2352 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2353 my $conf = OpenSRF::Utils::SettingsClient->new;
2355 my $libs = $conf->config_value(@pfx, 'script_path');
2356 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalise it to an array ref.
2357 my $script_libs = (ref($libs)) ? $libs : [$libs];
2359 $log->debug("Loading script $script_file for biblio fingerprinting...");
# NOTE(review): $fp_script is not declared in the visible lines; presumably the
# runner is cached and only built when missing - confirm.
2361 $fp_script = new OpenILS::Utils::ScriptRunner
2362 ( file => $script_file,
2363 paths => $script_libs,
2364 reset_count => 1000 );
2367 $log->debug("Applying environment for biblio fingerprinting...");
2369 my $env = {marc => $marc, mods => $mods};
2370 #my $res = {fingerprint => '', quality => '0'};
2372 $fp_script->insert('environment' => $env);
2373 #$fp_script->insert('result' => $res);
2375 $log->debug("Running script for biblio fingerprinting...");
# NOTE(review): on a false run() result, `return 0` only executes if $log->error
# itself returns true; otherwise $res is false and execution continues - confirm
# the logger's return value.
2377 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2379 $log->debug("Script for biblio fingerprinting completed successfully...");
# NOTE(review): "open-ils.worm.fingerprint.marc" is also registered earlier in this
# package for fingerprint_marc; which one serves the name depends on
# register_method's handling of duplicates - confirm.
2383 __PACKAGE__->register_method(
2384 api_name => "open-ils.worm.fingerprint.marc",
2385 method => "biblio_fingerprint",
2390 # --------------------------------------------------------------------------------
2404 my $create_source_map;
2419 my %descriptor_code = (
2420 item_type => 'substr($ldr,6,1)',
2421 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2422 bib_level => 'substr($ldr,7,1)',
2423 control_type => 'substr($ldr,8,1)',
2424 char_encoding => 'substr($ldr,9,1)',
2425 enc_level => 'substr($ldr,17,1)',
2426 cat_form => 'substr($ldr,18,1)',
2427 pub_status => 'substr($ldr,5,1)',
2428 item_lang => 'substr($oo8,35,3)',
2429 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2430 audience => 'substr($oo8,22,1)',
2440 if ($self->api_name =~ /no_map/o) {
2444 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2446 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2448 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2450 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2452 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2453 unless ($sm_lookup);
2454 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2455 unless ($mr_lookup);
2456 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2457 unless ($mr_update);
2458 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2460 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2461 unless ($update_entry);
2462 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2463 unless ($rm_old_sm);
2464 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2465 unless ($rm_old_rd);
2466 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2467 unless ($rm_old_fr);
2468 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2469 unless ($rm_old_tr);
2470 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2471 unless ($rm_old_ar);
2472 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2473 unless ($rm_old_sr);
2474 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2475 unless ($rm_old_kr);
2476 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2477 unless ($rm_old_ser);
2478 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2479 unless ($mr_create);
2480 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2481 unless ($create_source_map);
2482 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2483 unless ($rd_create);
2484 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2485 unless ($fr_create);
2486 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2487 unless ($$create{title});
2488 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2489 unless ($$create{author});
2490 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2491 unless ($$create{subject});
2492 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2493 unless ($$create{keyword});
2494 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2495 unless ($$create{series});
2498 my ($outer_xact) = $in_xact->run;
2500 unless ($outer_xact) {
2501 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2502 my ($r) = $begin->run($client);
2503 unless (defined $r and $r) {
2505 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2508 } catch Error with {
2509 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2519 for my $entry ( $lookup->run(@docids) ) {
2520 # step -1: grab the doc from storage
2521 next unless ($entry);
2524 my $xslt_doc = $parser->parse_file(
2525 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2526 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2529 my $xml = $entry->marc;
2530 my $docid = $entry->id;
2531 my $marcdoc = $parser->parse_string($xml);
2532 my $modsdoc = $mods_sheet->transform($marcdoc);
2534 my $mods = $modsdoc->documentElement;
2535 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2537 $entry->fingerprint( fingerprint_mods( $mods ) );
2538 push @entry_list, $entry;
2540 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2543 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2544 if (!$mr || !@$mr) {
2545 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2546 $mr = new Fieldmapper::metabib::metarecord;
2547 $mr->fingerprint( $entry->fingerprint );
2548 $mr->master_record( $entry->id );
2549 my ($new_mr) = $mr_create->run($mr);
2551 unless (defined $mr) {
2552 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2555 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2560 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2561 $sm->metarecord( $mr->id );
2562 $sm->source( $entry->id );
2563 push @source_maps, $sm;
2566 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2567 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2569 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2570 for my $rd_field ( keys %descriptor_code ) {
2571 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2573 $rd_obj->record( $docid );
2574 push @rd_list, $rd_obj;
2576 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2578 # step 2: build the KOHA rows
2579 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2580 $_->record( $docid ) for (@tmp_list);
2581 push @ns_list, @tmp_list;
2585 last unless ($self->api_name =~ /batch$/o);
2588 $rm_old_rd->run( { record => \@docids } );
2589 $rm_old_fr->run( { record => \@docids } );
2590 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2591 $rm_old_tr->run( { source => \@docids } );
2592 $rm_old_ar->run( { source => \@docids } );
2593 $rm_old_sr->run( { source => \@docids } );
2594 $rm_old_kr->run( { source => \@docids } );
2595 $rm_old_ser->run( { source => \@docids } );
2598 my ($sm) = $create_source_map->run(@source_maps);
2599 unless (defined $sm) {
2600 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2602 my ($mr) = $mr_update->run(@mr_list);
2603 unless (defined $mr) {
2604 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2608 my ($re) = $update_entry->run(@entry_list);
2609 unless (defined $re) {
2610 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2613 my ($rd) = $rd_create->run(@rd_list);
2614 unless (defined $rd) {
2615 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2618 my ($fr) = $fr_create->run(@ns_list);
2619 unless (defined $fr) {
2620 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2623 # step 5: insert the new metadata
2624 for my $class ( qw/title author subject keyword series/ ) {
2626 for my $doc ( @mods_data ) {
2627 my ($did) = keys %$doc;
2628 my ($data) = values %$doc;
2630 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2631 for my $row ( keys %{ $$data{$class} } ) {
2632 next unless (exists $$data{$class}{$row});
2633 next unless ($$data{$class}{$row}{value});
2634 my $fm_obj = $fm_constructor->new;
2635 $fm_obj->value( $$data{$class}{$row}{value} );
2636 $fm_obj->field( $$data{$class}{$row}{field_id} );
2637 $fm_obj->source( $did );
2638 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2640 push @md_list, $fm_obj;
2644 my ($cr) = $$create{$class}->run(@md_list);
2645 unless (defined $cr) {
2646 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2650 unless ($outer_xact) {
2651 $log->debug("Commiting transaction started by the Ingest.", INFO);
2652 my ($c) = $commit->run;
2653 unless (defined $c and $c) {
2655 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the bibliographic ingest handler `wormize` under four API names.
# Every registration below dispatches to the same sub; the handler tells the
# variants apart by inspecting $self->api_name:
#   - names ending in "batch" keep iterating over all supplied record ids,
#     otherwise the per-record loop stops after the first entry
#     (`last unless ($self->api_name =~ /batch$/o)`);
#   - the "no_map" names presumably set $no_map, which suppresses the
#     mass_delete of metarecord source maps -- the assignment itself is not
#     visible in this part of the file, TODO confirm.
# NOTE(review): the remaining arguments and the closing of each call are not
# shown in this listing.
2661 __PACKAGE__->register_method(
2662 api_name => "open-ils.worm.wormize",
2663 method => "wormize",
2667 __PACKAGE__->register_method(
2668 api_name => "open-ils.worm.wormize.no_map",
2669 method => "wormize",
2673 __PACKAGE__->register_method(
2674 api_name => "open-ils.worm.wormize.batch",
2675 method => "wormize",
2679 __PACKAGE__->register_method(
2680 api_name => "open-ils.worm.wormize.no_map.batch",
2681 method => "wormize",
# File-scoped lexical declared ahead of authority_wormize, alongside the other
# cached authority method handles (their declarations are not shown here).
# NOTE(review): nothing in the visible part of authority_wormize reads or
# assigns this variable -- confirm whether it is still needed.
2696 my $acreate_source_map;
# authority_wormize: ingest handler for authority records.
#
# What the visible lines do:
#   1. Resolve (and cache in variables declared outside this sub) the storage
#      transaction methods and the authority record_entry, record_descriptor
#      and full_rec storage methods via $self->method_lookup.
#   2. Ask $in_xact whether a storage transaction is already open; if not,
#      BEGIN one and remember (via $outer_xact being false) to COMMIT it.
#   3. For each authority record entry: parse its MARCXML, build a
#      Fieldmapper::authority::record_descriptor from %descriptor_code, and
#      build authority full_rec rows with _marcxml_to_full_rows.
#   4. Mass-delete the old descriptor / full_rec rows for @docids and
#      batch-create the new ones.
#
# Throws OpenSRF::EX::PANIC when BEGIN, either batch create, or COMMIT fails.
#
# NOTE(review): argument unpacking ($self, $client, @docids) and the
# declarations of @entry_list, @rd_list and @ns_list sit on lines that are not
# shown in this listing; the return value is likewise not visible.
2711 sub authority_wormize {
2718 if ($self->api_name =~ /no_map/o) {
# Transaction control handles.
2722 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2724 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2726 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2728 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
# Authority-specific storage handles; each is looked up only once and reused
# on later calls (see the `unless` guards).
2730 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2732 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2733 unless ($aupdate_entry);
2734 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2735 unless ($arm_old_rd);
2736 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2737 unless ($arm_old_fr);
2738 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2739 unless ($ard_create);
2740 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2741 unless ($afr_create);
# A true $outer_xact means the caller already opened a transaction, so this
# sub must neither BEGIN nor COMMIT.
2744 my ($outer_xact) = $in_xact->run;
2746 unless ($outer_xact) {
2747 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2748 my ($r) = $begin->run($client);
2749 unless (defined $r and $r) {
2751 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2754 } catch Error with {
2755 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Fetch the entries through $alookup (authority record_entry batch.retrieve,
# resolved above). $lookup is the handle the bibliographic ingest resolves for
# biblio.record_entry and must not be used for authority ids.
2765 for my $entry ( $alookup->run(@docids) ) {
2766 # step -1: grab the doc from storage
2767 next unless ($entry);
2770 # my $xslt_doc = $parser->parse_file(
2771 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2772 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2775 my $xml = $entry->marc;
2776 my $docid = $entry->id;
2777 my $marcdoc = $parser->parse_string($xml);
2778 #my $madsdoc = $mads_sheet->transform($marcdoc);
2780 #my $mads = $madsdoc->documentElement;
2781 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
# NOTE(review): @entry_list is filled here but no $aupdate_entry->run call is
# visible in this listing -- confirm whether the entries are written back.
2783 push @entry_list, $entry;
# Leader and 008 text; presumably consumed by the code strings held in
# %descriptor_code, which are string-eval'ed below (that hash is defined
# elsewhere in the file -- TODO confirm).
2785 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2786 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2788 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
2789 for my $rd_field ( keys %descriptor_code ) {
2790 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2792 $rd_obj->record( $docid );
2793 push @rd_list, $rd_obj;
2795 # step 2: build the KOHA rows
2796 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2797 $_->record( $docid ) for (@tmp_list);
2798 push @ns_list, @tmp_list;
# Non-batch API names process only the first entry.
2802 last unless ($self->api_name =~ /batch$/o);
# Drop the previously ingested rows before inserting the fresh ones.
2805 $arm_old_rd->run( { record => \@docids } );
2806 $arm_old_fr->run( { record => \@docids } );
2808 my ($rd) = $ard_create->run(@rd_list);
2809 unless (defined $rd) {
2810 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Insert through $afr_create (authority full_rec batch.create), which is the
# method named in the PANIC message below; $fr_create is the metabib handle
# owned by the bibliographic ingest.
2813 my ($fr) = $afr_create->run(@ns_list);
2814 unless (defined $fr) {
2815 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Only commit a transaction this sub started itself.
2818 unless ($outer_xact) {
2819 $log->debug("Commiting transaction started by Ingest.", INFO);
2820 my ($c) = $commit->run;
2821 unless (defined $c and $c) {
2823 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Register the authority ingest entry points. Both must dispatch to
# authority_wormize: registering them against "wormize" would run the
# bibliographic ingest on authority record ids and leave authority_wormize
# unreachable.
# NOTE(review): the first api_name is misspelled ("authortiy"). It is kept
# byte-for-byte because it is the published name callers may already use;
# consider registering a correctly spelled alias as well.
# NOTE(review): the remaining arguments and the closing of each call are not
# shown in this listing.
2829 __PACKAGE__->register_method(
2830 api_name => "open-ils.worm.authortiy.wormize",
2831 method => "authority_wormize",
2835 __PACKAGE__->register_method(
2836 api_name => "open-ils.worm.authority.wormize.batch",
2837 method => "authority_wormize",
2843 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml, $type )
#
# Flattens a parsed MARCXML document into "full_rec" row objects: rows are
# built for each leader, each controlfield, and each child node of each
# datafield.
#
#   $marcxml - parsed document object; only ->documentElement is used here.
#   $type    - class name used to construct every row; defaults to
#              'Fieldmapper::metabib::full_rec'.
#
# Values are decomposed with NFD and have their combining marks (\pM) removed;
# datafield values are additionally lower-cased.
#
# Callers assign the result to an array and then set ->record on each row.
# NOTE(review): the lines that populate the remaining row fields, collect the
# rows and return them are not shown in this listing.
2846 sub _marcxml_to_full_rows {
2848 my $marcxml = shift;
2849 my $type = shift || 'Fieldmapper::metabib::full_rec';
2853 my $root = $marcxml->documentElement;
2855 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2856 next unless $tagline;
# Build the row from the caller-selected class, exactly as the datafield loop
# does, so that authority callers do not receive metabib rows for the leader.
2858 my $ns = $type->new;
2861 my $val = NFD($tagline->textContent);
2862 $val =~ s/(\pM+)//gso;
2868 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2869 next unless $tagline;
# Same as above: honour $type instead of hard-coding the metabib class.
2871 my $ns = $type->new;
2873 $ns->tag( $tagline->getAttribute( "tag" ) );
2874 my $val = NFD($tagline->textContent);
2875 $val =~ s/(\pM+)//gso;
2881 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2882 next unless $tagline;
2884 my $tag = $tagline->getAttribute( "tag" );
2885 my $ind1 = $tagline->getAttribute( "ind1" );
2886 my $ind2 = $tagline->getAttribute( "ind2" );
# One row per child node of the datafield.
2888 for my $data ( $tagline->childNodes ) {
2891 my $ns = $type->new;
2896 $ns->subfield( $data->getAttribute( "code" ) );
2897 my $val = NFD($data->textContent);
2898 $val =~ s/(\pM+)//gso;
2899 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Builds one space-separated string from the text of every node under $root
# that matches $xpath, then decomposes it with NFD and strips combining marks.
#
#   $root  - node on which ->findnodes is called.
#   $xpath - XPath expression selecting the nodes of interest.
#
# NOTE(review): the initialisation of $string, the condition guarding the
# `$value->textContent` append, and the return statement are on lines not
# shown in this listing.
2907 sub _get_field_value {
2909 my( $root, $xpath ) = @_;
2913 # grab the set of matching nodes
2914 my @nodes = $root->findnodes( $xpath );
2915 for my $value (@nodes) {
2917 # grab all children of the node
2918 my @children = $value->childNodes();
2919 for my $child (@children) {
2921 # add the childs content to the growing buffer
# quotemeta makes the child's text safe to use as a literal pattern. The test
# is a substring match against everything accumulated so far, so a child whose
# text occurs anywhere inside the buffer (not only as a whole earlier value)
# is skipped; empty text always matches and is skipped too.
2922 my $content = quotemeta($child->textContent);
2923 next if ($string =~ /$content/); # uniquify the values
2924 $string .= $child->textContent . " ";
# Appends the matched node's own text; presumably only reached when the node
# has no children -- the guarding line is not visible, TODO confirm.
2927 $string .= $value->textContent . " ";
# Decompose to NFD, then delete every combining mark (\pM).
2930 $string = NFD($string);
2931 $string =~ s/(\pM)//gso;
2936 sub modsdoc_to_values {
2937 my( $self, $mods ) = @_;
2939 for my $class (keys %$xpathset) {
2940 $data->{$class} = {};
2941 for my $type (keys %{$xpathset->{$class}}) {
2942 $data->{$class}->{$type} = {};
2943 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};