1 package OpenILS::Application::Ingest;
2 use base qw/OpenSRF::Application/;
4 use Unicode::Normalize;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::AppSession;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
11 use OpenILS::Utils::ScriptRunner;
12 use OpenILS::Utils::Fieldmapper;
15 use OpenILS::Utils::Fieldmapper;
19 use Time::HiRes qw(time);
# Table of the metadata formats this module knows about, keyed by format
# name. Each entry holds the XML namespace URI for that format under "ns".
# The mods and mods3 entries additionally receive a compiled stylesheet under
# "xslt" from the initialization code further down in this file.
# NOTE(review): the closing of this hash is not visible in this listing.
21 our %supported_formats = (
22 mods3 => {ns => 'http://www.loc.gov/mods/v3'},
23 mods => {ns => 'http://www.loc.gov/mods/'},
24 marcxml => {ns => 'http://www.loc.gov/MARC21/slim'},
25 srw_dc => {ns => 'info:srw/schema/1/dc-schema'},
26 oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'},
27 rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
28 atom => {ns => 'http://www.w3.org/2005/Atom'},
29 rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'},
33 rss10 => {ns => 'http://purl.org/rss/1.0/'},
34 rss11 => {ns => 'http://purl.org/net/rss1.1#'},
# Logger class; every logging call in this file goes through it.
39 my $log = 'OpenSRF::Utils::Logger';
# File-wide XML parser and XSLT processor instances.
# NOTE(review): no "use XML::LibXML" / "use XML::LibXSLT" line is visible in
# this listing -- confirm both modules are loaded.
41 my $parser = XML::LibXML->new();
42 my $xslt = XML::LibXSLT->new();
# Lazy initialization: does its work only while $xpathset has no keys.
#  * reads the "xsl" directory from the settings client,
#  * parses and caches the MODS and MODS v3 stylesheets in %supported_formats
#    (each only if it is not cached already),
#  * asks open-ils.cstore for the config.metabib_field rows and records, per
#    field_class and name, the row's xpath, id and format in $xpathset.
# NOTE(review): the enclosing sub header and the loop header that defines $f
# are not visible in this listing; the log message suggests this is post_init.
52 unless (keys %$xpathset) {
53 $log->debug("Running post_init", DEBUG);
55 my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
57 unless ($supported_formats{mods}{xslt}) {
58 $log->debug("Loading MODS XSLT", DEBUG);
59 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl");
60 $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
63 unless ($supported_formats{mods3}{xslt}) {
64 $log->debug("Loading MODS v3 XSLT", DEBUG);
65 my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl");
66 $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc );
# Search filter "id != undef": presumably matches every metabib_field row.
70 my $req = OpenSRF::AppSession
71 ->create('open-ils.cstore')
72 ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } )
75 if (ref $req and @$req) {
77 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
78 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
79 $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format;
80 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
# Replace each character in the range U+0080..U+FFFD with an upper-case
# hexadecimal numeric character reference (&#xNN;), leaving other characters
# untouched.
# NOTE(review): the enclosing sub header is not visible in this listing;
# presumably this is the entityize helper called elsewhere in the file.
96 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
100 # --------------------------------------------------------------------------------
103 package OpenILS::Application::Ingest::Biblio;
104 use base qw/OpenILS::Application::Ingest/;
105 use Unicode::Normalize;
# Read/write ingest of a single bib record object ($bib).
#
# Runs the read-only object ingest on $bib, copies the resulting fingerprint
# and quality onto $bib, then works through open-ils.cstore after issuing
# transaction.begin:
#   - deletes and recreates the record's metabib.full_rec rows,
#   - deletes and recreates its metabib.record_descriptor row,
#   - deletes the title/author/subject/keyword/series field entries and
#     creates the freshly extracted ones,
#   - deletes the record's metarecord_source_map rows,
#   - looks up the metarecord for the fingerprint and builds a new one
#     (the condition guarding creation is not visible in this listing),
#   - picks the metarecord's master record by comparing quality,
#   - maps the bib to the metarecord, updates the bib and commits.
# Returns undef when the read-only ingest produces nothing or when the commit
# request yields a false value.
# NOTE(review): argument unpacking, the tails of several requests, block
# closers and the final return are not visible in this listing.
107 sub rw_biblio_ingest_single_object {
112 my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
113 return undef unless ($blob);
115 $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
116 $bib->quality( $blob->{fingerprint}->{quality} );
118 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
# NOTE(review): $xact is not used in any line visible here.
120 my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
122 # update full_rec stuff ...
123 my $tmp = $cstore->request(
124 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
125 { record => $bib->id }
128 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
129 $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });
131 # update rec_descriptor stuff ...
132 $tmp = $cstore->request(
133 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
134 { record => $bib->id }
137 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
138 $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);
140 # deal with classed fields...
141 for my $class ( qw/title author subject keyword series/ ) {
142 $tmp = $cstore->request(
143 "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
144 { source => $bib->id }
147 $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
# The cstore method name for each new entry is derived from the object's
# class name with the leading "Fieldmapper::" removed.
149 for my $obj ( @{ $blob->{field_entries} } ) {
150 my $class = $obj->class_name;
151 $class =~ s/^Fieldmapper:://o;
153 $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
158 $tmp = $cstore->request(
159 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic',
160 { source => $bib->id }
163 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_ )->gather(1) for (@$tmp);
166 # Get the matching MR, if any.
167 my $mr = $cstore->request(
168 'open-ils.cstore.direct.metabib.metarecord.search',
169 { fingerprint => $bib->fingerprint }
173 $mr = new Fieldmapper::metabib::metarecord;
174 $mr->fingerprint( $bib->fingerprint );
175 $mr->master_record( $bib->id );
178 "open-ils.cstore.direct.metabib.metarecord.create",
179 $mr => { quiet => 'true' }
# Existing source maps of the metarecord; their sources are the candidates
# for master record.
183 my $mrm = $cstore->request(
184 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
185 { metarecord => $mr->id }
# Candidate records are requested ordered by quality, highest first.
188 my $best = $cstore->request(
189 "open-ils.cstore.direct.biblio.record_entry.search",
190 { id => [ map { $_->source } @$mrm ] },
191 { 'select' => { bre => [ qw/id quality/ ] },
192 order_by => { bre => "quality desc" },
# The better-quality record becomes master; this bib is used otherwise.
197 if ($best->quality > $bib->quality) {
198 $mr->master_record($best->id);
200 $mr->master_record($bib->id);
203 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
# NOTE(review): second "my $mrm" -- whether it sits in a different scope than
# the first cannot be seen from this listing.
206 my $mrm = new Fieldmapper::metabib::metarecord_source_map;
207 $mrm->source($bib->id);
208 $mrm->metarecord($mr->id);
210 $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
211 $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);
213 $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;;
# Register the sub above under its API name.
217 __PACKAGE__->register_method(
218 api_name => "open-ils.ingest.full.biblio.object",
219 method => "rw_biblio_ingest_single_object",
# Read/write ingest of a bib record identified by $rec.
# Retrieves the record entry from open-ils.cstore between a transaction.begin
# and a transaction.rollback request, returns undef if nothing usable came
# back, and otherwise hands the record object to
# open-ils.ingest.full.biblio.object and returns what that call returns.
# NOTE(review): argument unpacking is not visible in this listing.
224 sub rw_biblio_ingest_single_record {
229 OpenILS::Application::Ingest->post_init();
230 my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
231 $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
233 my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
# The retrieval only reads, so the transaction is rolled back, not committed.
235 $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
238 return undef unless ($r and @$r);
240 return $self->method_lookup("open-ils.ingest.full.biblio.object")->run($r);
# Register the sub above under its API name.
242 __PACKAGE__->register_method(
243 api_name => "open-ils.ingest.full.biblio.record",
244 method => "rw_biblio_ingest_single_record",
# Read-only ingest of a bib record object ($bib); nothing is written.
# Entityizes and parses $bib->marc, then collects:
#   full_rec      - rows from open-ils.ingest.flat_marc.biblio.xml
#   field_entries - entries from open-ils.ingest.extract.field_entry.all.xml
#   fingerprint   - result of open-ils.ingest.fingerprint.xml
#   descriptor    - result of open-ils.ingest.descriptor.xml
# Every row/entry (and the descriptor, when present) is stamped with
# $bib->id. Returns a hashref with those four keys.
# NOTE(review): argument unpacking is not visible in this listing.
249 sub ro_biblio_ingest_single_object {
253 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
255 my $document = $parser->parse_string($xml);
# The flat-MARC and field-entry methods get the parsed document; the
# fingerprint and descriptor methods get the XML string.
257 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
258 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
259 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
260 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
262 $_->source($bib->id) for (@mXfe);
263 $_->record($bib->id) for (@mfr);
264 $rd->record($bib->id) if ($rd);
266 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
268 __PACKAGE__->register_method(
269 api_name => "open-ils.ingest.full.biblio.object.readonly",
270 method => "ro_biblio_ingest_single_object",
# Read-only ingest of a bib record supplied as a MARCXML string.
# Entityizes and parses the XML, then gathers flat-MARC rows, field entries,
# the fingerprint and the descriptor exactly as the object variant does, but
# without stamping any record id on the results (no id is known here).
# Returns a hashref with keys full_rec, field_entries, fingerprint and
# descriptor.
# NOTE(review): the lines that shift $self/$client are not visible here.
275 sub ro_biblio_ingest_single_xml {
278 my $xml = OpenILS::Application::Ingest::entityize(shift);
280 my $document = $parser->parse_string($xml);
282 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
283 my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
284 my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
285 my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);
287 return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd };
# Register the sub above under its API name.
289 __PACKAGE__->register_method(
290 api_name => "open-ils.ingest.full.biblio.xml.readonly",
291 method => "ro_biblio_ingest_single_xml",
# Read-only ingest of a bib record identified by $rec.
# Retrieves the record entry from open-ils.cstore, returns undef if nothing
# usable came back, runs open-ils.ingest.full.biblio.xml.readonly on the
# record's MARC, and stamps $rec on every field entry, every full_rec row
# and, when present, the descriptor.
# NOTE(review): argument unpacking, the tail of the retrieve request and the
# final return are not visible in this listing.
296 sub ro_biblio_ingest_single_record {
301 OpenILS::Application::Ingest->post_init();
302 my $r = OpenSRF::AppSession
303 ->create('open-ils.cstore')
304 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
307 return undef unless ($r and @$r);
309 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
311 $_->source($rec) for (@{$res->{field_entries}});
312 $_->record($rec) for (@{$res->{full_rec}});
# The descriptor method can return undef (it does so when its script fails),
# so guard the call the same way the object variant of this method does.
313 $res->{descriptor}->record($rec) if ($res->{descriptor});
# Register the sub above under its API name.
317 __PACKAGE__->register_method(
318 api_name => "open-ils.ingest.full.biblio.record.readonly",
319 method => "ro_biblio_ingest_single_record",
# Streaming read-only ingest keyed by record id.
# Reads one message at a time from the client (5 second timeout per recv),
# stops at the first message whose content is undefined, runs
# open-ils.ingest.full.biblio.record.readonly on each id, stamps the id on
# the field entries and full_rec rows, and sends each result back.
# NOTE(review): $ses is not used in any line visible here, and the called
# method already stamps the same id on these rows.
324 sub ro_biblio_ingest_stream_record {
328 OpenILS::Application::Ingest->post_init();
330 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
332 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
334 my $rec = $resp->content;
335 last unless (defined $rec);
337 $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
338 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec);
340 $_->source($rec) for (@{$res->{field_entries}});
341 $_->record($rec) for (@{$res->{full_rec}});
343 $client->respond( $res );
# Register the sub above under its API name.
348 __PACKAGE__->register_method(
349 api_name => "open-ils.ingest.full.biblio.record_stream.readonly",
350 method => "ro_biblio_ingest_stream_record",
# Streaming read-only ingest of raw MARCXML strings.
# Reads one message at a time from the client (5 second timeout per recv),
# stops at the first message whose content is undefined, runs
# open-ils.ingest.full.biblio.xml.readonly on each XML string and sends the
# result back.
# NOTE(review): $ses is not used in any line visible here.
355 sub ro_biblio_ingest_stream_xml {
359 OpenILS::Application::Ingest->post_init();
361 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
363 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
365 my $xml = $resp->content;
366 last unless (defined $xml);
368 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
369 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml);
371 $client->respond( $res );
# Register the sub above under its API name.
376 __PACKAGE__->register_method(
377 api_name => "open-ils.ingest.full.biblio.xml_stream.readonly",
378 method => "ro_biblio_ingest_stream_xml",
# Streaming ingest of bib record objects sent by the client.
# Reads one message at a time (5 second timeout per recv), stops at the
# first message whose content is undefined, runs
# open-ils.ingest.full.biblio.xml.readonly on each object's MARC, stamps the
# object's id on the field entries and full_rec rows, and sends the result
# back.
# NOTE(review): despite the "rw"/"import" naming, no write to the database is
# visible in this listing; $ses is also unused in the visible lines.
383 sub rw_biblio_ingest_stream_import {
387 OpenILS::Application::Ingest->post_init();
389 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
391 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
393 my $bib = $resp->content;
394 last unless (defined $bib);
396 $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
397 my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc);
399 $_->source($bib->id) for (@{$res->{field_entries}});
400 $_->record($bib->id) for (@{$res->{full_rec}});
402 $client->respond( $res );
# Register the sub above under its API name.
407 __PACKAGE__->register_method(
408 api_name => "open-ils.ingest.full.biblio.bib_stream.import",
409 method => "rw_biblio_ingest_stream_import",
415 # --------------------------------------------------------------------------------
418 package OpenILS::Application::Ingest::Authority;
419 use base qw/OpenILS::Application::Ingest/;
420 use Unicode::Normalize;
# Read-only ingest of an authority record object (held in $bib).
# Entityizes and parses the object's MARC, extracts the flat-MARC rows via
# open-ils.ingest.flat_marc.authority.xml, stamps each row with the object's
# id and returns a hashref whose only key is full_rec.
# NOTE(review): argument unpacking is not visible in this listing.
422 sub ro_authority_ingest_single_object {
426 my $xml = OpenILS::Application::Ingest::entityize($bib->marc);
428 my $document = $parser->parse_string($xml);
430 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
432 $_->record($bib->id) for (@mfr);
434 return { full_rec => \@mfr };
# Register the sub above under its API name.
436 __PACKAGE__->register_method(
437 api_name => "open-ils.ingest.full.authority.object.readonly",
438 method => "ro_authority_ingest_single_object",
# Read-only ingest of an authority record supplied as a MARCXML string.
# Entityizes and parses the XML, extracts the flat-MARC rows via
# open-ils.ingest.flat_marc.authority.xml and returns a hashref whose only
# key is full_rec. No record id is stamped on the rows here.
# NOTE(review): the lines that shift $self/$client are not visible here.
443 sub ro_authority_ingest_single_xml {
446 my $xml = OpenILS::Application::Ingest::entityize(shift);
448 my $document = $parser->parse_string($xml);
450 my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);
452 return { full_rec => \@mfr };
# Register the sub above under its API name.
454 __PACKAGE__->register_method(
455 api_name => "open-ils.ingest.full.authority.xml.readonly",
456 method => "ro_authority_ingest_single_xml",
# Read-only ingest of an authority record identified by $rec.
# Retrieves the record entry from open-ils.cstore, returns undef if nothing
# usable came back, runs open-ils.ingest.full.authority.xml.readonly on the
# record's MARC and stamps $rec on every full_rec row.
# NOTE(review): argument unpacking, the tail of the retrieve request and the
# final return are not visible in this listing.
461 sub ro_authority_ingest_single_record {
466 OpenILS::Application::Ingest->post_init();
467 my $r = OpenSRF::AppSession
468 ->create('open-ils.cstore')
469 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
472 return undef unless ($r and @$r);
474 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
476 $_->record($rec) for (@{$res->{full_rec}});
# The authority xml.readonly result carries only full_rec, so there is
# normally no descriptor; calling ->record on the missing value would die.
# Stamp the descriptor only when one is actually present.
477 $res->{descriptor}->record($rec) if ($res->{descriptor});
# Register the sub above under its API name.
481 __PACKAGE__->register_method(
482 api_name => "open-ils.ingest.full.authority.record.readonly",
483 method => "ro_authority_ingest_single_record",
# Streaming read-only authority ingest keyed by record id.
# Reads one message at a time from the client (5 second timeout per recv),
# stops at the first message whose content is undefined, runs
# open-ils.ingest.full.authority.record.readonly on each id, stamps the id on
# the full_rec rows and sends each result back.
# NOTE(review): $ses is not used in any line visible here.
488 sub ro_authority_ingest_stream_record {
492 OpenILS::Application::Ingest->post_init();
494 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
496 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
498 my $rec = $resp->content;
499 last unless (defined $rec);
501 $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
502 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec);
504 $_->record($rec) for (@{$res->{full_rec}});
506 $client->respond( $res );
# Register the sub above under its API name.
511 __PACKAGE__->register_method(
512 api_name => "open-ils.ingest.full.authority.record_stream.readonly",
513 method => "ro_authority_ingest_stream_record",
# Streaming read-only authority ingest of raw MARCXML strings.
# Reads one message at a time from the client (5 second timeout per recv),
# stops at the first message whose content is undefined, runs
# open-ils.ingest.full.authority.xml.readonly on each XML string and sends
# the result back.
# NOTE(review): $ses is not used in any line visible here.
518 sub ro_authority_ingest_stream_xml {
522 OpenILS::Application::Ingest->post_init();
524 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
526 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
528 my $xml = $resp->content;
529 last unless (defined $xml);
531 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
532 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml);
534 $client->respond( $res );
# Register the sub above under its API name.
539 __PACKAGE__->register_method(
540 api_name => "open-ils.ingest.full.authority.xml_stream.readonly",
541 method => "ro_authority_ingest_stream_xml",
# Streaming ingest of authority record objects sent by the client.
# Reads one message at a time (5 second timeout per recv), stops at the
# first message whose content is undefined, runs
# open-ils.ingest.full.authority.xml.readonly on each object's MARC, stamps
# the object's id on the full_rec rows and sends the result back.
# NOTE(review): despite the "rw"/"import" naming, no write to the database is
# visible in this listing; $ses is also unused in the visible lines.
546 sub rw_authority_ingest_stream_import {
550 OpenILS::Application::Ingest->post_init();
552 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
554 while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {
556 my $bib = $resp->content;
557 last unless (defined $bib);
559 $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
560 my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc);
562 $_->record($bib->id) for (@{$res->{full_rec}});
564 $client->respond( $res );
# Register the sub above under its API name.
569 __PACKAGE__->register_method(
570 api_name => "open-ils.ingest.full.authority.bib_stream.import",
571 method => "rw_authority_ingest_stream_import",
577 # --------------------------------------------------------------------------------
578 # MARC index extraction
580 package OpenILS::Application::Ingest::XPATH;
581 use base qw/OpenILS::Application::Ingest/;
582 use Unicode::Normalize;
# Build one space-separated string from the text of the nodes an XPath
# expression selects.
# Optionally binds a namespace URI to a prefix on the element first (only
# when both are given), finds the nodes matching $xpath and appends text
# content followed by a single space to $string.
# NOTE(review): the unpacking of $xml, $xpath, $ns_uri and $unique, the
# initialization of $string, the branch separating the per-child path from
# the whole-node path, and the return are not visible in this listing.
584 # give this an XML documentElement and an XPATH expression
585 sub xpath_to_string {
589 my $ns_prefix = shift;
592 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
596 # grab the set of matching nodes
597 my @nodes = $xml->findnodes( $xpath );
598 for my $value (@nodes) {
600 # grab all children of the node
601 my @children = $value->childNodes();
602 for my $child (@children) {
604 # add the child's content to the growing buffer
# quotemeta keeps the text from being interpreted as a regex below; note the
# uniqueness test is a substring match against everything collected so far.
605 my $content = quotemeta($child->textContent);
606 next if ($unique && $string =~ /$content/); # uniquify the values
607 $string .= $child->textContent . " ";
610 $string .= $value->textContent . " ";
# Extract index field entries from a MARC document for the given classes.
# $xml may be a string (entityized and parsed here) or an already parsed
# document. For every class and every field definition in $xpathset the
# document is transformed with the definition's format stylesheet (the result
# is cached per format in %transform_cache), the definition's XPath is
# evaluated through xpath_to_string, the value is normalized, and a
# Fieldmapper::metabib::<class>_field_entry carrying the value and the field
# id is sent to the client.
# NOTE(review): argument unpacking, the declarations of $document and
# %transform_cache, any condition around the transform, and the remaining
# xpath_to_string arguments are not visible in this listing.
616 sub class_index_string_xml {
622 OpenILS::Application::Ingest->post_init();
623 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
627 for my $class (@classes) {
628 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
629 for my $type ( keys %{ $xpathset->{$class} } ) {
631 my $def = $xpathset->{$class}->{$type};
632 my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}};
# Reuse an earlier transform for this format when one is cached.
637 $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml);
638 $transform_cache{$def->{format}} = $document;
641 my $value = xpath_to_string(
642 $document->documentElement => $def->{xpath},
643 $sf->{ns} => $def->{format},
# Normalization: decompose (NFD), drop mark characters (\pM) and "other"
# characters (\pC), trim trailing non-word characters, and remove dots that
# sit between two word characters.
649 $value = NFD($value);
650 $value =~ s/\pM+//sgo;
651 $value =~ s/\pC+//sgo;
652 $value =~ s/\W+$//sgo;
654 $value =~ s/(\w)\.+(\w)/$1$2/sgo;
657 my $fm = $class_constructor->new;
658 $fm->value( $value );
659 $fm->field( $xpathset->{$class}->{$type}->{id} );
660 $client->respond($fm);
# Register the sub above under its API name.
665 __PACKAGE__->register_method(
666 api_name => "open-ils.ingest.field_entry.class.xml",
667 method => "class_index_string_xml",
# Extract index field entries for selected classes from a stored record.
# Retrieves the record entry for $rec from open-ils.cstore, returns undef if
# nothing usable came back, then forwards every entry produced by
# open-ils.ingest.field_entry.class.xml for the record's MARC to the client.
# NOTE(review): this retrieves authority.record_entry, while the sibling
# all_index_string_record retrieves biblio.record_entry and the entries built
# are metabib field entries -- confirm which table is intended.
# NOTE(review): argument unpacking is not visible in this listing.
673 sub class_index_string_record {
679 OpenILS::Application::Ingest->post_init();
680 my $r = OpenSRF::AppSession
681 ->create('open-ils.cstore')
682 ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
685 return undef unless ($r and @$r);
687 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
689 $client->respond($fm);
# Register the sub above under its API name.
693 __PACKAGE__->register_method(
694 api_name => "open-ils.ingest.field_entry.class.record",
695 method => "class_index_string_record",
# Extract index field entries for every class known to $xpathset.
# Runs open-ils.ingest.field_entry.class.xml on $xml with all keys of
# $xpathset as the class list and forwards each entry to the client.
# NOTE(review): argument unpacking is not visible in this listing.
701 sub all_index_string_xml {
706 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) {
707 $client->respond($fm);
# Register the sub above under its API name.
711 __PACKAGE__->register_method(
712 api_name => "open-ils.ingest.extract.field_entry.all.xml",
713 method => "all_index_string_xml",
# Extract index field entries for every class from a stored bib record.
# Retrieves biblio.record_entry for $rec from open-ils.cstore, returns undef
# if nothing usable came back, then runs
# open-ils.ingest.field_entry.class.xml on the record's MARC with all keys of
# $xpathset as the class list and forwards each entry to the client.
# NOTE(review): argument unpacking is not visible in this listing.
719 sub all_index_string_record {
724 OpenILS::Application::Ingest->post_init();
725 my $r = OpenSRF::AppSession
726 ->create('open-ils.cstore')
727 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
730 return undef unless ($r and @$r);
732 for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) {
734 $client->respond($fm);
# Register the sub above under its API name.
738 __PACKAGE__->register_method(
739 api_name => "open-ils.ingest.extract.field_entry.all.record",
740 method => "all_index_string_record",
746 # --------------------------------------------------------------------------------
749 package OpenILS::Application::Ingest::FlatMARC;
750 use base qw/OpenILS::Application::Ingest/;
751 use Unicode::Normalize;
# Flatten a parsed MARCXML document into Fieldmapper full_rec rows.
# $xmltype selects the Fieldmapper namespace and defaults to 'metabib', so
# rows are of class Fieldmapper::<xmltype>::full_rec. The first element whose
# local name is "record" is walked in three passes: leader, controlfield and
# datafield children; for datafields, each subfield's code is recorded and
# its text is stored lower-cased.
# NOTE(review): the unpacking of $marcxml, the creation of each row ($ns),
# the collection into @ns_list and the return are not visible in this
# listing.
754 sub _marcxml_to_full_rows {
757 my $xmltype = shift || 'metabib';
759 my $type = "Fieldmapper::${xmltype}::full_rec";
# local-name() makes the match independent of the namespace prefix.
763 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
765 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
766 next unless $tagline;
771 my $val = $tagline->textContent;
781 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
782 next unless $tagline;
786 $ns->tag( $tagline->getAttribute( "tag" ) );
787 my $val = $tagline->textContent;
797 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
798 next unless $tagline;
800 my $tag = $tagline->getAttribute( "tag" );
801 my $ind1 = $tagline->getAttribute( "ind1" );
802 my $ind2 = $tagline->getAttribute( "ind2" );
804 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
812 $ns->subfield( $data->getAttribute( "code" ) );
813 my $val = $data->textContent;
818 $ns->value( lc($val) );
824 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
# Flatten MARCXML into full_rec rows and stream them to the client.
# $xml may be a string (entityized and parsed here) or an already parsed
# document. The row type is 'authority' when the API name this method was
# invoked under contains "authority", and 'metabib' otherwise.
# NOTE(review): the sub header and argument unpacking are not visible in this
# listing; the registrations below name this sub flat_marc_xml.
833 $log->debug("processing [$xml]");
835 $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);
837 my $type = 'metabib';
838 $type = 'authority' if ($self->api_name =~ /authority/o);
840 OpenILS::Application::Ingest->post_init();
842 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# The same sub is registered twice; the API name decides the row type.
845 __PACKAGE__->register_method(
846 api_name => "open-ils.ingest.flat_marc.authority.xml",
847 method => "flat_marc_xml",
852 __PACKAGE__->register_method(
853 api_name => "open-ils.ingest.flat_marc.biblio.xml",
854 method => "flat_marc_xml",
# Flatten a stored record into full_rec rows and stream them to the client.
# $type becomes 'authority' when the invoking API name contains "authority";
# it selects both the record_entry table to read and the flat_marc.<type>.xml
# method to run. Returns undef when the record or its MARC is missing.
# NOTE(review): argument unpacking and the default value of $type are not
# visible in this listing (presumably 'biblio', matching the registered
# flat_marc.biblio.xml method -- confirm).
# NOTE(review): JSON->perl2JSON is called here, but no JSON module load is
# visible in this listing.
860 sub flat_marc_record {
866 $type = 'authority' if ($self->api_name =~ /authority/o);
868 OpenILS::Application::Ingest->post_init();
869 my $r = OpenSRF::AppSession
870 ->create('open-ils.cstore')
871 ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
875 return undef unless ($r and $r->marc);
877 my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc);
878 for my $row (@rows) {
879 $client->respond($row);
880 $log->debug(JSON->perl2JSON($row), DEBUG);
# The same sub is registered twice; the API name decides the record type.
884 __PACKAGE__->register_method(
885 api_name => "open-ils.ingest.flat_marc.biblio.record_entry",
886 method => "flat_marc_record",
891 __PACKAGE__->register_method(
892 api_name => "open-ils.ingest.flat_marc.authority.record_entry",
893 method => "flat_marc_record",
899 # --------------------------------------------------------------------------------
902 package OpenILS::Application::Ingest::Biblio::Fingerprint;
903 use base qw/OpenILS::Application::Ingest/;
904 use Unicode::Normalize;
905 use OpenSRF::EX qw/:try/;
# Fingerprint a stored bib record.
# Retrieves biblio.record_entry for $rec from open-ils.cstore, returns undef
# when the record or its MARC is missing, runs
# open-ils.ingest.fingerprint.xml on the MARC and truncates the result's
# quality to an integer.
# NOTE(review): $fp is used as a hash reference on the next line, so the log
# message interpolates the reference itself, not the fingerprint string.
# NOTE(review): argument unpacking and the return are not visible in this
# listing.
907 sub biblio_fingerprint_record {
912 OpenILS::Application::Ingest->post_init();
914 my $r = OpenSRF::AppSession
915 ->create('open-ils.cstore')
916 ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
919 return undef unless ($r and $r->marc);
921 my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);
922 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
923 $fp->{quality} = int($fp->{quality});
# Register the sub above under its API name.
926 __PACKAGE__->register_method(
927 api_name => "open-ils.ingest.fingerprint.record",
928 method => "biblio_fingerprint_record",
# Compute a fingerprint for a MARCXML string by running a configured script.
# The script file and library path(s) come from the open-ils.ingest
# app_settings ("scripts"/"biblio_fingerprint" and "script_path"). The
# entityized MARC is placed in the script environment under "marc" and the
# script is run through OpenILS::Utils::ScriptRunner.
# NOTE(review): the lines that shift $self/$client, any condition around
# creating $fp_script, and the return are not visible in this listing.
934 sub biblio_fingerprint {
937 my $xml = OpenILS::Application::Ingest::entityize(shift);
939 $log->internal("Got MARC [$xml]");
942 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
943 my $conf = OpenSRF::Utils::SettingsClient->new;
945 my $libs = $conf->config_value(@pfx, 'script_path');
946 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or a list; normalize to an array ref.
947 my $script_libs = (ref($libs)) ? $libs : [$libs];
949 $log->debug("Loading script $script_file for biblio fingerprinting...");
951 $fp_script = new OpenILS::Utils::ScriptRunner
952 ( file => $script_file,
953 paths => $script_libs,
954 reset_count => 100 );
957 $fp_script->insert('environment' => {marc => $xml} => 1);
# NOTE(review): the early return only happens if $log->error returns a true
# value, because of the "&&".
959 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef);
960 $log->debug("Script for biblio fingerprinting completed successfully...");
# Register the sub above under its API name.
964 __PACKAGE__->register_method(
965 api_name => "open-ils.ingest.fingerprint.xml",
966 method => "biblio_fingerprint",
# Extract a record descriptor from a MARCXML string by running a configured
# script. The script file and library path(s) come from the open-ils.ingest
# app_settings ("scripts"/"biblio_descriptor" and "script_path"). The
# entityized MARC is inserted into the script environment as
# "environment.marc" and the script is run through
# OpenILS::Utils::ScriptRunner.
# NOTE(review): the lines that shift $self/$client, any condition around
# creating $rd_script, and the return are not visible in this listing.
972 sub biblio_descriptor {
975 my $xml = OpenILS::Application::Ingest::entityize(shift);
977 $log->internal("Got MARC [$xml]");
980 my @pfx = ( "apps", "open-ils.ingest","app_settings" );
981 my $conf = OpenSRF::Utils::SettingsClient->new;
983 my $libs = $conf->config_value(@pfx, 'script_path');
984 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
# script_path may be a single value or a list; normalize to an array ref.
985 my $script_libs = (ref($libs)) ? $libs : [$libs];
987 $log->debug("Loading script $script_file for biblio descriptor extraction...");
989 $rd_script = new OpenILS::Utils::ScriptRunner
990 ( file => $script_file,
991 paths => $script_libs,
992 reset_count => 100 );
995 $log->debug("Setting up environment for descriptor extraction script...");
996 $rd_script->insert('environment.marc' => $xml => 1);
997 $log->debug("Environment building complete...");
# NOTE(review): the early return only happens if $log->error returns a true
# value, because of the "&&".
999 my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef);
1000 $log->debug("Script for biblio descriptor extraction completed successfully");
# Register the sub above under its API name.
1004 __PACKAGE__->register_method(
1005 api_name => "open-ils.ingest.descriptor.xml",
1006 method => "biblio_descriptor",
# Return whatever open-ils.storage.transaction.current reports, after making
# sure the module has been initialized. Callers in this file use the result
# as a boolean "are we inside a transaction?" test.
1016 sub in_transaction {
1017 OpenILS::Application::Ingest->post_init();
1018 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Start a storage transaction and return the current transaction as reported
# by open-ils.storage.transaction.current.
# When the begin request does not return a true value, a rollback is
# requested and OpenSRF::EX::PANIC is thrown.
# NOTE(review): argument unpacking, the try/catch framing and the test on
# $outer_xact are not visible in this listing.
1021 sub begin_transaction {
1025 OpenILS::Application::Ingest->post_init();
1026 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1030 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
1031 #__PACKAGE__->st_sess->connect;
1032 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.begin', $client );
1033 unless (defined $r and $r) {
1034 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1035 #__PACKAGE__->st_sess->disconnect;
1036 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1040 $log->debug("Ingest Couldn't BEGIN transaction!", ERROR)
1043 return __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
# Roll back the current storage transaction.
# Reads the current transaction, requests a rollback, and logs when there is
# no transaction; an error caught by the handler is rethrown as
# OpenSRF::EX::PANIC.
# NOTE(review): the opening "try", the test on $outer_xact and the return are
# not visible in this listing.
1046 sub rollback_transaction {
1050 OpenILS::Application::Ingest->post_init();
1051 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1055 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1057 $log->debug("Ingest isn't inside a transaction.", INFO);
1059 } catch Error with {
1060 throw OpenSRF::EX::PANIC ("Ingest Couldn't ROLLBACK transaction!")
# Commit the current storage transaction.
# Reads the current transaction and requests a commit; when the commit does
# not return a true value a rollback is requested and OpenSRF::EX::PANIC is
# thrown. An error caught by the handler is rethrown as OpenSRF::EX::PANIC.
# NOTE(review): the opening "try", the test on $outer_xact and the return are
# not visible in this listing.
1066 sub commit_transaction {
1070 OpenILS::Application::Ingest->post_init();
1071 my $outer_xact = __PACKAGE__->storage_req( 'open-ils.storage.transaction.current' );
1074 #if (__PACKAGE__->st_sess->connected && $outer_xact) {
1076 my $r = __PACKAGE__->storage_req( 'open-ils.storage.transaction.commit' );
1077 unless (defined $r and $r) {
1078 __PACKAGE__->storage_req( 'open-ils.storage.transaction.rollback' );
1079 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
1081 #__PACKAGE__->st_sess->disconnect;
1083 $log->debug("Ingest isn't inside a transaction.", INFO);
1085 } catch Error with {
1086 throw OpenSRF::EX::PANIC ("Ingest Couldn't COMMIT transaction!")
# Look up the method named by $method, run it with the remaining arguments,
# and return only the first element of its result list.
# NOTE(review): the sub header and the unpacking of $method are not visible
# in this listing; callers in this file invoke it as storage_req.
1095 my @res = __PACKAGE__->method_lookup( $method )->run( @_ );
1096 return shift( @res );
# Remove the derived rows of an authority record.
# Starts a transaction when none is active, sets the savepoint
# "scrub_authority_record", mass-deletes the record's authority.full_rec and
# authority.record_descriptor rows and releases the savepoint. The failure
# path logs the error and rolls back to the savepoint. The transaction is
# committed or rolled back at the end only when $commit is set.
# NOTE(review): argument unpacking, the assignments to $commit and $success,
# the try/catch framing and the return are not visible in this listing.
1099 sub scrub_authority_record {
1105 if (!OpenILS::Application::Ingest->in_transaction) {
1106 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1112 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_authority_record' );
1114 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
1115 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
1117 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_authority_record' );
1119 $log->debug('Scrubbing failed : '.shift(), ERROR);
1120 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_authority_record' );
1124 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1125 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
1128 __PACKAGE__->register_method(
1129 api_name => "open-ils.worm.scrub.authority",
1130 method => "scrub_authority_record",
# Remove the derived (metabib) rows of one or more bib records.
# $rec may be an id or a hash of search criteria; a hash is first resolved
# through open-ils.storage.id_list.biblio.record_entry.search_where.
# Starts a transaction when none is active, sets the savepoint
# "scrub_metabib_record", mass-deletes the full_rec, metarecord_source_map,
# record_descriptor and all five classed field-entry rows, then fixes up
# metarecords whose master record was $rec: a metarecord that still has other
# mapped sources is pointed at the first of them, otherwise it is deleted.
# The failure path logs the error and rolls back to the savepoint. The
# transaction is committed or rolled back at the end only when $commit is
# set.
# NOTE(review): argument unpacking, the assignments to $commit and $success,
# the try/catch framing, the condition choosing between re-pointing and
# deleting a metarecord, and the return are not visible in this listing.
1136 sub scrub_metabib_record {
1141 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1142 $rec = OpenILS::Application::Ingest->storage_req(
1143 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
1148 if (!OpenILS::Application::Ingest->in_transaction) {
1149 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1155 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'scrub_metabib_record' );
1157 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
1158 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
1159 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
1160 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
1161 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
1162 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
1163 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
1164 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
1166 $log->debug( "Looking for metarecords whose master is $rec", DEBUG);
1167 my $masters = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.master_record.atomic', $rec );
1169 for my $mr (@$masters) {
1170 $log->debug( "Found metarecord whose master is $rec", DEBUG);
# Source maps still attached to this metarecord (this record's own maps were
# deleted above).
1171 my $others = OpenILS::Application::Ingest->storage_req(
1172 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic', $mr->id );
1175 $log->debug("Metarecord ".$mr->id." had master of $rec, setting to ".$others->[0]->source, DEBUG);
1176 $mr->master_record($others->[0]->source);
# The update also sets the metarecord's mods field to undef.
1177 OpenILS::Application::Ingest->storage_req(
1178 'open-ils.storage.direct.metabib.metarecord.remote_update',
1180 { master_record => $others->[0]->source, mods => undef }
1183 warn "Removing metarecord whose master is $rec";
1184 $log->debug( "Removing metarecord whose master is $rec", DEBUG);
1185 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
1186 warn "Metarecord removed";
1187 $log->debug( "Metarecord removed", DEBUG);
1191 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'scrub_metabib_record' );
1194 $log->debug('Scrubbing failed : '.shift(), ERROR);
1195 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'scrub_metabib_record' );
1199 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1200 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Register the sub above under its API name.
1203 __PACKAGE__->register_method(
1204 api_name => "open-ils.worm.scrub.biblio",
1205 method => "scrub_metabib_record",
# Re-ingest every bib record mapped to a metarecord.
# Looks up the metarecord_source_map rows for $mrec and calls
# wormize_biblio_record for each mapped source, building a per-record status
# hash (record, metarecord, success) on both the normal and the error path.
# NOTE(review): the status hashes read $rec->metarecord, but the only
# variables visible in this sub are $mrec and the loop variable $r -- $rec
# looks like a leftover name; confirm against the full source.
# NOTE(review): argument unpacking, the try framing, what is done with the
# status hashes, and the return are not visible in this listing.
1210 sub wormize_biblio_metarecord {
1215 my $recs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic' => $mrec );
1218 for my $r (@$recs) {
# Plain function call: $self and $client are passed along explicitly.
1221 $success = wormize_biblio_record($self => $client => $r->source);
1223 { record => $r->source,
1224 metarecord => $rec->metarecord,
1225 success => $success,
1228 } catch Error with {
1231 { record => $r->source,
1232 metarecord => $rec->metarecord,
1233 success => $success,
# The same sub is registered under four API names that differ only in their
# ".nomap" / ".noscrub" suffixes.
1241 __PACKAGE__->register_method(
1242 api_name => "open-ils.worm.wormize.metarecord",
1243 method => "wormize_biblio_metarecord",
1248 __PACKAGE__->register_method(
1249 api_name => "open-ils.worm.wormize.metarecord.nomap",
1250 method => "wormize_biblio_metarecord",
1255 __PACKAGE__->register_method(
1256 api_name => "open-ils.worm.wormize.metarecord.noscrub",
1257 method => "wormize_biblio_metarecord",
1262 __PACKAGE__->register_method(
1263 api_name => "open-ils.worm.wormize.metarecord.nomap.noscrub",
1264 method => "wormize_biblio_metarecord",
# wormize_biblio_record: (re)build the derived metabib data for one or more
# bibliographic records. For each bib it recomputes the fingerprint, flattens
# the MARC into full_rec rows, extracts the record descriptor and the
# per-class index field entries, optionally maps the bib to a metarecord, and
# then writes everything with batch.create calls.
# API-name switches: "noscrub" skips the scrub step, "nomap" skips metarecord
# mapping.
1271 sub wormize_biblio_record {
# A hash ref in $rec is treated as a search clause and replaced by the result
# of the id_list search_where call.
1276 if ( ref($rec) && ref($rec) =~ /HASH/o ) {
1277 $rec = OpenILS::Application::Ingest->storage_req(
1278 'open-ils.storage.id_list.biblio.record_entry.search_where', $rec
# Open a transaction if none is active; failing to begin one raises a PANIC.
1284 if (!OpenILS::Application::Ingest->in_transaction) {
1285 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1291 # clean up the cruft
1292 unless ($self->api_name =~ /noscrub/o) {
1293 $self->method_lookup( 'open-ils.worm.scrub.biblio' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
1297 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
1300 my @rec_descriptor = ();
# %metarecord is keyed by metarecord id; @source_map collects the new
# bib-to-metarecord map rows to be created in one batch later.
1308 my %metarecord = ();
1309 my @source_map = ();
1310 for my $r (@$bibs) {
# One savepoint per record (named with the record id) so a failed extraction
# can be rolled back without discarding the other records' work.
1312 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'extract_data'.$r->id );
1314 my $xml = $parser->parse_string($r->marc);
1316 #update the fingerprint
1317 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $xml );
1318 OpenILS::Application::Ingest->storage_req(
1319 'open-ils.storage.direct.biblio.record_entry.remote_update',
1321 { fingerprint => $fp->{fingerprint},
1322 quality => int($fp->{quality}) }
# Only issue the update when fingerprint or quality changed. Note that the
# quality values are compared with the string operator "ne".
1323 ) if ($fp->{fingerprint} ne $r->fingerprint || int($fp->{quality}) ne $r->quality);
1325 # the full_rec stuff
1326 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.biblio.xml' )->run( $xml ) ) {
1327 $fr->record( $r->id );
1328 push @full_rec, $fr;
1331 # the rec_descriptor stuff
1332 my ($rd) = $self->method_lookup( 'open-ils.worm.biblio_leader.xml' )->run( $xml );
1333 $rd->record( $r->id );
1334 push @rec_descriptor, $rd;
1336 # the indexing field entry stuff
1337 for my $class ( qw/title author subject keyword series/ ) {
1338 for my $fe ( $self->method_lookup( 'open-ils.worm.field_entry.class.xml' )->run( $xml, $class ) ) {
1339 $fe->source( $r->id );
1340 push @{$field_entry{$class}}, $fe;
# Metarecord mapping: look up a metarecord by the new fingerprint (first
# match); the lines below create one with this bib as master record.
1344 unless ($self->api_name =~ /nomap/o) {
1345 my $mr = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic', $fp->{fingerprint} )->[0];
1348 $mr = Fieldmapper::metabib::metarecord->new;
1349 $mr->fingerprint( $fp->{fingerprint} );
1350 $mr->master_record( $r->id );
1351 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
1354 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
1355 $mr_map->metarecord( $mr->id );
1356 $mr_map->source( $r->id );
1357 push @source_map, $mr_map;
1359 $metarecord{$mr->id} = $mr;
1361 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'extract_data'.$r->id );
# Failure path for a single record: log the error and undo that record's
# savepoint.
1363 $log->debug('Data extraction failed for record '.$r->id.': '.shift(), ERROR);
1364 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'extract_data'.$r->id );
# Write phase: runs only when at least one record descriptor was extracted,
# under a single 'wormize_record' savepoint.
1369 if (@rec_descriptor) {
1370 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_record' );
1372 OpenILS::Application::Ingest->storage_req(
1373 'open-ils.storage.direct.metabib.metarecord_source_map.batch.create',
# Re-elect the master record of every touched metarecord: load all mapped
# source bibs and pick the one with the highest quality.
1377 for my $mr ( values %metarecord ) {
1378 my $sources = OpenILS::Application::Ingest->storage_req(
1379 'open-ils.storage.direct.metabib.metarecord_source_map.search.metarecord.atomic',
1383 my $bibs = OpenILS::Application::Ingest->storage_req(
1384 'open-ils.storage.direct.biblio.record_entry.search.id.atomic',
1385 [ map { $_->source } @$sources ]
# Descending numeric sort on quality; element 0 is the best record.
1388 my $master = ( sort { $b->quality <=> $a->quality } @$bibs )[0];
1390 OpenILS::Application::Ingest->storage_req(
1391 'open-ils.storage.direct.metabib.metarecord.remote_update',
# The update also sets mods to undef alongside the new master record.
1393 { master_record => $master->id, mods => undef }
1397 OpenILS::Application::Ingest->storage_req(
1398 'open-ils.storage.direct.metabib.record_descriptor.batch.create',
1400 ) if (@rec_descriptor);
1402 OpenILS::Application::Ingest->storage_req(
1403 'open-ils.storage.direct.metabib.full_rec.batch.create',
# One batch.create per index class, each skipped when that class collected no
# entries.
1407 OpenILS::Application::Ingest->storage_req(
1408 'open-ils.storage.direct.metabib.title_field_entry.batch.create',
1409 @{ $field_entry{title} }
1410 ) if (@{ $field_entry{title} });
1412 OpenILS::Application::Ingest->storage_req(
1413 'open-ils.storage.direct.metabib.author_field_entry.batch.create',
1414 @{ $field_entry{author} }
1415 ) if (@{ $field_entry{author} });
1417 OpenILS::Application::Ingest->storage_req(
1418 'open-ils.storage.direct.metabib.subject_field_entry.batch.create',
1419 @{ $field_entry{subject} }
1420 ) if (@{ $field_entry{subject} });
1422 OpenILS::Application::Ingest->storage_req(
1423 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create',
1424 @{ $field_entry{keyword} }
1425 ) if (@{ $field_entry{keyword} });
1427 OpenILS::Application::Ingest->storage_req(
1428 'open-ils.storage.direct.metabib.series_field_entry.batch.create',
1429 @{ $field_entry{series} }
1430 ) if (@{ $field_entry{series} });
1432 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_record' );
# Failure path for the write phase: log and roll back the write savepoint.
1438 $log->debug('Wormization failed : '.shift(), ERROR);
1439 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_record' );
# Finish the transaction only when $commit is set: commit on success, roll
# back otherwise. Presumably $commit marks a transaction begun by this call --
# TODO confirm where it is assigned.
1443 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1444 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# API registrations: the name suffixes are what the api_name regex checks in
# the sub above react to.
1447 __PACKAGE__->register_method(
1448 api_name => "open-ils.worm.wormize.biblio",
1449 method => "wormize_biblio_record",
1453 __PACKAGE__->register_method(
1454 api_name => "open-ils.worm.wormize.biblio.nomap",
1455 method => "wormize_biblio_record",
1459 __PACKAGE__->register_method(
1460 api_name => "open-ils.worm.wormize.biblio.noscrub",
1461 method => "wormize_biblio_record",
1465 __PACKAGE__->register_method(
1466 api_name => "open-ils.worm.wormize.biblio.nomap.noscrub",
1467 method => "wormize_biblio_record",
# wormize_authority_record: authority-record counterpart of the biblio ingest.
# It optionally scrubs the record, flattens each authority record's MARC into
# full_rec rows and writes them in one batch under a savepoint.
1472 sub wormize_authority_record {
# Open a transaction if none is active; failing to begin one raises a PANIC.
1478 if (!OpenILS::Application::Ingest->in_transaction) {
1479 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
1485 # clean up the cruft
1486 unless ($self->api_name =~ /noscrub/o) {
1487 $self->method_lookup( 'open-ils.worm.scrub.authority' )->run( $rec ) || throw OpenSRF::EX::PANIC ("Couldn't scrub record $rec!");
# Despite the name, $bibs holds authority record entries here.
1491 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_entry.search.id.atomic', $rec );
# @rec_descriptor is never filled: the code that would populate it is
# commented out below.
1494 my @rec_descriptor = ();
1495 for my $r (@$bibs) {
1496 my $xml = $parser->parse_string($r->marc);
1498 # the full_rec stuff
1499 for my $fr ( $self->method_lookup( 'open-ils.worm.flat_marc.authority.xml' )->run( $xml ) ) {
1500 $fr->record( $r->id );
1501 push @full_rec, $fr;
1504 # the rec_descriptor stuff -- XXX What does this mean for authority records?
1505 #my ($rd) = $self->method_lookup( 'open-ils.worm.authority_leader.xml' )->run( $xml );
1506 #$rd->record( $r->id );
1507 #push @rec_descriptor, $rd;
# Write phase, guarded by the 'wormize_authority_record' savepoint.
1511 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.set', 'wormize_authority_record' );
1513 #OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.record_descriptor.batch.create', @rec_descriptor ) if (@rec_descriptor);
1514 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.authority.full_rec.batch.create', @full_rec ) if (@full_rec);
1516 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.release', 'wormize_authority_record' );
# Failure path: log the error and roll back the savepoint.
1519 $log->debug('Wormization failed : '.shift(), ERROR);
1520 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.savepoint.rollback', 'wormize_authority_record' );
# Commit or roll back only when $commit is set (assigned outside the lines
# shown here -- TODO confirm).
1524 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
1525 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names: the plain one scrubs first, the ".noscrub" one does not.
1528 __PACKAGE__->register_method(
1529 api_name => "open-ils.worm.wormize.authority",
1530 method => "wormize_authority_record",
1534 __PACKAGE__->register_method(
1535 api_name => "open-ils.worm.wormize.authority.noscrub",
1536 method => "wormize_authority_record",
1542 # --------------------------------------------------------------------------------
1543 # MARC index extraction
1545 package OpenILS::Application::Ingest::XPATH;
1546 use base qw/OpenILS::Application::Ingest/;
1547 use Unicode::Normalize;
1549 # give this a MODS documentElement and an XPATH expression
# _xpath_to_string: collect the text matched by an XPath expression into one
# space-separated string and return it NFD-normalized.
1550 sub _xpath_to_string {
1554 my $ns_prefix = shift;
# Bind the namespace prefix on the element only when both URI and prefix
# were supplied.
1557 $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
1561 # grab the set of matching nodes
1562 my @nodes = $xml->findnodes( $xpath );
1563 for my $value (@nodes) {
1565 # grab all children of the node
1566 my @children = $value->childNodes();
1567 for my $child (@children) {
1569 # add the child's content to the growing buffer
# quotemeta makes the child text match literally in the regex below. With
# $unique set, text already present anywhere in the buffer is skipped; this is
# a substring test, so a value contained in an earlier one is skipped too.
1570 my $content = quotemeta($child->textContent);
1571 next if ($unique && $string =~ /$content/); # uniquify the values
1572 $string .= $child->textContent . " ";
# Whole-node variant of the append above.
1575 $string .= $value->textContent . " ";
1578 return NFD($string);
# class_all_index_string_xml: for one index class, produce a field_entry
# object for every field configured in $xpathset->{$class} and stream each
# one to the caller with $client->respond.
1581 sub class_all_index_string_xml {
1587 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
1588 $xml = $parser->parse_string($xml) unless (ref $xml);
# The Fieldmapper class name is derived from the index class.
1590 my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
1591 for my $type ( keys %{ $xpathset->{$class} } ) {
# NOTE(review): the MODS transform is re-run for every $type although it
# does not depend on $type.
1592 my $value = _xpath_to_string(
1593 $mods_sheet->transform($xml)->documentElement,
1594 $xpathset->{$class}->{$type}->{xpath},
1595 "http://www.loc.gov/mods/",
# Normalize the extracted text: decompose (NFD), drop \pM mark characters,
# drop \pC characters, strip trailing non-word characters, remove a period
# that directly follows a word character, then lowercase.
1602 $value = NFD($value);
1603 $value =~ s/\pM+//sgo;
1604 $value =~ s/\pC+//sgo;
1605 $value =~ s/\W+$//sgo;
1607 $value =~ s/(\w)\./$1/sgo;
1608 $value = lc($value);
# Build the entry: normalized value plus the id of the configured field.
1610 my $fm = $class_constructor->new;
1611 $fm->value( $value );
1612 $fm->field( $xpathset->{$class}->{$type}->{id} );
1613 $client->respond($fm);
1617 __PACKAGE__->register_method(
1618 api_name => "open-ils.worm.field_entry.class.xml",
1619 method => "class_all_index_string_xml",
# class_all_index_string_record: record-id front end for the
# open-ils.worm.field_entry.class.xml method. Retrieves the bib record entry
# for $rec, runs the XML method on its MARC for $class, and relays every
# resulting field entry to the caller.
1625 sub class_all_index_string_record {
1631 OpenILS::Application::Ingest->post_init();
1632 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
1634 for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
1636 $client->respond($fm);
1640 __PACKAGE__->register_method(
1641 api_name => "open-ils.worm.field_entry.class.record",
1642 method => "class_all_index_string_record",
# class_index_string_xml: return the index string for a single class/type
# pair. The MARC is transformed to MODS and the XPath configured for
# $class/$type is evaluated with the "mods" prefix bound and the uniquify
# flag set to 1.
1649 sub class_index_string_xml {
1656 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
1657 $xml = $parser->parse_string($xml) unless (ref $xml);
1658 return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
1660 __PACKAGE__->register_method(
1661 api_name => "open-ils.worm.class.type.xml",
1662 method => "class_index_string_xml",
# class_index_string_record: record-id front end for the
# open-ils.worm.class.type.xml method. Retrieves the bib record entry for
# $rec, runs the XML method on its MARC for $class/$type, and logs the
# resulting string at DEBUG level.
1667 sub class_index_string_record {
1674 OpenILS::Application::Ingest->post_init();
1675 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1677 my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
1678 $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
1681 __PACKAGE__->register_method(
1682 api_name => "open-ils.worm.class.type.record",
1683 method => "class_index_string_record",
# Body of the method registered below as xml_xpath: evaluate a caller-supplied
# XPath directly against the document element of the given XML (no MODS
# transform) and return the collected text. $uri/$prefix and $unique are
# passed straight through to _xpath_to_string.
1697 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
1698 $xml = $parser->parse_string($xml) unless (ref $xml);
1699 return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
1701 __PACKAGE__->register_method(
1702 api_name => "open-ils.worm.xpath.xml",
1703 method => "xml_xpath",
# Body of the method registered below as record_xpath: retrieve the bib record
# entry for $rec, run open-ils.worm.xpath.xml against its MARC with the same
# XPath arguments, and log the result at DEBUG level.
1717 OpenILS::Application::Ingest->post_init();
1718 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1720 my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
1721 $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
1724 __PACKAGE__->register_method(
1725 api_name => "open-ils.worm.xpath.record",
1726 method => "record_xpath",
1732 # --------------------------------------------------------------------------------
1735 package OpenILS::Application::Ingest::Biblio::Leader;
1736 use base qw/OpenILS::Application::Ingest/;
1737 use Unicode::Normalize;
# %marc_type_groups maps a group name (e.g. VIS) to a regex character class
# of record-type codes.
1739 our %marc_type_groups = (
1742 VIS => q/[gkro]{1}/,
# Builds an anchored alternation from the type groups named in @_.
# NOTE(review): $marc_type_groups{@_} is a single-element lookup, not a hash
# slice (a slice would be @marc_type_groups{@_}) -- confirm intent. The
# alternation is also not grouped, so with several alternatives '^' applies
# only to the first and '$' only to the last.
1751 my $re = '^'. join('|', $marc_type_groups{@_}) .'$';
# %biblio_descriptor_code maps a record_descriptor field name to a closure
# that extracts that field. The closures read the package variables $ldr and
# $oo8, which _extract_biblio_descriptors sets with local before calling them.
1755 our %biblio_descriptor_code = (
1756 item_type => sub { substr($ldr,6,1); },
# Branch on the type code at leader offset 6: the MAP and VIS groups read 008
# offset 29, the BKS/SER/MIX/SCO/REC groups read 008 offset 23.
1759 if (substr($ldr,6,1) =~ _type_re( qw/MAP VIS/ )) {
1760 return substr($oo8,29,1);
1761 } elsif (substr($ldr,6,1) =~ _type_re( qw/BKS SER MIX SCO REC/ )) {
1762 return substr($oo8,23,1);
# Single-character fields taken straight from fixed leader offsets.
1766 bib_level => sub { substr($ldr,7,1); },
1767 control_type => sub { substr($ldr,8,1); },
1768 char_encoding => sub { substr($ldr,9,1); },
1769 enc_level => sub { substr($ldr,17,1); },
1770 cat_form => sub { substr($ldr,18,1); },
1771 pub_status => sub { substr($ldr,5,1); },
# Three characters of the 008 field starting at offset 35.
1772 item_lang => sub { substr($oo8,35,3); },
# lit_form and type_mat both read 008 offset 33, but only for the BKS and VIS
# type groups respectively; otherwise they yield undef.
1773 lit_form => sub { (substr($ldr,6,1) =~ _type_re('BKS')) ? substr($oo8,33,1) : undef; },
1774 type_mat => sub { (substr($ldr,6,1) =~ _type_re('VIS')) ? substr($oo8,33,1) : undef; },
1775 audience => sub { substr($oo8,22,1); },
# _extract_biblio_descriptors: fill a metabib record_descriptor object from a
# parsed MARCXML document by running every extractor in
# %biblio_descriptor_code.
1778 sub _extract_biblio_descriptors {
# The leader and the 007/008 control fields are located by local-name(), so
# the lookup works regardless of namespace prefix. local makes the values
# visible to the extractor closures for the duration of this call only.
1781 local $ldr = $xml->findvalue('//*[local-name()="leader"]');
1782 local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1783 local $oo7 = $xml->findvalue('//*[local-name()="controlfield" and @tag="007"]');
1785 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
# Each hash key doubles as the name of the setter method on the descriptor.
1786 for my $rd_field ( keys %biblio_descriptor_code ) {
1787 $rd_obj->$rd_field( $biblio_descriptor_code{$rd_field}->() );
# extract_biblio_desc_xml: API entry point that returns the record descriptor
# for a MARCXML document.
1793 sub extract_biblio_desc_xml {
# Accept either an already-parsed document (a reference) or an XML string.
1798 $xml = $parser->parse_string($xml) unless (ref $xml);
1800 return _extract_biblio_descriptors( $xml );
1802 __PACKAGE__->register_method(
1803 api_name => "open-ils.worm.biblio_leader.xml",
1804 method => "extract_biblio_desc_xml",
# extract_biblio_desc_record: record-id front end for the
# open-ils.worm.biblio_leader.xml method. Retrieves the bib record entry for
# $rec, runs the XML method on its MARC, and logs the descriptor as JSON at
# DEBUG level.
1809 sub extract_biblio_desc_record {
1814 OpenILS::Application::Ingest->post_init();
1815 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec );
# Only the first value returned by the XML method is kept.
1817 my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
1818 $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
1821 __PACKAGE__->register_method(
1822 api_name => "open-ils.worm.biblio_leader.record",
1823 method => "extract_biblio_desc_record",
1828 # --------------------------------------------------------------------------------
1831 package OpenILS::Application::Ingest::FlatMARC;
1832 use base qw/OpenILS::Application::Ingest/;
1833 use Unicode::Normalize;
# _marcxml_to_full_rows: flatten a parsed MARCXML document into full_rec
# Fieldmapper objects, covering the leader, each controlfield, and each
# subfield of each datafield.
1836 sub _marcxml_to_full_rows {
1838 my $marcxml = shift;
# $xmltype selects the Fieldmapper namespace and defaults to 'metabib'.
1839 my $xmltype = shift || 'metabib';
1841 my $type = "Fieldmapper::${xmltype}::full_rec";
# Take the first element named "record" anywhere in the document, matched by
# local-name() so the namespace prefix does not matter.
1845 my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
# Leader.
1847 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1848 next unless $tagline;
1850 my $ns = $type->new;
1853 my $val = $tagline->textContent;
# Value cleanup used in all three loops: drop \pM mark characters, drop \pC
# characters, strip trailing non-word characters.
1855 $val =~ s/\pM+//sgo;
1856 $val =~ s/\pC+//sgo;
1857 $val =~ s/\W+$//sgo;
# Control fields: the tag attribute is recorded with the cleaned value.
1863 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1864 next unless $tagline;
1866 my $ns = $type->new;
1868 $ns->tag( $tagline->getAttribute( "tag" ) );
1869 my $val = $tagline->textContent;
1871 $val =~ s/\pM+//sgo;
1872 $val =~ s/\pC+//sgo;
1873 $val =~ s/\W+$//sgo;
# Data fields: tag and both indicators are read once per field, then one
# object is created per subfield.
1879 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1880 next unless $tagline;
1882 my $tag = $tagline->getAttribute( "tag" );
1883 my $ind1 = $tagline->getAttribute( "ind1" );
1884 my $ind2 = $tagline->getAttribute( "ind2" );
1886 for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) {
1889 my $ns = $type->new;
1894 $ns->subfield( $data->getAttribute( "code" ) );
1895 my $val = $data->textContent;
1897 $val =~ s/\pM+//sgo;
1898 $val =~ s/\pC+//sgo;
1899 $val =~ s/\W+$//sgo;
# Subfield values are stored lowercased.
1900 $ns->value( lc($val) );
1906 $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
# Body of the method registered below as flat_marc_xml: flatten MARCXML into
# full_rec rows and stream each row to the caller.
# Accept either an already-parsed document (a reference) or an XML string.
1915 $xml = $parser->parse_string($xml) unless (ref $xml);
# The Fieldmapper namespace follows the API name: 'authority' when the name
# contains "authority", otherwise 'metabib'.
1917 my $type = 'metabib';
1918 $type = 'authority' if ($self->api_name =~ /authority/o);
1920 OpenILS::Application::Ingest->post_init();
1922 $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
# Both the authority and the biblio API name map to flat_marc_xml.
1925 __PACKAGE__->register_method(
1926 api_name => "open-ils.worm.flat_marc.authority.xml",
1927 method => "flat_marc_xml",
1932 __PACKAGE__->register_method(
1933 api_name => "open-ils.worm.flat_marc.biblio.xml",
1934 method => "flat_marc_xml",
# flat_marc_record: record-id front end for the flat_marc XML methods.
# Retrieves the record entry for $rec and relays every row produced by the
# matching open-ils.worm.flat_marc.<type>.xml method.
1940 sub flat_marc_record {
# Here $type names the record_entry class and the XML method to call, so the
# default is 'biblio' (flat_marc_xml itself defaults to 'metabib').
1945 my $type = 'biblio';
1946 $type = 'authority' if ($self->api_name =~ /authority/o);
1948 OpenILS::Application::Ingest->post_init();
1949 my $r = OpenILS::Application::Ingest->storage_req( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec );
1951 $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
# Both the biblio and the authority API name map to flat_marc_record.
1954 __PACKAGE__->register_method(
1955 api_name => "open-ils.worm.flat_marc.biblio.record_entry",
1956 method => "flat_marc_record",
1961 __PACKAGE__->register_method(
1962 api_name => "open-ils.worm.flat_marc.authority.record_entry",
1963 method => "flat_marc_record",
1970 # --------------------------------------------------------------------------------
1973 package OpenILS::Application::Ingest::Biblio::Fingerprint;
1974 use base qw/OpenILS::Application::Ingest/;
1975 use Unicode::Normalize;
1976 use OpenSRF::EX qw/:try/;
# @fp_mods_xpath is a flat list of pairs: a "match" XPath followed by an array
# ref (the block) describing how to build fingerprint text when that XPath
# matches. The fingerprint builder in this package walks the list with
# separate match/block indexes and, inside a block, reads name/value pairs
# whose values carry an {xpath} list of candidate expressions.
# The fixup code in each block logs the text after every step at INTERNAL
# level.
1978 my @fp_mods_xpath = (
# Pair 1: resources whose typeOfResource text is "text".
1979 '//mods:mods/mods:typeOfResource[text()="text"]' => [
# Title candidates, in order: uniform, translated, alternative, untyped.
1982 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
1983 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
1984 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
1985 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup: strip \pM mark characters, collapse whitespace runs to one
# space, trim, drop the articles the/a/an, drop bracketed text, drop trailing
# ; / . characters.
1988 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1990 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1991 $text =~ s/\pM+//gso;
1992 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1994 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1995 $text =~ s/\s+/ /sgo;
1996 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# NOTE(review): (.+) is greedy and the trailing \s* may match nothing, so this
# removes leading whitespace only -- confirm that is acceptable.
1997 $text =~ s/^\s*(.+)\s*$/$1/sgo;
1998 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
1999 $text =~ s/\b(?:the|an?)\b//sgo;
2000 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2001 $text =~ s/\[.[^\]]+\]//sgo;
2002 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2003 $text =~ s/\s*[;\/\.]*$//sgo;
2004 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: personal creator names first, then any creator name.
2009 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2010 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fixup: same mark/whitespace cleanup, then everything from the first
# whitespace run (and an optional preceding comma) onward is cut off.
2013 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2015 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2016 $text =~ s/\pM+//gso;
2017 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2019 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2020 $text =~ s/\s+/ /sgo;
2021 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2022 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2023 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2024 $text =~ s/,?\s+.*$//sgo;
2025 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Pair 2: records with a relatedItem whose type is neither host nor series.
2030 '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
# Title candidates: the related item's titles first, then the record's own.
2033 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
2034 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
2035 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
2036 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
2037 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
2038 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
2039 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
2040 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
# Title fixup, same steps as in pair 1.
2043 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2045 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2046 $text =~ s/\pM+//gso;
2047 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2049 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2050 $text =~ s/\s+/ /sgo;
2051 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2052 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2053 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2054 $text =~ s/\b(?:the|an?)\b//sgo;
2055 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2056 $text =~ s/\[.[^\]]+\]//sgo;
2057 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2058 $text =~ s/\s*[;\/\.]*$//sgo;
2059 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Author candidates: the related item's creators first, then the record's own.
2064 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2065 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
2066 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
2067 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
# Author fixup, same steps as in pair 1.
2070 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2072 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2073 $text =~ s/\pM+//gso;
2074 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2076 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2077 $text =~ s/\s+/ /sgo;
2078 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2079 $text =~ s/^\s*(.+)\s*$/$1/sgo;
2080 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
2081 $text =~ s/,?\s+.*$//sgo;
2082 $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
# Final pair: any record with a titleInfo element reuses the block of pair 1
# (the array ref stored at index 1), so the two share one definition.
2089 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
# Fingerprint builder: walk @fp_mods_xpath and assemble the fingerprint string
# from the text found by each block's candidate XPaths.
# Bind the "mods" prefix used by every XPath in @fp_mods_xpath.
2093 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
# @fp_mods_xpath alternates match XPath (even index) and block (odd index).
2097 my $match_index = 0;
2098 my $block_index = 1;
2099 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
2100 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
# Inside a block the layout is again name (even index), value (odd index).
2102 my $block_name_index = 0;
2103 my $block_value_index = 1;
2104 my $block = $fp_mods_xpath[$block_index];
2105 while ( my $part = $$block[$block_value_index] ) {
# Evaluate the part's candidate XPaths in their listed order.
2107 for my $xpath ( @{ $part->{xpath} } ) {
2108 $text = $mods->findvalue( $xpath );
2112 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
2116 $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
2117 $fp_string .= $text;
# Step to the next name/value pair of the block.
2120 $block_name_index += 2;
2121 $block_value_index += 2;
# The finished fingerprint has every run of non-word characters removed.
2125 $fp_string =~ s/\W+//gso;
2126 $log->debug("Fingerprint is [$fp_string]", INFO);;
# refingerprint_bibrec: recompute the fingerprint of each bib found for $rec
# and, when fingerprint or quality changed, store the new values. Unless the
# API name contains "nomap", the bib's metarecord mapping is rebuilt as well.
# The id of each bib is streamed back with $client->respond.
2136 sub refingerprint_bibrec {
# Open a transaction if none is active; failing to begin one raises a PANIC.
2142 if (!OpenILS::Application::Ingest->in_transaction) {
2143 OpenILS::Application::Ingest->begin_transaction($client) || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
2149 my $bibs = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.search.id.atomic', $rec );
# NOTE(review): the lexical loop variable $b shares its name with Perl's sort
# variable $b; harmless here unless a sort block is added inside the loop.
2150 for my $b (@$bibs) {
2151 my ($fp) = $self->method_lookup( 'open-ils.worm.fingerprint.marc' )->run( $b->marc );
# Fingerprints are compared as strings, quality values numerically.
2153 if ($b->fingerprint ne $fp->{fingerprint} || $b->quality != $fp->{quality}) {
2155 $log->debug("Updating ".$b->id." with fingerprint [$fp->{fingerprint}], quality [$fp->{quality}]", INFO);;
2157 OpenILS::Application::Ingest->storage_req(
2158 'open-ils.storage.direct.biblio.record_entry.remote_update',
2160 { fingerprint => $fp->{fingerprint},
2161 quality => $fp->{quality} }
# Remapping: remove the bib's existing source-map rows, remembering the
# metarecord they pointed at.
2164 if ($self->api_name !~ /nomap/o) {
2165 my $old_source_map = OpenILS::Application::Ingest->storage_req(
2166 'open-ils.storage.direct.metabib.metarecord_source_map.search.source.atomic',
2171 if (ref($old_source_map) and @$old_source_map) {
2172 for my $m (@$old_source_map) {
2173 $old_mrid = $m->metarecord;
2174 OpenILS::Application::Ingest->storage_req(
2175 'open-ils.storage.direct.metabib.metarecord_source_map.delete',
# If the old metarecord has no source maps left, delete it.
2181 my $old_sm = OpenILS::Application::Ingest->storage_req(
2182 'open-ils.storage.direct.metabib.metarecord_source_map.search.atomic',
2183 { metarecord => $old_mrid }
2186 if (ref($old_sm) and @$old_sm == 0) {
2187 OpenILS::Application::Ingest->storage_req(
2188 'open-ils.storage.direct.metabib.metarecord.delete',
# Look up a metarecord for the new fingerprint; the lines below create one
# with this bib as master record.
2193 my $mr = OpenILS::Application::Ingest->storage_req(
2194 'open-ils.storage.direct.metabib.metarecord.search.fingerprint.atomic',
2195 { fingerprint => $fp->{fingerprint} }
2199 $mr = Fieldmapper::metabib::metarecord->new;
2200 $mr->fingerprint( $fp->{fingerprint} );
2201 $mr->master_record( $b->id );
2202 $mr->id( OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord.create', $mr) );
# Map this bib to the metarecord.
2205 my $mr_map = Fieldmapper::metabib::metarecord_source_map->new;
2206 $mr_map->metarecord( $mr->id );
2207 $mr_map->source( $b->id );
2208 OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.create', $mr_map );
2212 $client->respond($b->id);
# Failure path: log the error.
2216 $log->debug('Fingerprinting failed : '.shift(), ERROR);
# Commit or roll back only when $commit is set (assigned outside the lines
# shown here -- TODO confirm).
2220 OpenILS::Application::Ingest->commit_transaction if ($commit && $success);
2221 OpenILS::Application::Ingest->rollback_transaction if ($commit && !$success);
# Two API names: the ".nomap" variant skips the metarecord remapping above.
2224 __PACKAGE__->register_method(
2225 api_name => "open-ils.worm.fingerprint.record.update",
2226 method => "refingerprint_bibrec",
2232 __PACKAGE__->register_method(
2233 api_name => "open-ils.worm.fingerprint.record.update.nomap",
2234 method => "refingerprint_bibrec",
# fingerprint_bibrec: record-id front end for open-ils.worm.fingerprint.marc.
# Retrieves the bib record entry for $rec, fingerprints its MARC and logs the
# result at INFO level.
# NOTE(review): the API name registered below is registered again later in
# this package for biblio_fingerprint_record -- confirm which one is meant to
# be active.
2241 sub fingerprint_bibrec {
2246 OpenILS::Application::Ingest->post_init();
2247 my $r = OpenILS::Application::Ingest->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec );
# Only the first value returned by the fingerprint method is kept.
2249 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
2250 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2254 __PACKAGE__->register_method(
2255 api_name => "open-ils.worm.fingerprint.record",
2256 method => "fingerprint_bibrec",
# fingerprint_mods: fingerprint a MODS document supplied as an XML string.
# The string is parsed and its document element handed to _fp_mods.
2262 sub fingerprint_mods {
2267 OpenILS::Application::Ingest->post_init();
2268 my $mods = $parser->parse_string($xml)->documentElement;
2270 return _fp_mods( $mods );
2272 __PACKAGE__->register_method(
2273 api_name => "open-ils.worm.fingerprint.mods",
2274 method => "fingerprint_mods",
# fingerprint_marc: fingerprint a MARCXML document by transforming it to MODS
# with $mods_sheet and passing the resulting document element to _fp_mods.
# NOTE(review): the API name registered below is registered again later in
# this package for biblio_fingerprint -- confirm which one is meant to be
# active.
2279 sub fingerprint_marc {
# Accept either an already-parsed document (a reference) or an XML string.
2284 $xml = $parser->parse_string($xml) unless (ref $xml);
2286 OpenILS::Application::Ingest->post_init();
2287 my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
2288 $log->debug("Returning [$fp] as fingerprint", INFO);
2291 __PACKAGE__->register_method(
2292 api_name => "open-ils.worm.fingerprint.marc",
2293 method => "fingerprint_marc",
# biblio_fingerprint_record: record-id front end for
# open-ils.worm.fingerprint.marc. Retrieves the bib record entry for $rec,
# fingerprints the value placed in $marc and logs the result at INFO level.
# NOTE(review): "open-ils.worm.fingerprint.record" is also registered earlier
# in this package for fingerprint_bibrec -- confirm which registration is
# meant to be active.
2301 sub biblio_fingerprint_record {
2306 OpenILS::Application::Ingest->post_init();
2308 my $marc = OpenILS::Application::Ingest
2309 ->storage_req( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )
# Only the first value returned by the fingerprint method is kept.
2312 my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($marc);
2313 $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
2316 __PACKAGE__->register_method(
2317 api_name => "open-ils.worm.fingerprint.record",
2318 method => "biblio_fingerprint_record",
# biblio_fingerprint: compute a fingerprint by running a configured script
# through OpenILS::Utils::ScriptRunner. The script receives an 'environment'
# hash holding the MARC and the MODS derived from it.
# NOTE(review): "open-ils.worm.fingerprint.marc" is also registered earlier in
# this package for fingerprint_marc -- confirm which registration is meant to
# be active.
2324 sub biblio_fingerprint {
2329 OpenILS::Application::Ingest->post_init();
# Accept either an already-parsed document (a reference) or an XML string.
2331 $marc = $parser->parse_string($marc) unless (ref $marc);
# $mods is the entityized result of transforming the MARC document.
2333 my $mods = OpenILS::Application::Ingest::entityize(
2335 ->transform( $marc )
# $marc is replaced by its serialized, entityized form; 'D' is passed as the
# second argument to entityize.
2341 $marc = OpenILS::Application::Ingest::entityize( $marc->documentElement->toString => 'D' );
2344 $log->internal("Got MARC [$marc]");
2345 $log->internal("Created MODS [$mods]");
# Script location comes from the settings under
# apps/open-ils.storage/app_settings: script_path (library paths) and
# scripts/biblio_fingerprint (the script file).
2348 my @pfx = ( "apps", "open-ils.storage","app_settings" );
2349 my $conf = OpenSRF::Utils::SettingsClient->new;
2351 my $libs = $conf->config_value(@pfx, 'script_path');
2352 my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
# script_path may be a single value or an array ref; normalize to an array ref.
2353 my $script_libs = (ref($libs)) ? $libs : [$libs];
2355 $log->debug("Loading script $script_file for biblio fingerprinting...");
# $fp_script is not declared with my here; presumably it is a variable from an
# outer scope that keeps the runner between calls -- TODO confirm.
2357 $fp_script = new OpenILS::Utils::ScriptRunner
2358 ( file => $script_file,
2359 paths => $script_libs,
2360 reset_count => 1000 );
2363 $log->debug("Applying environment for biblio fingerprinting...");
2365 my $env = {marc => $marc, mods => $mods};
2366 #my $res = {fingerprint => '', quality => '0'};
2368 $fp_script->insert('environment' => $env);
2369 #$fp_script->insert('result' => $res);
2371 $log->debug("Running script for biblio fingerprinting...");
# A false result from run logs the error; the early "return 0" only fires if
# $log->error itself returns a true value -- confirm that it does.
2373 my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return 0);
2375 $log->debug("Script for biblio fingerprinting completed successfully...");
2379 __PACKAGE__->register_method(
2380 api_name => "open-ils.worm.fingerprint.marc",
2381 method => "biblio_fingerprint",
2386 # --------------------------------------------------------------------------------
2400 my $create_source_map;
# %descriptor_code maps a record descriptor field name to a string of Perl
# source that reads a fixed position of $ldr or $oo8. Presumably the strings
# are evaluated elsewhere with those variables in scope -- TODO confirm.
2415 my %descriptor_code = (
2416 item_type => 'substr($ldr,6,1)',
# For type codes f, g, i, m, o, p or r the form is read from 008 offset 29,
# otherwise from 008 offset 23.
2417 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
2418 bib_level => 'substr($ldr,7,1)',
2419 control_type => 'substr($ldr,8,1)',
2420 char_encoding => 'substr($ldr,9,1)',
2421 enc_level => 'substr($ldr,17,1)',
2422 cat_form => 'substr($ldr,18,1)',
2423 pub_status => 'substr($ldr,5,1)',
2424 item_lang => 'substr($oo8,35,3)',
# lit_form is disabled in this table.
2425 #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
2426 audience => 'substr($oo8,22,1)',
2436 if ($self->api_name =~ /no_map/o) {
2440 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2442 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2444 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2446 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2448 $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
2449 unless ($sm_lookup);
2450 $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
2451 unless ($mr_lookup);
2452 $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
2453 unless ($mr_update);
2454 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
2456 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
2457 unless ($update_entry);
2458 $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
2459 unless ($rm_old_sm);
2460 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
2461 unless ($rm_old_rd);
2462 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
2463 unless ($rm_old_fr);
2464 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
2465 unless ($rm_old_tr);
2466 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
2467 unless ($rm_old_ar);
2468 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
2469 unless ($rm_old_sr);
2470 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
2471 unless ($rm_old_kr);
2472 $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
2473 unless ($rm_old_ser);
2474 $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
2475 unless ($mr_create);
2476 $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
2477 unless ($create_source_map);
2478 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
2479 unless ($rd_create);
2480 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
2481 unless ($fr_create);
2482 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
2483 unless ($$create{title});
2484 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
2485 unless ($$create{author});
2486 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
2487 unless ($$create{subject});
2488 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
2489 unless ($$create{keyword});
2490 $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
2491 unless ($$create{series});
2494 my ($outer_xact) = $in_xact->run;
2496 unless ($outer_xact) {
2497 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2498 my ($r) = $begin->run($client);
2499 unless (defined $r and $r) {
2501 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2504 } catch Error with {
2505 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
2515 for my $entry ( $lookup->run(@docids) ) {
2516 # step -1: grab the doc from storage
2517 next unless ($entry);
2520 my $xslt_doc = $parser->parse_file(
2521 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2522 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
2525 my $xml = $entry->marc;
2526 my $docid = $entry->id;
2527 my $marcdoc = $parser->parse_string($xml);
2528 my $modsdoc = $mods_sheet->transform($marcdoc);
2530 my $mods = $modsdoc->documentElement;
2531 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
2533 $entry->fingerprint( fingerprint_mods( $mods ) );
2534 push @entry_list, $entry;
2536 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
2539 my ($mr) = $mr_lookup->run( $entry->fingerprint );
2540 if (!$mr || !@$mr) {
2541 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
2542 $mr = new Fieldmapper::metabib::metarecord;
2543 $mr->fingerprint( $entry->fingerprint );
2544 $mr->master_record( $entry->id );
2545 my ($new_mr) = $mr_create->run($mr);
2547 unless (defined $mr) {
2548 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
2551 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
2556 my $sm = new Fieldmapper::metabib::metarecord_source_map;
2557 $sm->metarecord( $mr->id );
2558 $sm->source( $entry->id );
2559 push @source_maps, $sm;
2562 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2563 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2565 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
2566 for my $rd_field ( keys %descriptor_code ) {
2567 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2569 $rd_obj->record( $docid );
2570 push @rd_list, $rd_obj;
2572 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
2574 # step 2: build the KOHA rows
2575 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
2576 $_->record( $docid ) for (@tmp_list);
2577 push @ns_list, @tmp_list;
2581 last unless ($self->api_name =~ /batch$/o);
2584 $rm_old_rd->run( { record => \@docids } );
2585 $rm_old_fr->run( { record => \@docids } );
2586 $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
2587 $rm_old_tr->run( { source => \@docids } );
2588 $rm_old_ar->run( { source => \@docids } );
2589 $rm_old_sr->run( { source => \@docids } );
2590 $rm_old_kr->run( { source => \@docids } );
2591 $rm_old_ser->run( { source => \@docids } );
2594 my ($sm) = $create_source_map->run(@source_maps);
2595 unless (defined $sm) {
2596 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
2598 my ($mr) = $mr_update->run(@mr_list);
2599 unless (defined $mr) {
2600 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
2604 my ($re) = $update_entry->run(@entry_list);
2605 unless (defined $re) {
2606 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
2609 my ($rd) = $rd_create->run(@rd_list);
2610 unless (defined $rd) {
2611 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
2614 my ($fr) = $fr_create->run(@ns_list);
2615 unless (defined $fr) {
2616 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
2619 # step 5: insert the new metadata
2620 for my $class ( qw/title author subject keyword series/ ) {
2622 for my $doc ( @mods_data ) {
2623 my ($did) = keys %$doc;
2624 my ($data) = values %$doc;
2626 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
2627 for my $row ( keys %{ $$data{$class} } ) {
2628 next unless (exists $$data{$class}{$row});
2629 next unless ($$data{$class}{$row}{value});
2630 my $fm_obj = $fm_constructor->new;
2631 $fm_obj->value( $$data{$class}{$row}{value} );
2632 $fm_obj->field( $$data{$class}{$row}{field_id} );
2633 $fm_obj->source( $did );
2634 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
2636 push @md_list, $fm_obj;
2640 my ($cr) = $$create{$class}->run(@md_list);
2641 unless (defined $cr) {
2642 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
2646 unless ($outer_xact) {
2647 $log->debug("Commiting transaction started by the Ingest.", INFO);
2648 my ($c) = $commit->run;
2649 unless (defined $c and $c) {
2651 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the bibliographic ingest API. All four names below dispatch to the
# same `wormize` handler; the handler inspects its own api_name at run time
# (it stops after the first record unless the name matches /batch$/, and it
# skips the metarecord_source_map mass_delete when $no_map is set).
# NOTE(review): the assignment of $no_map is outside the lines visible here --
# presumably it is derived from the ".no_map" part of the API name; confirm.

# Single record, source maps rebuilt.
2657 __PACKAGE__->register_method(
2658 api_name => "open-ils.worm.wormize",
2659 method => "wormize",
# Single record, existing source maps left alone.
2663 __PACKAGE__->register_method(
2664 api_name => "open-ils.worm.wormize.no_map",
2665 method => "wormize",
# Every record id passed in, source maps rebuilt.
2669 __PACKAGE__->register_method(
2670 api_name => "open-ils.worm.wormize.batch",
2671 method => "wormize",
# Every record id passed in, existing source maps left alone.
2675 __PACKAGE__->register_method(
2676 api_name => "open-ils.worm.wormize.no_map.batch",
2677 method => "wormize",
# File-scoped variable. NOTE(review): nothing in the visible authority ingest
# code reads or writes it -- presumably a slot for a cached authority
# source-map method handle, mirroring $create_source_map; confirm or remove.
2692 my $acreate_source_map;
# authority_wormize -- ingest handler for authority records.
#
# For each authority record entry retrieved for @docids it parses the MARCXML,
# builds a Fieldmapper::authority::record_descriptor and a list of
# Fieldmapper::authority::full_rec rows, then replaces the stored descriptor
# and full_rec rows for those ids. If no storage transaction is already open
# it begins one and commits it at the end.
#
# Uses $self (api_name, method_lookup), $client (handed to the transaction
# begin call) and @docids (authority record ids); the argument unpacking
# itself is not part of the lines visible here.
#
# Throws OpenSRF::EX::PANIC when the transaction cannot be begun or committed,
# or when a batch create call returns undef.
2707 sub authority_wormize {
2714 if ($self->api_name =~ /no_map/o) {
# Resolve the storage method handles. The `unless ($var)` guards make each
# lookup happen only while the variable is still unset.
2718 $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
2720 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
2722 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
2724 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
2726 $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
2728 $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
2729 unless ($aupdate_entry);
2730 $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
2731 unless ($arm_old_rd);
2732 $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
2733 unless ($arm_old_fr);
2734 $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
2735 unless ($ard_create);
2736 $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
2737 unless ($afr_create);
# Only open (and later commit) a transaction when the caller has not already
# opened one.
2740 my ($outer_xact) = $in_xact->run;
2742 unless ($outer_xact) {
2743 $log->debug("Ingest isn't inside a transaction, starting one now.", INFO);
2744 my ($r) = $begin->run($client);
2745 unless (defined $r and $r) {
2747 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
2750 } catch Error with {
2751 throw OpenSRF::EX::PANIC ("Ingest Couldn't BEGIN transaction!")
# Retrieve through the authority handle resolved above. $lookup belongs to the
# bibliographic ingest and would fetch biblio records (or be undef here).
2761 for my $entry ( $alookup->run(@docids) ) {
2762 # step -1: grab the doc from storage
2763 next unless ($entry);
2766 # my $xslt_doc = $parser->parse_file(
2767 # OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
2768 # $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
2771 my $xml = $entry->marc;
2772 my $docid = $entry->id;
2773 my $marcdoc = $parser->parse_string($xml);
2774 #my $madsdoc = $mads_sheet->transform($marcdoc);
2776 #my $mads = $madsdoc->documentElement;
2777 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
# NOTE(review): @entry_list is filled here, but no visible line hands it to
# $aupdate_entry; confirm whether the update call is intentionally absent.
2779 push @entry_list, $entry;
# $ldr and $oo8 look unused, but are presumably read by the code strings in
# %descriptor_code that are eval'd below -- do not remove without checking.
2781 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
2782 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
2784 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
# Each descriptor field is computed by string-eval of a snippet from
# %descriptor_code. NOTE(review): string eval; failures in $@ are not checked
# in the visible lines.
2785 for my $rd_field ( keys %descriptor_code ) {
2786 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
2788 $rd_obj->record( $docid );
2789 push @rd_list, $rd_obj;
2791 # step 2: build the KOHA rows
2792 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
2793 $_->record( $docid ) for (@tmp_list);
2794 push @ns_list, @tmp_list;
# Non-batch API names stop after the first retrieved record.
2798 last unless ($self->api_name =~ /batch$/o);
# Drop the previously stored descriptor and full_rec rows for these ids before
# inserting the freshly built ones.
2801 $arm_old_rd->run( { record => \@docids } );
2802 $arm_old_fr->run( { record => \@docids } );
2804 my ($rd) = $ard_create->run(@rd_list);
2805 unless (defined $rd) {
2806 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
# Insert through the authority full_rec creator resolved above, matching the
# method named in the error message below.
2809 my ($fr) = $afr_create->run(@ns_list);
2810 unless (defined $fr) {
2811 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
# Commit only the transaction this handler started itself.
2814 unless ($outer_xact) {
2815 $log->debug("Commiting transaction started by Ingest.", INFO);
2816 my ($c) = $commit->run;
2817 unless (defined $c and $c) {
2819 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
# Publish the authority ingest API. Both names dispatch to authority_wormize,
# which stops after the first record unless its api_name matches /batch$/.
2825 __PACKAGE__->register_method(
# NOTE(review): "authortiy" is misspelled (the batch name below spells it
# "authority"). Left unchanged so existing callers of the published name keep
# working; consider registering the correctly spelled name as well.
2826 api_name => "open-ils.worm.authortiy.wormize",
2827 method => "authority_wormize",
2831 __PACKAGE__->register_method(
2832 api_name => "open-ils.worm.authority.wormize.batch",
2833 method => "authority_wormize",
2839 # --------------------------------------------------------------------------------
# _marcxml_to_full_rows( $marcxml, $type )
#
# Walks the leader, controlfield and datafield children of a parsed MARCXML
# document and builds one full_rec row object per leader, per controlfield and
# per datafield child node.
#
#   $marcxml  parsed document; must support ->documentElement
#   $type     class to instantiate for each row; defaults to
#             'Fieldmapper::metabib::full_rec'
#
# Every row, including leader and controlfield rows, is built with $type->new,
# so a caller asking for another row class gets that class throughout.
2842 sub _marcxml_to_full_rows {
2844 my $marcxml = shift;
2845 my $type = shift || 'Fieldmapper::metabib::full_rec';
2849 my $root = $marcxml->documentElement;
# Leader row(s).
2851 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
2852 next unless $tagline;
2854 my $ns = $type->new;
# Decompose (NFD), then strip combining marks so the stored text is
# free of diacritics.
2857 my $val = NFD($tagline->textContent);
2858 $val =~ s/(\pM+)//gso;
# Controlfield rows: the row's tag is the element's "tag" attribute.
2864 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
2865 next unless $tagline;
2867 my $ns = $type->new;
2869 $ns->tag( $tagline->getAttribute( "tag" ) );
2870 my $val = NFD($tagline->textContent);
2871 $val =~ s/(\pM+)//gso;
# Datafield rows: one row per child node, carrying that child's "code"
# attribute as the subfield.
2877 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
2878 next unless $tagline;
2880 my $tag = $tagline->getAttribute( "tag" );
2881 my $ind1 = $tagline->getAttribute( "ind1" );
2882 my $ind2 = $tagline->getAttribute( "ind2" );
2884 for my $data ( $tagline->childNodes ) {
2887 my $ns = $type->new;
2892 $ns->subfield( $data->getAttribute( "code" ) );
2893 my $val = NFD($data->textContent);
2894 $val =~ s/(\pM+)//gso;
# Subfield values are stored lower-cased.
2895 $ns->value( lc($val) );
# _get_field_value( $root, $xpath )
#
# Collects the text of the nodes matched by $xpath under $root into one
# space-separated string, skipping child text that already occurs in the
# string built so far, then decomposes the result (NFD) and strips combining
# marks.
#
#   $root   node supporting ->findnodes
#   $xpath  XPath expression selecting the nodes to harvest
2903 sub _get_field_value {
2905 my( $root, $xpath ) = @_;
2909 # grab the set of matching nodes
2910 my @nodes = $root->findnodes( $xpath );
2911 for my $value (@nodes) {
2913 # grab all children of the node
2914 my @children = $value->childNodes();
2915 for my $child (@children) {
2917 # add the child's content to the growing buffer
2918 my $content = quotemeta($child->textContent);
# An empty $content would make /$content/ the empty pattern, which Perl
# treats as "reuse the last successfully matched regex"; skip empty text
# explicitly so the duplicate check never depends on an unrelated match.
2919 next if ($content eq '' or $string =~ /$content/); # uniquify the values
2920 $string .= $child->textContent . " ";
# Appends the matched node's own text. NOTE(review): the condition guarding
# this append is not part of the lines visible here.
2923 $string .= $value->textContent . " ";
# Decompose, then drop combining marks.
2926 $string = NFD($string);
2927 $string =~ s/(\pM)//gso;
2932 sub modsdoc_to_values {
2933 my( $self, $mods ) = @_;
2935 for my $class (keys %$xpathset) {
2936 $data->{$class} = {};
2937 for my $type (keys %{$xpathset->{$class}}) {
2938 $data->{$class}->{$type} = {};
2939 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};