From bc073517f19e89028feb5921d6c438beb5430d5c Mon Sep 17 00:00:00 2001 From: Jeff Godin Date: Tue, 7 May 2013 15:58:51 -0400 Subject: [PATCH] LP#1187029 Remove unused open-ils.ingest service Remove open-ils.ingest service and related scripts and code. In addition to removing OpenILS::Application::Ingest, this commit removes code that was commented out at the time of open-ils.ingest's replacement with in-db ingest, example configuration elements, a use_ok test, and the following obsolete scripts which made use of or were otherwise related to open-ils.ingest: * Open-ILS/src/extras/import/direct_ingest.pl * Open-ILS/src/extras/import/generate-srfsh-indexer.pl * Open-ILS/src/extras/import/importer.sh The included release notes recommend removal of the open-ils.ingest service from configuration files when upgrading, and suggest removal of Ingest.pm from its location in @INC. Signed-off-by: Jeff Godin Signed-off-by: Bill Erickson --- Open-ILS/examples/opensrf.xml.example | 28 - Open-ILS/src/extras/import/direct_ingest.pl | 122 -- .../extras/import/generate-srfsh-indexer.pl | 30 - Open-ILS/src/extras/import/importer.sh | 36 - Open-ILS/src/perlmods/MANIFEST | 1 - .../perlmods/lib/OpenILS/Application/Cat.pm | 6 - .../lib/OpenILS/Application/Cat/AuthCommon.pm | 8 - .../lib/OpenILS/Application/Ingest.pm | 1453 ----------------- .../src/perlmods/t/01-OpenILS-Application.t | 3 +- docs/RELEASE_NOTES_NEXT/removal_of_ingest.txt | 21 + 10 files changed, 22 insertions(+), 1686 deletions(-) delete mode 100755 Open-ILS/src/extras/import/direct_ingest.pl delete mode 100755 Open-ILS/src/extras/import/generate-srfsh-indexer.pl delete mode 100755 Open-ILS/src/extras/import/importer.sh delete mode 100644 Open-ILS/src/perlmods/lib/OpenILS/Application/Ingest.pm create mode 100644 docs/RELEASE_NOTES_NEXT/removal_of_ingest.txt diff --git a/Open-ILS/examples/opensrf.xml.example b/Open-ILS/examples/opensrf.xml.example index 658ced15d7..d2cb911ad9 100644 --- 
a/Open-ILS/examples/opensrf.xml.example +++ b/Open-ILS/examples/opensrf.xml.example @@ -838,33 +838,6 @@ vim:et:ts=4:sw=4: - - 3 - 1 - OpenILS::Application::Ingest - perl - 1000000 - - 1000000 - open-ils.ingest-unix.log - open-ils.ingest-unix.sock - open-ils.ingest-unix.pid - 5 - 20 - 2 - 5 - - - LIBDIR/javascript/ - LOCALSTATEDIR/catalog/ - LOCALSTATEDIR/web/opac/common/js/ - - biblio_fingerprint.js - biblio_descriptor.js - - - - 10 1 @@ -1220,7 +1193,6 @@ vim:et:ts=4:sw=4: open-ils.justintime open-ils.cstore open-ils.collections - open-ils.ingest open-ils.reporter open-ils.reporter-store diff --git a/Open-ILS/src/extras/import/direct_ingest.pl b/Open-ILS/src/extras/import/direct_ingest.pl deleted file mode 100755 index e25193e47e..0000000000 --- a/Open-ILS/src/extras/import/direct_ingest.pl +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -use OpenSRF::System; -use OpenSRF::EX qw/:try/; -use OpenSRF::AppSession; -use OpenSRF::Application; -use OpenSRF::MultiSession; -use OpenSRF::Utils::SettingsClient; -use OpenILS::Application::Ingest; -use OpenILS::Application::AppUtils; -use OpenILS::Utils::Fieldmapper; -use Digest::MD5 qw/md5_hex/; -use OpenSRF::Utils::JSON; -use Data::Dumper; -use FileHandle; - -use Time::HiRes qw/time/; -use Getopt::Long; -use MARC::Batch; -use MARC::File::XML (BinaryEncoding => 'UTF-8'); -use MARC::Charset; - -MARC::Charset->ignore_errors(1); - -my ($max_uri, $max_cn, $auth, $config, $quiet) = - (0, 0, 0, '/openils/conf/opensrf_core.xml'); - -GetOptions( - 'config=s' => \$config, - 'authority' => \$auth, - 'quiet' => \$quiet, - 'max_uri=i' => \$max_uri, - 'max_cn=i' => \$max_cn, -); - -my @ses; - -open NEWERR, ">&STDERR"; - -select NEWERR; $| = 1; -select STDERR; $| = 1; -select STDOUT; $| = 1; - -OpenSRF::System->bootstrap_client( config_file => $config ); -Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); - -OpenILS::Application::Ingest->use; - -die "We have no more use 
for authority or biblio ingest ... just insert the are or bre objects and you're done!\n"; - -my $meth = 'open-ils.ingest.full.biblio.object.readonly'; -$meth = 'open-ils.ingest.full.authority.object.readonly' if ($auth); - -$meth = OpenILS::Application::Ingest->method_lookup( $meth ); - -my $count = 0; -my $starttime = time; -while (my $rec = <>) { - next unless ($rec); - - my $bib = OpenSRF::Utils::JSON->JSON2perl($rec); - my $data; - - try { - ($data) = $meth->run( $bib => $max_cn => $max_uri ); - } catch Error with { - my $e = shift; - warn "Couldn't process record: $e\n >>> $rec\n"; - }; - - next unless $data; - - postprocess( { bib => $bib, ingest_data => $data } ); - - if (!$quiet && !($count % 20)) { - print NEWERR "\r$count\t". $count / (time - $starttime); - } - - $count++; -} - -sub postprocess { - my $data = shift; - - my ($field_entries, $fp, $rd, $uri); - - my $bib = $data->{bib}; - my $full_rec = $data->{ingest_data}->{full_rec}; - - if (!$auth) { - $field_entries = $data->{ingest_data}->{field_entries}; - $fp = $data->{ingest_data}->{fingerprint}; - $rd = $data->{ingest_data}->{descriptor}; - $uri = $data->{ingest_data}->{uri}; - - $bib->fingerprint( $fp->{fingerprint} ); - $bib->quality( $fp->{quality} ); - } - - print( OpenSRF::Utils::JSON->perl2JSON($bib)."\n" ); - if (!$auth) { - print( OpenSRF::Utils::JSON->perl2JSON($rd)."\n" ); - print( OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$field_entries); - for my $u (@$uri) { - print( OpenSRF::Utils::JSON->perl2JSON($u->{call_number})."\n" ) if $u->{call_number}->isnew; - print( OpenSRF::Utils::JSON->perl2JSON($u->{uri})."\n" ) if $u->{uri}->isnew; - - my $umap = Fieldmapper::asset::uri_call_number_map->new; - $umap->uri($u->{uri}->id); - $umap->call_number($u->{call_number}->id); - print( OpenSRF::Utils::JSON->perl2JSON($umap)."\n" ); - - $max_cn = $u->{call_number}->id + 1 if $u->{call_number}->isnew; - $max_uri = $u->{uri}->id + 1 if $u->{uri}->isnew; - } - } - - print( 
OpenSRF::Utils::JSON->perl2JSON($_)."\n" ) for (@$full_rec); -} - diff --git a/Open-ILS/src/extras/import/generate-srfsh-indexer.pl b/Open-ILS/src/extras/import/generate-srfsh-indexer.pl deleted file mode 100755 index 48ed11e7c6..0000000000 --- a/Open-ILS/src/extras/import/generate-srfsh-indexer.pl +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; - -die "Obsolete ... records are ingested by stored procs within the db\n"; - -my ($start, $stop, $count, $group, $out, $method) = (1,1,1,50,'dynamic-reindex-script.sfsh', 'open-ils.ingest.full.biblio.record_list'); -GetOptions ( "start=i" => \$start, - "end=i" => \$stop, - "groupsize=i" => \$group, - "count=i" => \$count, - "output=s" => \$out, - "method=s" => \$method, -); - -$stop = $start + $count unless ($stop); - -open SFSH, ">$out" or die("Can't open $out! $!"); - -my @list; -for my $i ( $start .. $stop ) { - if ( $i % $group ) { - push @list, $i; - next; - } - push @list, $i; - print SFSH "request open-ils.ingest $method [".join(',', @list)."]\n" if (@list); - @list = (); -} -print SFSH "request open-ils.ingest $method [".join(',', @list)."]\n" if (@list); diff --git a/Open-ILS/src/extras/import/importer.sh b/Open-ILS/src/extras/import/importer.sh deleted file mode 100755 index ac153259b0..0000000000 --- a/Open-ILS/src/extras/import/importer.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh - -CONF=$1 -FILE=$2 -OUT=$3 -KEYS=$4 - -if [ "_$OUT" == "_" ]; then - echo "Usage: $0 {Config File} {MARC file} {Output File} [{key file}]" - exit; -fi - -DIR=`dirname $0` - -$DIR/marc2bre.pl \ - -k $KEYS \ - -c $CONF $FILE 2>/dev/null | \ - $DIR/direct_ingest.pl \ - -c $CONF \ - -t 1 2>/dev/null | \ - $DIR/pg_loader.pl -c $CONF \ - -or bre \ - -or mrd \ - -or mfr \ - -or mtfe \ - -or mafe \ - -or msfe \ - -or mkfe \ - -or msefe \ - -a mrd \ - -a mfr \ - -a mtfe \ - -a mafe \ - -a msfe \ - -a mkfe \ - -a msefe diff --git a/Open-ILS/src/perlmods/MANIFEST b/Open-ILS/src/perlmods/MANIFEST index 
aec48d804a..70f80d3a8f 100644 --- a/Open-ILS/src/perlmods/MANIFEST +++ b/Open-ILS/src/perlmods/MANIFEST @@ -41,7 +41,6 @@ lib/OpenILS/Application/Circ/Survey.pm lib/OpenILS/Application/Circ/Transit.pm lib/OpenILS/Application/Collections.pm lib/OpenILS/Application/Fielder.pm -lib/OpenILS/Application/Ingest.pm lib/OpenILS/Application/Penalty.pm lib/OpenILS/Application/PermaCrud.pm lib/OpenILS/Application/Proxy.pm diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat.pm index fbf140a0ef..ab74ed83a8 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat.pm @@ -173,9 +173,6 @@ sub biblio_record_replace_marc { $e->commit unless $U->event_code($res); - #my $ses = OpenSRF::AppSession->create('open-ils.ingest'); - #$ses->request('open-ils.ingest.full.biblio.record', $recid); - return $res; } @@ -426,9 +423,6 @@ sub biblio_record_xml_import { $e->commit; - #my $ses = OpenSRF::AppSession->create('open-ils.ingest'); - #$ses->request('open-ils.ingest.full.biblio.record', $record->id); - return $record; } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat/AuthCommon.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat/AuthCommon.pm index 5a2d79f037..f988146ab8 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat/AuthCommon.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Cat/AuthCommon.pm @@ -38,10 +38,6 @@ sub import_authority_record { $rec = $e->create_authority_record_entry($rec) or return $e->die_event; - # we don't care about the result, just fire off the request - #my $ses = OpenSRF::AppSession->create('open-ils.ingest'); - #$ses->request('open-ils.ingest.full.authority.record', $recid); - return $rec; } @@ -57,10 +53,6 @@ sub overlay_authority_record { $rec = $e->update_authority_record_entry($rec) or return $e->die_event; - # we don't care about the result, just fire off the request - #my $ses = 
OpenSRF::AppSession->create('open-ils.ingest'); - #$ses->request('open-ils.ingest.full.authority.record', $recid); - return $rec; } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Ingest.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Ingest.pm deleted file mode 100644 index 765000ce31..0000000000 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Ingest.pm +++ /dev/null @@ -1,1453 +0,0 @@ -package OpenILS::Application::Ingest; -use OpenILS::Application; -use base qw/OpenILS::Application/; - -use Unicode::Normalize; -use OpenSRF::EX qw/:try/; - -use OpenSRF::AppSession; -use OpenSRF::Utils::SettingsClient; -use OpenSRF::Utils::Logger qw/:level/; - -use OpenILS::Application::AppUtils; -use OpenILS::Utils::ScriptRunner; -use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::JSON; - -use OpenILS::Utils::Fieldmapper; - -use XML::LibXML; -use XML::LibXSLT; -use Time::HiRes qw(time); - -our %supported_formats = ( - mods33 => {ns => 'http://www.loc.gov/mods/v3'}, - mods32 => {ns => 'http://www.loc.gov/mods/v3'}, - mods3 => {ns => 'http://www.loc.gov/mods/v3'}, - mods => {ns => 'http://www.loc.gov/mods/'}, - marcxml => {ns => 'http://www.loc.gov/MARC21/slim'}, - srw_dc => {ns => 'info:srw/schema/1/dc-schema'}, - oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'}, - rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}, - atom => {ns => 'http://www.w3.org/2005/Atom'}, - rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'}, - rss092 => {ns => ''}, - rss093 => {ns => ''}, - rss094 => {ns => ''}, - rss10 => {ns => 'http://purl.org/rss/1.0/'}, - rss11 => {ns => 'http://purl.org/net/rss1.1#'}, - rss2 => {ns => ''}, -); - - -my $log = 'OpenSRF::Utils::Logger'; - -my $parser = XML::LibXML->new(); -my $xslt = XML::LibXSLT->new(); - -my $mods_sheet; -my $mads_sheet; -my $xpathset = {}; -sub initialize {} -sub child_init {} - -sub post_init { - - unless (keys %$xpathset) { - $log->debug("Running post_init", DEBUG); - - my $xsldir = 
OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl'); - - unless ($supported_formats{mods}{xslt}) { - $log->debug("Loading MODS XSLT", DEBUG); - my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl"); - $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); - } - - unless ($supported_formats{mods3}{xslt}) { - $log->debug("Loading MODS v3 XSLT", DEBUG); - my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl"); - $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); - } - - unless ($supported_formats{mods32}{xslt}) { - $log->debug("Loading MODS v32 XSLT", DEBUG); - my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl"); - $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); - } - - unless ($supported_formats{mods33}{xslt}) { - $log->debug("Loading MODS v33 XSLT", DEBUG); - my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS33.xsl"); - $supported_formats{mods33}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); - } - - my $req = OpenSRF::AppSession - ->create('open-ils.cstore') - - # XXX testing new metabib field use for faceting - #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } ) - ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } ) - - ->gather(1); - - if (ref $req and @$req) { - for my $f (@$req) { - $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath; - $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id; - $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format; - $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." 
: ".$f->xpath, DEBUG); - } - } - } -} - -# -------------------------------------------------------------------------------- -# Biblio ingest - -package OpenILS::Application::Ingest::Biblio; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; - -sub rw_biblio_ingest_single_object { - my $self = shift; - my $client = shift; - my $bib = shift; - - my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib); - return undef unless ($blob); - - $bib->fingerprint( $blob->{fingerprint}->{fingerprint} ); - $bib->quality( $blob->{fingerprint}->{quality} ); - - my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); - - my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1); - my $tmp; - - # update uri stuff ... - - # gather URI call numbers for this record - my $uri_cns = $u->{call_number} = $cstore->request( - 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' } - )->gather(1); - - if (@$uri_cns) { - # gather the maps for those call numbers - my $uri_maps = $u->{call_number} = $cstore->request( - 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns } - )->gather(1); - - # delete the old maps - $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps); - - # and delete the call numbers if there are no more URIs - if (!@{ $blob->{uri} }) { - $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns); - } - } - - # now, add CNs, URIs and maps - my %new_cns_by_owner; - my %new_uris_by_owner; - for my $u ( @{ $blob->{uri} } ) { - - my $owner = $u->{call_number}->owning_lib; - - if ($u->{call_number}->isnew) { - if ($new_cns_by_owner{$owner}) { - $u->{call_number} = $new_cns_by_owner{$owner}; - } else { - $u->{call_number}->clear_id; - $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request( - 
'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number} - )->gather(1); - } - } - - if ($u->{uri}->isnew) { - if ($new_uris_by_owner{$owner}) { - $u->{uri} = $new_uris_by_owner{$owner}; - } else { - $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request( - 'open-ils.cstore.direct.asset.uri.create' => $u->{uri} - )->gather(1); - } - } - - # Check for an existing CN-URI map - $tmp = $cstore->request( - 'open-ils.cstore.direct.asset.uri_call_number_map.id_list', - { call_number => $u->{call_number}->id, uri => $u->{uri}->id } - )->gather(1); - - next if ($tmp); - - my $umap = Fieldmapper::asset::uri_call_number_map->new; - $umap->uri($u->{uri}->id); - $umap->call_number($u->{call_number}->id); - - $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1); - } - - # update full_rec stuff ... - $tmp = $cstore->request( - 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic', - { record => $bib->id } - )->gather(1); - - $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp); - $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} }); - - # update rec_descriptor stuff ... - $tmp = $cstore->request( - 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic', - { record => $bib->id } - )->gather(1); - - $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp); - $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1); - - # deal with classed fields... 
- for my $class ( qw/title author subject keyword series identifier/ ) { - $tmp = $cstore->request( - "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic", - { source => $bib->id } - )->gather(1); - - $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp); - } - for my $obj ( @{ $blob->{field_entries} } ) { - my $class = $obj->class_name; - $class =~ s/^Fieldmapper:://o; - $class =~ s/::/./go; - $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1); - } - - # update MR map ... - - $tmp = $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic', - { source => $bib->id } - )->gather(1); - - $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp); - - # get the old MRs - my $old_mrs = $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] } - )->gather(1) if (@$tmp); - - $old_mrs = [] if (!ref($old_mrs)); - - my $mr; - for my $m (@$old_mrs) { - if ($m->fingerprint eq $bib->fingerprint) { - $mr = $m; - } else { - my $others = $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id } - )->gather(1); - - if (!@$others) { - $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id - )->gather(1); - } - - $m->isdeleted(1); - } - } - - my $holds; - if (!$mr) { - # Get the matchin MR, if any. 
- $mr = $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord.search', - { fingerprint => $bib->fingerprint } - )->gather(1); - - $holds = $cstore->request( - 'open-ils.cstore.direct.action.hold_request.search.atomic', - { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] } - )->gather(1) if (@$old_mrs); - - if ($mr) { - for my $h (@$holds) { - $h->target($mr); - $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1); - $h->ischanged(1); - } - } - } - - if (!$mr) { - $mr = new Fieldmapper::metabib::metarecord; - $mr->fingerprint( $bib->fingerprint ); - $mr->master_record( $bib->id ); - $mr->id( - $cstore->request( - "open-ils.cstore.direct.metabib.metarecord.create", - $mr => { quiet => 'true' } - )->gather(1) - ); - - for my $h (grep { !$_->ischanged } @$holds) { - $h->target($mr); - $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1); - } - } else { - my $mrm = $cstore->request( - 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic', - { metarecord => $mr->id } - )->gather(1); - - if (@$mrm) { - my $best = $cstore->request( - "open-ils.cstore.direct.biblio.record_entry.search", - { id => [ map { $_->source } @$mrm ] }, - { 'select' => { bre => [ qw/id quality/ ] }, - order_by => { bre => "quality desc" }, - limit => 1, - } - )->gather(1); - - if ($best->quality > $bib->quality) { - $mr->master_record($best->id); - } else { - $mr->master_record($bib->id); - } - } else { - $mr->master_record($bib->id); - } - - $mr->clear_mods; - - $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1); - } - - my $mrm = new Fieldmapper::metabib::metarecord_source_map; - $mrm->source($bib->id); - $mrm->metarecord($mr->id); - - $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1); - $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1); - - 
$cstore->request( 'open-ils.cstore.json_query.atomic' => { from => [ 'reporter.simple_rec_update', $bib->id ] } )->gather(1); - - $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;; - $cstore->disconnect; - - return $bib->id; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.object", - method => "rw_biblio_ingest_single_object", - api_level => 1, - argc => 1, -); - -sub rw_biblio_ingest_single_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); - $cstore->request('open-ils.cstore.transaction.begin')->gather(1); - - my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1); - - $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); - $cstore->disconnect; - - return undef unless ($r and @$r); - - return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0]; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.record", - method => "rw_biblio_ingest_single_record", - api_level => 1, - argc => 1, -); - -sub rw_biblio_ingest_record_list { - my $self = shift; - my $client = shift; - my @rec = ref($_[0]) ? 
@{ $_[0] } : @_ ; - - OpenILS::Application::Ingest->post_init(); - my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); - $cstore->request('open-ils.cstore.transaction.begin')->gather(1); - - my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1); - - $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); - $cstore->disconnect; - - return undef unless ($r and @$r); - - my $count = 0; - for (@$r) { - if (($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0]) { - $count++ - } - } - return $count; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.record_list", - method => "rw_biblio_ingest_record_list", - api_level => 1, - argc => 1, -); - -sub ro_biblio_ingest_single_object { - my $self = shift; - my $client = shift; - my $bib = shift; - my $xml = OpenILS::Application::AppUtils->entityize($bib->marc); - my $max_cn = shift; - my $max_uri = shift; - - my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); - - if (!$max_cn) { - my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1); - $max_cn = int($cn->id) + 1000; - } - - if (!$max_uri) { - my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1); - $max_uri = int($cn->id) + 1000; - } - - $cstore->disconnect; - - my $document = $parser->parse_string($xml); - - my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri); - my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document); - my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document); - my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml); - my ($rd) = 
$self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml); - - $_->source($bib->id) for (@mXfe); - $_->record($bib->id) for (@mfr); - $rd->record($bib->id) if ($rd); - - return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris }; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.object.readonly", - method => "ro_biblio_ingest_single_object", - api_level => 1, - argc => 1, -); - -sub ro_biblio_ingest_single_xml { - my $self = shift; - my $client = shift; - my $xml = OpenILS::Application::AppUtils->entityize(shift); - - my $document = $parser->parse_string($xml); - - my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document); - my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document); - my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml); - my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml); - - return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd }; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.xml.readonly", - method => "ro_biblio_ingest_single_xml", - api_level => 1, - argc => 1, -); - -sub ro_biblio_ingest_single_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) - ->gather(1); - - return undef unless ($r and @$r); - - my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc); - - $_->source($rec) for (@{$res->{field_entries}}); - $_->record($rec) for (@{$res->{full_rec}}); - $res->{descriptor}->record($rec); - - return $res; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.record.readonly", - method => "ro_biblio_ingest_single_record", 
- api_level => 1, - argc => 1, -); - -sub ro_biblio_ingest_stream_record { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $rec = $resp->content; - last unless (defined $rec); - - $log->debug("Running open-ils.ingest.full.biblio.record.readonly ..."); - my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec); - - $_->source($rec) for (@{$res->{field_entries}}); - $_->record($rec) for (@{$res->{full_rec}}); - - $client->respond( $res ); - } - - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.record_stream.readonly", - method => "ro_biblio_ingest_stream_record", - api_level => 1, - stream => 1, -); - -sub ro_biblio_ingest_stream_xml { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $xml = $resp->content; - last unless (defined $xml); - - $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ..."); - my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml); - - $client->respond( $res ); - } - - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.xml_stream.readonly", - method => "ro_biblio_ingest_stream_xml", - api_level => 1, - stream => 1, -); - -sub rw_biblio_ingest_stream_import { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $bib = $resp->content; - last unless (defined $bib); - - $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ..."); - my ($res) = 
$self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc); - - $_->source($bib->id) for (@{$res->{field_entries}}); - $_->record($bib->id) for (@{$res->{full_rec}}); - - $client->respond( $res ); - } - - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.biblio.bib_stream.import", - method => "rw_biblio_ingest_stream_import", - api_level => 1, - stream => 1, -); - - -# -------------------------------------------------------------------------------- -# Authority ingest - -package OpenILS::Application::Ingest::Authority; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; - -sub rw_authority_ingest_single_object { - my $self = shift; - my $client = shift; - my $auth = shift; - - my ($blob) = $self->method_lookup("open-ils.ingest.full.authority.object.readonly")->run($auth); - return undef unless ($blob); - - my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); - - my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1); - my $tmp; - - # update full_rec stuff ... - $tmp = $cstore->request( - 'open-ils.cstore.direct.authority.full_rec.id_list.atomic', - { record => $auth->id } - )->gather(1); - - $cstore->request( 'open-ils.cstore.direct.authority.full_rec.delete' => $_ )->gather(1) for (@$tmp); - $cstore->request( 'open-ils.cstore.direct.authority.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} }); - - # XXX when we start extracting authority descriptors and adding sources ... - # - # update rec_descriptor stuff ... 
- #$tmp = $cstore->request( - # 'open-ils.cstore.direct.authority.record_descriptor.id_list.atomic', - # { record => $auth->id } - #)->gather(1); - # - #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.delete' => $_ )->gather(1) for (@$tmp); - #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.create' => $blob->{descriptor} )->gather(1); - #$cstore->request( 'open-ils.cstore.direct.authority.record_entry.update' => $auth )->gather(1); - - $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;; - $cstore->disconnect; - - return $auth->id; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.object", - method => "rw_authority_ingest_single_object", - api_level => 1, - argc => 1, -); - -sub rw_authority_ingest_single_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); - $cstore->request('open-ils.cstore.transaction.begin')->gather(1); - - my $r = $cstore->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )->gather(1); - - $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); - $cstore->disconnect; - - return undef unless ($r and @$r); - - return ($self->method_lookup("open-ils.ingest.full.authority.object")->run($r))[0]; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.record", - method => "rw_authority_ingest_single_record", - api_level => 1, - argc => 1, -); - -sub ro_authority_ingest_single_object { - my $self = shift; - my $client = shift; - my $bib = shift; - my $xml = OpenILS::Application::AppUtils->entityize($bib->marc); - - my $document = $parser->parse_string($xml); - - my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document); - - $_->record($bib->id) for (@mfr); - - return { full_rec => \@mfr }; -} -__PACKAGE__->register_method( - 
api_name => "open-ils.ingest.full.authority.object.readonly", - method => "ro_authority_ingest_single_object", - api_level => 1, - argc => 1, -); - -sub ro_authority_ingest_single_xml { - my $self = shift; - my $client = shift; - my $xml = OpenILS::Application::AppUtils->entityize(shift); - - my $document = $parser->parse_string($xml); - - my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document); - - return { full_rec => \@mfr }; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.xml.readonly", - method => "ro_authority_ingest_single_xml", - api_level => 1, - argc => 1, -); - -sub ro_authority_ingest_single_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec ) - ->gather(1); - - return undef unless ($r and @$r); - - my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc); - - $_->record($rec) for (@{$res->{full_rec}}); - $res->{descriptor}->record($rec); - - return $res; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.record.readonly", - method => "ro_authority_ingest_single_record", - api_level => 1, - argc => 1, -); - -sub ro_authority_ingest_stream_record { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $rec = $resp->content; - last unless (defined $rec); - - $log->debug("Running open-ils.ingest.full.authority.record.readonly ..."); - my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec); - - $_->record($rec) for (@{$res->{full_rec}}); - - $client->respond( $res ); - } - - return undef; -} 
-__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.record_stream.readonly", - method => "ro_authority_ingest_stream_record", - api_level => 1, - stream => 1, -); - -sub ro_authority_ingest_stream_xml { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $xml = $resp->content; - last unless (defined $xml); - - $log->debug("Running open-ils.ingest.full.authority.xml.readonly ..."); - my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml); - - $client->respond( $res ); - } - - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.xml_stream.readonly", - method => "ro_authority_ingest_stream_xml", - api_level => 1, - stream => 1, -); - -sub rw_authority_ingest_stream_import { - my $self = shift; - my $client = shift; - - OpenILS::Application::Ingest->post_init(); - - my $ses = OpenSRF::AppSession->create('open-ils.cstore'); - - while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { - - my $bib = $resp->content; - last unless (defined $bib); - - $log->debug("Running open-ils.ingest.full.authority.xml.readonly ..."); - my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc); - - $_->record($bib->id) for (@{$res->{full_rec}}); - - $client->respond( $res ); - } - - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.full.authority.bib_stream.import", - method => "rw_authority_ingest_stream_import", - api_level => 1, - stream => 1, -); - -# -------------------------------------------------------------------------------- -# MARC index extraction - -package OpenILS::Application::Ingest::XPATH; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; - -# give this an XML documentElement and an XPATH expression -sub 
xpath_to_string { - my $xml = shift; - my $xpath = shift; - my $ns_uri = shift; - my $ns_prefix = shift; - my $unique = shift; - - $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix); - - my $string = ""; - - # grab the set of matching nodes - my @nodes = $xml->findnodes( $xpath ); - for my $value (@nodes) { - - # grab all children of the node - my @children = $value->childNodes(); - for my $child (@children) { - - # add the childs content to the growing buffer - my $content = quotemeta($child->textContent); - next if ($unique && $string =~ /$content/); # uniquify the values - $string .= $child->textContent . " "; - } - if( ! @children ) { - $string .= $value->textContent . " "; - } - } - - $string =~ s/(\w+)\/(\w+)/$1 $2/sgo; - # Split date ranges and ISSNs on the hyphen - $string =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi; - - return NFD($string); -} - -sub class_index_string_xml { - my $self = shift; - my $client = shift; - my $xml = shift; - my @classes = @_; - - OpenILS::Application::Ingest->post_init(); - $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml); - - my %transform_cache; - - for my $class (@classes) { - my $class_constructor = "Fieldmapper::metabib::${class}_field_entry"; - for my $type ( keys %{ $xpathset->{$class} } ) { - - my $def = $xpathset->{$class}->{$type}; - my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}}; - - my $document = $xml; - - if ($sf->{xslt}) { - $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml); - $transform_cache{$def->{format}} = $document; - } - - my $value = xpath_to_string( - $document->documentElement => $def->{xpath}, - $sf->{ns} => $def->{format}, - 1 - ); - - next unless $value; - - $value = NFD($value); - $value =~ s/\pM+//sgo; - $value =~ s/\pC+//sgo; - $value =~ s/\W+$//sgo; - - # hack to normalize ratio-like strings - while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) { - $term = $` . 
join ('', split(/[, ]/, $&)) . $'; - } - - $value =~ s/\b\.+\b//sgo; - $value = lc($value); - - my $fm = $class_constructor->new; - $fm->value( $value ); - $fm->field( $xpathset->{$class}->{$type}->{id} ); - $client->respond($fm); - } - } - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.field_entry.class.xml", - method => "class_index_string_xml", - api_level => 1, - argc => 2, - stream => 1, -); - -sub class_index_string_record { - my $self = shift; - my $client = shift; - my $rec = shift; - my @classes = shift; - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec ) - ->gather(1); - - return undef unless ($r and @$r); - - for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) { - $fm->source($rec); - $client->respond($fm); - } - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.field_entry.class.record", - method => "class_index_string_record", - api_level => 1, - argc => 2, - stream => 1, -); - -sub all_index_string_xml { - my $self = shift; - my $client = shift; - my $xml = shift; - - for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) { - $client->respond($fm); - } - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.extract.field_entry.all.xml", - method => "all_index_string_xml", - api_level => 1, - argc => 1, - stream => 1, -); - -sub all_index_string_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) - ->gather(1); - - return undef unless ($r and @$r); - - for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, 
keys(%$xpathset))) { - $fm->source($rec); - $client->respond($fm); - } - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.extract.field_entry.all.record", - method => "all_index_string_record", - api_level => 1, - argc => 1, - stream => 1, -); - -# -------------------------------------------------------------------------------- -# Flat MARC - -package OpenILS::Application::Ingest::FlatMARC; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; - - -sub _marcxml_to_full_rows { - - my $marcxml = shift; - my $xmltype = shift || 'metabib'; - - my $type = "Fieldmapper::${xmltype}::full_rec"; - - my @ns_list; - - my ($root) = $marcxml->findnodes('//*[local-name()="record"]'); - - for my $tagline ( @{$root->getChildrenByTagName("leader")} ) { - next unless $tagline; - _special_tag_to_full_rows($type, $tagline, \@ns_list, 'LDR'); - } - - for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) { - next unless $tagline; - _special_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" )); - } - - for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) { - next unless $tagline; - _data_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" )); - - if ($xmltype eq 'metabib' and $tag eq '245') { - _data_tag_to_full_rows($type, $tagline, \@ns_list, 'tnf'); - } - } - - $log->debug("Returning ".scalar(@ns_list)." 
Fieldmapper nodes from $xmltype xml"); - return @ns_list; -} - -=head2 _special_tag_to_full_rows - -Converts a leader or control field to a set of normalized values - -=cut - -sub _special_tag_to_full_rows { - my $type = shift; - my $tagline = shift; - my $ns_list = shift; - my $tagname = shift; - - my $ns = $type->new; - - $ns->tag( $tagname ); - my $val = $tagline->textContent; - $val = NFD($val); - $val =~ s/\pM+//sgo; - $val =~ s/\pC+//sgo; - $val =~ s/\W+$//sgo; - $ns->value( $val ); - - push @$ns_list, $ns; -} - -=head2 _data_tag_to_full_rows - -Converts a data field to a set of normalized values - -=cut - -sub _data_tag_to_full_rows { - my $type = shift; - my $tagline = shift; - my $ns_list = shift; - my $tag = shift; - - my $ind1 = $tagline->getAttribute( "ind1" ); - my $ind2 = $tagline->getAttribute( "ind2" ); - - for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) { - next unless $data; - - my $ns = $type->new; - - $ns->tag( $tag ); - $ns->ind1( $ind1 ); - $ns->ind2( $ind2 ); - $ns->subfield( $data->getAttribute( "code" ) ); - my $val = $data->textContent; - $val = NFD($val); - $val =~ s/\pM+//sgo; - $val =~ s/\pC+//sgo; - $val =~ s/\W+$//sgo; - # Split date ranges and ISSNs on the hyphen - $val =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi; - $val =~ s/(\w+)\/(\w+)/$1 $2/sgo; - $ns->value( lc($val) ); - - push @$ns_list, $ns; - } -} - -sub flat_marc_xml { - my $self = shift; - my $client = shift; - my $xml = shift; - - $log->debug("processing [$xml]"); - - $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml); - - my $type = 'metabib'; - $type = 'authority' if ($self->api_name =~ /authority/o); - - OpenILS::Application::Ingest->post_init(); - - $client->respond($_) for (_marcxml_to_full_rows($xml, $type)); - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.flat_marc.authority.xml", - method => "flat_marc_xml", - api_level => 1, - argc => 1, - stream => 1, -); 
-__PACKAGE__->register_method( - api_name => "open-ils.ingest.flat_marc.biblio.xml", - method => "flat_marc_xml", - api_level => 1, - argc => 1, - stream => 1, -); - -sub flat_marc_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - my $type = 'biblio'; - $type = 'authority' if ($self->api_name =~ /authority/o); - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec ) - ->gather(1); - - - return undef unless ($r and $r->marc); - - my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc); - for my $row (@rows) { - $client->respond($row); - $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG); - } - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.flat_marc.biblio.record_entry", - method => "flat_marc_record", - api_level => 1, - argc => 1, - stream => 1, -); -__PACKAGE__->register_method( - api_name => "open-ils.ingest.flat_marc.authority.record_entry", - method => "flat_marc_record", - api_level => 1, - argc => 1, - stream => 1, -); - -# -------------------------------------------------------------------------------- -# URI extraction - -package OpenILS::Application::Ingest::Biblio::URI; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; -use OpenSRF::EX qw/:try/; - - -sub _extract_856_uris { - - my $rec = shift; - my $max_cn = shift; - my $max_uri = shift; - my @objects; - - my $recid = $rec->id; - my $marcxml = $rec->marc; - - my $document = $parser->parse_string($marcxml); - my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]'); - - my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); - - my %cn_cache; - - for my $node (@nodes) { - # first, is there a URI? 
- my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()'); - next unless ($href); - - # now, find the best possible label - my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()'); - $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()'); - $label ||= $href; - - # look for use info - my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()'); - $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()'); - $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()'); - - # moving on to the URI owner - my $owner = $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce - $owner ||= $node->findvalue('*[local-name()="subfield" and @code="w"]/text()'); - $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()'); - - $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ... - - # no owner? 
skip it :( - next unless ($owner); - - my $org = $cstore - ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} ) - ->gather(1); - - next unless ($org); - - # now we can construct the uri object - my $uri = $cstore - ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } ) - ->gather(1); - - if (!$uri) { - $uri = Fieldmapper::asset::uri->new; - $uri->isnew( 1 ); - $uri->id( $$max_uri++ ); - $uri->label($label); - $uri->href($href); - $uri->active('t'); - $uri->use_restriction($use); - } - - # see if we need to create a call number - my $cn = $cn_cache{$org->id}; - $cn = $cn->clone if ($cn); - $cn->clear_isnew if ($cn); - - $cn ||= $cstore - ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } ) - ->gather(1); - - if (!$cn) { - $cn = Fieldmapper::asset::call_number->new; - $cn->isnew( 1 ); - $cn->deleted('f'); - $cn->id( $$max_cn++ ); - $cn->owning_lib( $org->id ); - $cn->record( $recid ); - $cn->create_date( 'now' ); - $cn->creator( $rec->creator ); - $cn->editor( $rec->editor ); - $cn->edit_date( 'now' ); - $cn->label( '##URI##' ); - } - - $cn_cache{$org->id} = $cn; - - push @objects, { uri => $uri, call_number => $cn }; - } - - $log->debug("Returning ".scalar(@objects)." 
URI nodes for record $recid"); - $cstore->disconnect; - return @objects; -} - -sub get_uris_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec ) - ->gather(1); - - return undef unless ($r and $r->marc); - - $client->respond($_) for (_extract_856_uris($r)); - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.856_uri.record", - method => "get_uris_record", - api_level => 1, - argc => 1, - stream => 1, -); - -sub get_uris_object { - my $self = shift; - my $client = shift; - my $obj = shift; - my $max_cn = shift; - my $max_uri = shift; - - return undef unless ($obj and $obj->marc); - - $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri)); - return undef; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.856_uri.object", - method => "get_uris_object", - api_level => 1, - argc => 1, - stream => 1, -); - - -# -------------------------------------------------------------------------------- -# Fingerprinting - -package OpenILS::Application::Ingest::Biblio::Fingerprint; -use base qw/OpenILS::Application::Ingest/; -use Unicode::Normalize; -use OpenSRF::EX qw/:try/; - -sub biblio_fingerprint_record { - my $self = shift; - my $client = shift; - my $rec = shift; - - OpenILS::Application::Ingest->post_init(); - - my $r = OpenSRF::AppSession - ->create('open-ils.cstore') - ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) - ->gather(1); - - return undef unless ($r and $r->marc); - - my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc); - $log->debug("Returning [$fp] as fingerprint for record $rec", INFO); - $fp->{quality} = int($fp->{quality}); - return $fp; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.fingerprint.record", - method => 
"biblio_fingerprint_record", - api_level => 1, - argc => 1, -); - -our $fp_script; -sub biblio_fingerprint { - my $self = shift; - my $client = shift; - my $xml = OpenILS::Application::AppUtils->entityize(shift); - - $log->internal("Got MARC [$xml]"); - - if(!$fp_script) { - my @pfx = ( "apps", "open-ils.ingest","app_settings" ); - my $conf = OpenSRF::Utils::SettingsClient->new; - - my $libs = $conf->config_value(@pfx, 'script_path'); - my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint'); - my $script_libs = (ref($libs)) ? $libs : [$libs]; - - $log->debug("Loading script $script_file for biblio fingerprinting..."); - - $fp_script = new OpenILS::Utils::ScriptRunner - ( file => $script_file, - paths => $script_libs, - reset_count => 100 ); - } - - $fp_script->insert('environment' => {marc => $xml} => 1); - - my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef); - $log->debug("Script for biblio fingerprinting completed successfully..."); - - return $res; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.fingerprint.xml", - method => "biblio_fingerprint", - api_level => 1, - argc => 1, -); - -our $rd_script; -sub biblio_descriptor { - my $self = shift; - my $client = shift; - my $xml = OpenILS::Application::AppUtils->entityize(shift); - - $log->internal("Got MARC [$xml]"); - - if(!$rd_script) { - my @pfx = ( "apps", "open-ils.ingest","app_settings" ); - my $conf = OpenSRF::Utils::SettingsClient->new; - - my $libs = $conf->config_value(@pfx, 'script_path'); - my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor'); - my $script_libs = (ref($libs)) ? 
$libs : [$libs]; - - $log->debug("Loading script $script_file for biblio descriptor extraction..."); - - $rd_script = new OpenILS::Utils::ScriptRunner - ( file => $script_file, - paths => $script_libs, - reset_count => 100 ); - } - - $log->debug("Setting up environment for descriptor extraction script..."); - $rd_script->insert('environment.marc' => $xml => 1); - $log->debug("Environment building complete..."); - - my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef); - $log->debug("Script for biblio descriptor extraction completed successfully"); - - my $d1 = $res->date1; - if ($d1 && $d1 ne ' ') { - $d1 =~ tr/ux/00/; - $res->date1( $d1 ); - } - - my $d2 = $res->date2; - if ($d2 && $d2 ne ' ') { - $d2 =~ tr/ux/99/; - $res->date2( $d2 ); - } - - return $res; -} -__PACKAGE__->register_method( - api_name => "open-ils.ingest.descriptor.xml", - method => "biblio_descriptor", - api_level => 1, - argc => 1, -); - - -1; - -# vim:et:ts=4:sw=4: diff --git a/Open-ILS/src/perlmods/t/01-OpenILS-Application.t b/Open-ILS/src/perlmods/t/01-OpenILS-Application.t index 6b30fbbe80..5474f61acb 100644 --- a/Open-ILS/src/perlmods/t/01-OpenILS-Application.t +++ b/Open-ILS/src/perlmods/t/01-OpenILS-Application.t @@ -1,7 +1,7 @@ #!perl -T use utf8; -use Test::More tests => 14; +use Test::More tests => 13; BEGIN { use_ok( 'OpenILS::Application' ); @@ -11,7 +11,6 @@ use_ok( 'OpenILS::Application::AppUtils' ); use_ok( 'OpenILS::Application::Booking' ); use_ok( 'OpenILS::Application::Collections' ); use_ok( 'OpenILS::Application::Fielder' ); -use_ok( 'OpenILS::Application::Ingest' ); use_ok( 'OpenILS::Application::Penalty' ); use_ok( 'OpenILS::Application::PermaCrud' ); use_ok( 'OpenILS::Application::Reporter' ); diff --git a/docs/RELEASE_NOTES_NEXT/removal_of_ingest.txt b/docs/RELEASE_NOTES_NEXT/removal_of_ingest.txt new file mode 100644 index 0000000000..cc6aee2f8c --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/removal_of_ingest.txt @@ -0,0 +1,21 @@ 
+Removal of open-ils.ingest service +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The open-ils.ingest service is no longer required, and has been +removed. + +You should update your opensrf.xml file to remove references to +open-ils.ingest, and you may also wish to remove the +OpenILS/Application/Ingest.pm file from your Perl @INC path. + +In opensrf.xml, remove the entire <open-ils.ingest> element from the +<apps> element, and remove <appname>open-ils.ingest</appname> from +any <activeapps> elements where it is present. + +If you have the perldoc command installed, you can use the following +command to locate the path on disk of the Ingest.pm file, which is +no longer required and can be removed: + +[source, bash] +----------------------------------------------------------------- +perldoc -l OpenILS::Application::Ingest +----------------------------------------------------------------- -- 2.43.2