From f394751a7cd63b776db2c501e045f2717c47ad31 Mon Sep 17 00:00:00 2001 From: gmc Date: Wed, 6 Apr 2011 22:06:40 +0000 Subject: [PATCH] Revert "install command-line MARC import tools in @prefix@/bin" This reverts commit 7125758576667feeff054ceb4b79d7d1510bbc6f. git-svn-id: svn://svn.open-ils.org/ILS/trunk@20009 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/Makefile.am | 2 +- Open-ILS/src/extras/import/marc2are.pl | 2 + Open-ILS/src/extras/import/marc2are.pl.in | 119 ------ Open-ILS/src/extras/import/marc2bre.pl | 2 + Open-ILS/src/extras/import/marc2bre.pl.in | 396 ------------------ Open-ILS/src/extras/import/marc2sre.pl.in | 323 -------------- .../src/extras/import/parallel_pg_loader.pl | 2 + .../extras/import/parallel_pg_loader.pl.in | 136 ------ configure.ac | 9 +- 9 files changed, 8 insertions(+), 983 deletions(-) delete mode 100755 Open-ILS/src/extras/import/marc2are.pl.in delete mode 100755 Open-ILS/src/extras/import/marc2bre.pl.in delete mode 100755 Open-ILS/src/extras/import/marc2sre.pl.in delete mode 100755 Open-ILS/src/extras/import/parallel_pg_loader.pl.in diff --git a/Open-ILS/src/Makefile.am b/Open-ILS/src/Makefile.am index e87bbd142b..7988b53eb0 100644 --- a/Open-ILS/src/Makefile.am +++ b/Open-ILS/src/Makefile.am @@ -138,7 +138,7 @@ if BUILDEGJAVA OILSJAVA_DIR = java endif -bin_SCRIPTS = $(core_scripts) $(reporter_scripts) $(installautojs) @srcdir@/extras/eg_config @srcdir@/extras/fast-extract @srcdir@/extras/import/marc2are.pl @srcdir@/extras/import/marc2bre.pl @srcdir@/extras/import/marc2sre.pl @srcdir@/extras/import/parallel_pg_loader.pl +bin_SCRIPTS = $(core_scripts) $(reporter_scripts) $(installautojs) @srcdir@/extras/eg_config @srcdir@/extras/fast-extract data_DATA = $(core_data) $(reporter_data) # Take care of which subdirectories to build, and which extra files to include in a distribution. diff --git a/Open-ILS/src/extras/import/marc2are.pl b/Open-ILS/src/extras/import/marc2are.pl index 62734adaa1..1eb86d43cf 100755 --- a/Open-ILS/src/extras/import/marc2are.pl +++ b/Open-ILS/src/extras/import/marc2are.pl @@ -2,6 +2,8 @@ use strict; use warnings; +use lib '/openils/lib/perl5/'; + use OpenSRF::System; use OpenSRF::Application; use OpenSRF::EX qw/:try/; diff --git a/Open-ILS/src/extras/import/marc2are.pl.in b/Open-ILS/src/extras/import/marc2are.pl.in deleted file mode 100755 index d6a4c12c00..0000000000 --- a/Open-ILS/src/extras/import/marc2are.pl.in +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -use OpenSRF::System; -use OpenSRF::Application; -use OpenSRF::EX qw/:try/; -use OpenSRF::AppSession; -use OpenSRF::MultiSession; -use OpenSRF::Utils::SettingsClient; -use OpenILS::Application::AppUtils; -use OpenILS::Utils::Fieldmapper; -use Digest::MD5 qw/md5_hex/; -use OpenSRF::Utils::JSON; -use Data::Dumper; -use Unicode::Normalize; - -use Time::HiRes qw/time/; -use Getopt::Long; -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Charset; - -MARC::Charset->ignore_errors(1); - -my ($count, $user, $password, $config, $marctype, $keyfile, @files, $quiet) = - (1, 'admin', 'open-ils', '@sysconfdir@/opensrf_core.xml', 'USMARC'); - -GetOptions( - 'startid=i' => \$count, - 'user=s' => \$user, - 'marctype=s' => \$marctype, - 'password=s' => \$password, - 'config=s' => \$config, - 'file=s' => \@files, - 'quiet' => \$quiet, -); - -@files = @ARGV if (!@files); - -my @ses; -my @req; -my %processing_cache; - -OpenSRF::System->bootstrap_client( config_file => $config ); -Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); - -$user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id; - -select STDERR; $| = 1; -select STDOUT; $| = 1; - -my $batch = new MARC::Batch ( $marctype, @files ); -$batch->strict_off(); -$batch->warnings_off(); - -my $starttime = time; -my $rec; -while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { - next if ($rec == -1); - my $id = $count; - - (my $xml = $rec->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+entityize($xml); - $xml =~ s/[\x00-\x1f]//go; - - my $bib = new Fieldmapper::authority::record_entry; - $bib->id($id); - $bib->active('t'); - $bib->deleted('f'); - $bib->marc($xml); - $bib->creator($user); - $bib->create_date('now'); - $bib->editor($user); - $bib->edit_date('now'); - $bib->last_xact_id('IMPORT-'.$starttime); - - print OpenSRF::Utils::JSON->perl2JSON($bib)."\n"; - - $count++; - - if (!$quiet && !($count % 20)) { - print STDERR "\r$count\t". $count / (time - $starttime); - } -} - -sub login { - my( $username, $password, $type ) = @_; - - $type |= "staff"; - - my $seed = OpenILS::Application::AppUtils->simplereq( - 'open-ils.auth', - 'open-ils.auth.authenticate.init', - $username - ); - - die("No auth seed. Couldn't talk to the auth server") unless $seed; - - my $response = OpenILS::Application::AppUtils->simplereq( - 'open-ils.auth', - 'open-ils.auth.authenticate.complete', - { username => $username, - password => md5_hex($seed . md5_hex($password)), - type => $type }); - - die("No auth response returned on login.") unless $response; - - my $authtime = $response->{payload}->{authtime}; - my $authtoken = $response->{payload}->{authtoken}; - - die("Login failed for user $username!") unless $authtoken; - - return $authtoken; -} - diff --git a/Open-ILS/src/extras/import/marc2bre.pl b/Open-ILS/src/extras/import/marc2bre.pl index bddde4fb83..e5c9604dcb 100755 --- a/Open-ILS/src/extras/import/marc2bre.pl +++ b/Open-ILS/src/extras/import/marc2bre.pl @@ -2,6 +2,8 @@ use strict; use warnings; +#use lib '/openils/lib/perl5/'; + use Error qw/:try/; use OpenILS::Utils::Fieldmapper; use Digest::MD5 qw/md5_hex/; diff --git a/Open-ILS/src/extras/import/marc2bre.pl.in b/Open-ILS/src/extras/import/marc2bre.pl.in deleted file mode 100755 index d9de5c3f44..0000000000 --- a/Open-ILS/src/extras/import/marc2bre.pl.in +++ /dev/null @@ -1,396 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -use Error qw/:try/; -use OpenILS::Utils::Fieldmapper; -use Digest::MD5 qw/md5_hex/; -use OpenSRF::Utils::JSON; -use OpenILS::Application::AppUtils; -use Data::Dumper; -use Unicode::Normalize; -use Encode; - -use FileHandle; -use Time::HiRes qw/time/; -use Getopt::Long; -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Charset; -use DBI; - -#MARC::Charset->ignore_errors(1); - -my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $tcn_offset, $tcn_mapfile, $tcn_dumpfile, $used_id_file, $used_tcn_file, $enc, @files, @trash_fields, @req_fields, $use901, $quiet, $tcn_field, $tcn_subfield) = - ('', 'a', 0, 1, '@sysconfdir@/opensrf_core.xml', '@sysconfdir@/fm_IDL.xml', 'USMARC', 0); - -my ($db_driver, $db_host, $db_port, $db_name, $db_user, $db_pw) = - ('Pg', 'localhost', 5432, 'evergreen', 'postgres', 'postgres'); - -GetOptions( - 'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML - 'startid=i' => \$recid, # id number to start with when auto-assigning id numbers, defaults to highest id in database + 1 - 'idfield=s' => \$id_field, # field containing the record's desired internal id, NOT tcn - 'idsubfield=s' => \$id_subfield, # subfield of above record id field - 'tcnfield=s' => \$tcn_field, # field containing the record's desired tcn, NOT the internal id - 'tcnsubfield=s' => \$tcn_subfield, # subfield of above record tcn field - 'tcnoffset=i' => \$tcn_offset, # optionally skip characters at beginning of supplied tcn (e.g. to remove '(Sirsi)') - 'user=s' => \$user, # set creator/editor values for records in database - 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset - 'keyfile=s' => \$tcn_mapfile, # DEPRECATED, use tcn_mapfile instead - 'tcn_mapfile=s' => \$tcn_mapfile, # external file which allows for matching specific record tcns to specific record ids, format = one id_number|tcn_number combo per line - 'tcnfile=s' => \$tcn_dumpfile, # DEPRECATED, use tcn_dumpfile instead - 'tcn_dumpfile=s' => \$tcn_dumpfile, # allows specification of a dumpfile for all used tcn values - 'config=s' => \$config, # location of OpenSRF core config file, defaults to @sysconfdir@/opensrf_core.xml - 'file=s' => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV) - 'required_fields=s' => \@req_fields, # skip any records missing these fields - 'trash=s' => \@trash_fields, # fields to remove from all processed records - 'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to @sysconfdir@/fm_IDL.xml - 'dontuse=s' => \$used_id_file, # DEPRECATED, use used_id_file instead - 'used_id_file=s' => \$used_id_file, # external file which prevents id collisions by specifying ids already in use in the database, format = one id number per line - 'used_tcn_file=s' => \$used_tcn_file, # external file which prevents tcn collisions by specifying tcns already in use in the database, format = one tcn number per line - "db_driver=s" => \$db_driver, # database driver type, usually 'Pg' - "db_host=s" => \$db_host, # database hostname - "db_port=i" => \$db_port, # database port - "db_name=s" => \$db_name, # database name - "db_user=s" => \$db_user, # database username - "db_pw=s" => \$db_pw, # database password - 'use901' => \$use901, # use values from previously created 901 fields and skip all other processing - 'quiet' => \$quiet # do not output progress count -); - -@trash_fields = split(/,/,join(',',@trash_fields)); -@req_fields = split(/,/,join(',',@req_fields)); - -if ($enc) { - MARC::Charset->ignore_errors(1); - MARC::Charset->assume_encoding($enc); -} - -if (uc($marctype) eq 'XML') { - 'open'->use(':utf8'); -} else { - bytes->use(); -} - -@files = @ARGV if (!@files); - -my @ses; -my @req; -my %processing_cache; - -my $dsn = "dbi:$db_driver:host=$db_host;port=$db_port;dbname=$db_name"; - -if (!$recid) { - my $table = 'biblio_record_entry'; - $table = 'biblio.record_entry' if ($db_driver eq 'Pg'); - - my $dbh = DBI->connect($dsn,$db_user,$db_pw); - my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table"); - - $sth->execute; - $sth->bind_col(1, \$recid); - $sth->fetch; - $sth->finish; - $dbh->disconnect; - - # In a clean Evergreen schema, the maximum ID will be -1; but sequences - # have to start at 1, so handle the clean Evergreen schema situation - if ($recid == 0) { - $recid = 1; - } -} - -my %tcn_source_map = ( - a => 'Sirsi_Auto', - o => 'OCLC', - i => 'ISxN', - l => 'LCCN', - s => 'System', - g => 'Gutenberg', - z => 'Unknown', -); - -Fieldmapper->import(IDL => $idlfile); - -my %tcn_map; -if ($tcn_mapfile) { - open F, $tcn_mapfile or die "Couldn't open key file $tcn_mapfile"; - while () { - if ( /^(\d+)\|(\S+)/o ) { - $tcn_map{$1} = $2; - } - } - close(F); -} - -my %used_recids; -if ($used_id_file) { - open F, $used_id_file or die "Couldn't open used-id file $used_id_file"; - while () { - chomp; - s/^\s*//; - s/\s*$//; - $used_recids{$_} = 1; - } - close(F); -} - -my %used_tcns; -if ($used_tcn_file) { - open F, $used_tcn_file or die "Couldn't open used-tcn file $used_tcn_file"; - while () { - chomp; - s/^\s*//; - s/\s*$//; - $used_tcns{$_} = 1; - } - close(F); -} - -select STDERR; $| = 1; -select STDOUT; $| = 1; - -my $batch = new MARC::Batch ( $marctype, @files ); -$batch->strict_off(); -$batch->warnings_off(); - -my $starttime = time; -my $rec; -my $count = 0; -PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { - next if ($rec == -1); - - $count++; - - # Skip records that don't contain a required field (like '245', for example) - foreach my $req_field (@req_fields) { - if (!$rec->field("$req_field")) { - warn "\n!!! Record $count missing required field $req_field, skipping record.\n"; - next PROCESS; - } - } - - my $id; - my $tcn_value = ''; - my $tcn_source = ''; - # If $use901 is set, use it for the id, the tcn, and the tcn source without ANY further processing (i.e. no error checking) - if ($use901) { - $rec->delete_field($_) for ($rec->field(@trash_fields)); - $tcn_value = $rec->subfield('901' => 'a'); - $tcn_source = $rec->subfield('901' => 'b'); - $id = $rec->subfield('901' => 'c'); - } else { - # This section of code deals with the record's 'id', which is a system-level, numeric, internal identifier - # It is often convenient but not necessary to carry over the internal ids from your previous ILS, so here is where that happens - if ($id_field) { - my $field = $rec->field($id_field); - if ($field) { - if ($field->is_control_field) { - $id = $field->data; - } else { - $id = $field->subfield($id_subfield); - } - # ensure internal record ids are numeric only - $id =~ s/\D+//gso if $id; - } - - # catch problem ids - if (!$id) { - warn "\n!!! Record $count has missing or invalid id field $id_field, assigning new id.\n"; - $id = ''; - } elsif (exists $used_recids{$id}) { - warn "\n!!! Record $count has a duplicate id in field $id_field, assigning new id.\n"; - $id = ''; - } else { - $used_recids{$id} = 1; - } - } - - # id field not specified or found to be invalid, assign auto id - if (!$id) { - while (exists $used_recids{$recid}) { - $recid++; - } - $used_recids{$recid} = 1; - $id = $recid; - $recid++; - } - - # This section of code deals with the record's 'tcn', or title control number, which is a record-level, possibly alpha-numeric, sometimes user-supplied value - if ($tcn_field) { - if ($tcn_mapfile) { - if (my $tcn = $tcn_map{$id}) { - $rec->delete_field( $_ ) for ($rec->field($tcn_field)); - $rec->append_fields( MARC::Field->new( $tcn_field, '', '', $tcn_subfield, $tcn ) ); - } else { - warn "\n!!! ID $id not found in tcn_mapfile, skipping record.\n"; - $count++; - next; - } - } - - my $field = $rec->field($tcn_field); - if ($field) { - if ($field->is_control_field) { - $tcn_value = $field->data; - } else { - $tcn_value = $field->subfield($tcn_subfield); - } - # $tcn_offset is another Sirsi influence, as it will allow you to remove '(Sirsi)' - # from exported tcns, but was added more generically to perhaps support other use cases - if ($tcn_value) { - $tcn_value = substr($tcn_value, $tcn_offset); - } else { - $tcn_value = ''; - } - } - } - - # turn our id and tcn into a 901 field, and also create a tcn and/or figure out the tcn source - ($tcn_value, $tcn_source) = preprocess($rec, $tcn_value, $id); - # delete the old identifier and trash fields - $rec->delete_field($_) for ($rec->field('901', $tcn_field, $id_field, @trash_fields)); - } - - (my $xml = $rec->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+entityize($xml); - $xml =~ s/[\x00-\x1f]//go; - - my $bib = new Fieldmapper::biblio::record_entry; - $bib->id($id); - $bib->active('t'); - $bib->deleted('f'); - $bib->marc($xml); - $bib->creator($user); - $bib->create_date('now'); - $bib->editor($user); - $bib->edit_date('now'); - $bib->tcn_source($tcn_source); - $bib->tcn_value($tcn_value); - $bib->last_xact_id('IMPORT-'.$starttime); - - print OpenSRF::Utils::JSON->perl2JSON($bib)."\n"; - $used_tcns{$tcn_value} = 1; - - if (!$quiet && !($count % 50)) { - print STDERR "\r$count\t". $count / (time - $starttime); - } -} - -if ($tcn_dumpfile) { - open TCN_DUMPFILE, '>', $tcn_dumpfile; - print TCN_DUMPFILE "$_\n" for (keys %used_tcns); -} - - -sub preprocess { - my $rec = shift; - my $tcn_value = shift; - my $id = shift; - - my $tcn_source = ''; - # in the following code, $tcn_number represents the portion of the tcn following the source code-letter - my $tcn_number = ''; - my $warn = 0; - my $passed_tcn = ''; - - # this preprocess subroutine is optimized for Sirsi-created tcns, that is, those with a single letter - # followed by some digits (and maybe 'x' in older systems). If using user supplied tcns, try to identify - # the source here, otherwise set to 'z' ('Unknown') - if ($tcn_value =~ /([a-z])([0-9xX]+)/) { - $tcn_source = $1; - $tcn_number = $2; - } else { - $tcn_source = 'z'; - } - - # save and warn if a passed in TCN is replaced - if ($tcn_value && exists $used_tcns{$tcn_value}) { - $passed_tcn = $tcn_value; - $tcn_value = ''; - $tcn_number = ''; - $tcn_source = ''; - $warn = 1; - } - - # we didn't have a user supplied tcn, or it was a duplicate, so let's derive one from commonly unique record fields - if (!$tcn_value) { - my $f = $rec->field('001'); - $tcn_value = despace($f->data) if ($f); - } - - if (!$tcn_value || exists $used_tcns{$tcn_value}) { - my $f = $rec->field('000'); - if ($f) { - $tcn_number = despace($f->data); - $tcn_source = 'g'; # only Project Gutenberg seems to use this - $tcn_value = $tcn_source.$tcn_number; - } - } - - if (!$tcn_value || exists $used_tcns{$tcn_value}) { - my $f = $rec->field('020'); - if ($f) { - $tcn_number = despace($f->subfield('a')); - $tcn_source = 'i'; - $tcn_value = $tcn_source.$tcn_number; - } - } - - if (!$tcn_value || exists $used_tcns{$tcn_value}) { - my $f = $rec->field('022'); - if ($f) { - $tcn_number = despace($f->subfield('a')); - $tcn_source = 'i'; - $tcn_value = $tcn_source.$tcn_number; - } - } - - if (!$tcn_value || exists $used_tcns{$tcn_value}) { - my $f = $rec->field('010'); - if ($f) { - $tcn_number = despace($f->subfield('a')); - $tcn_source = 'l'; - $tcn_value = $tcn_source.$tcn_number; - } - } - - # special case to catch possibly passed in full OCLC numbers and those derived from the 001 field - if ($tcn_value =~ /^oc(m|n)(\d+)$/o) { - $tcn_source = 'o'; - $tcn_number = $2; - $tcn_value = $tcn_source.$tcn_number; - } - - if (!$tcn_value || exists $used_tcns{$tcn_value}) { - $tcn_source = 's'; - $tcn_number = $id; - $tcn_value = $tcn_source.$tcn_number; - $warn = 1 - } - - - # expand $tcn_source from code letter to full name - $tcn_source = do { $tcn_source_map{$tcn_source} || 'Unknown' }; - - if ($warn) { - warn "\n!!! TCN $passed_tcn is already in use, using TCN ($tcn_value) derived from $tcn_source ID.\n"; - } - - return ($tcn_value, $tcn_source); -} - -sub despace { - my $value = shift; - - # remove all leading/trailing spaces and trucate at first internal space if present - $value =~ s/\s*$//o; - $value =~ s/^\s*//o; - $value =~ s/^(\S+).*$/$1/o; - - return $value; -} diff --git a/Open-ILS/src/extras/import/marc2sre.pl.in b/Open-ILS/src/extras/import/marc2sre.pl.in deleted file mode 100755 index 377aaf7045..0000000000 --- a/Open-ILS/src/extras/import/marc2sre.pl.in +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -use OpenSRF::System; -use OpenSRF::EX qw/:try/; -use OpenSRF::Utils::SettingsClient; -use OpenILS::Application::AppUtils; -use OpenILS::Event; -use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::JSON; -use Unicode::Normalize; - -use Time::HiRes qw/time/; -use Getopt::Long; -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Charset; -use Pod::Usage; - -MARC::Charset->ignore_errors(1); - -# Command line options, with applicable defaults -my ($idsubfield, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help); -my $idfield = '004'; -my $count = 1; -my $user = 'admin'; -my $config = '@sysconfdir@/opensrf_core.xml'; -my $marctype = 'USMARC'; - -my $parse_options = GetOptions( - 'idfield=s' => \$idfield, - 'idsubfield=s' => \$idsubfield, - 'bibfield=s'=> \$bibfield, - 'bibsubfield=s'=> \$bibsubfield, - 'startid=i'=> \$count, - 'user=s' => \$user, - 'config=s' => \$config, - 'marctype=s' => \$marctype, - 'file=s' => \@files, - 'libmap=s' => \$libmap, - 'quiet' => \$quiet, - 'help' => \$help, -); - -if (!$parse_options or $help) { - pod2usage(0); -} - -@files = @ARGV if (!@files); - -my $U = 'OpenILS::Application::AppUtils'; -my @ses; -my @req; -my %processing_cache; -my $lib_id_map; -if ($libmap) { - $lib_id_map = map_libraries_to_ID($libmap); -} - -OpenSRF::System->bootstrap_client( config_file => $config ); -Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); - -my ($result, $evt) = get_user_id($user); -if ($evt || !$result->id) { - print("Could not retrieve user with user name '$user'\n"); - exit(0); -} - -$user = $result->id; - -select STDERR; $| = 1; -select STDOUT; $| = 1; - -my $batch = new MARC::Batch ( $marctype, @files ); -$batch->strict_off(); -$batch->warnings_off(); - -my $starttime = time; -my $rec; -while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { - next if ($rec == -1); - my $id = $count; - my $record_field; - if ($idsubfield) { - $record_field = $rec->field($idfield, $idsubfield); - } else { - $record_field = $rec->field($idfield); - } - my $record = $count; - - if ($record_field) { - $record = $record_field->data; - } - - # If we have been given bibfield / bibsubfield values, use those to find - # a matching bib record for $record and use _that_ as our record instead - if ($bibfield) { - my ($result, $evt) = map_id_to_bib($record); - if ($evt || !$result->record) { - print("Could not find matching bibliographic record for $record\n"); - } - $record = $result->record; - } else { - # Strip the identifier down to a usable integer - $record =~ s/^.*?(\d+).*?$/$1/o; - } - - (my $xml = $rec->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+entityize($xml); - $xml =~ s/[\x00-\x1f]//go; - - my $bib = new Fieldmapper::serial::record_entry; - $bib->id($id); - $bib->record($record); - $bib->active('t'); - $bib->deleted('f'); - $bib->marc($xml); - $bib->creator($user); - $bib->create_date('now'); - $bib->editor($user); - $bib->edit_date('now'); - $bib->last_xact_id('IMPORT-'.$starttime); - - if ($libmap) { - my $lib_id = get_library_id($rec); - if ($lib_id) { - $bib->owning_lib($lib_id); - } - } - - print OpenSRF::Utils::JSON->perl2JSON($bib)."\n"; - - $count++; - - if (!$quiet && !($count % 20)) { - print STDERR "\r$count\t". $count / (time - $starttime); - } -} - -# Generate a hash of library names (as found in the 852b in the MFHD record) to -# integers representing actor.org_unit ID values -sub map_libraries_to_ID { - my $map_filename = shift; - - my %lib_id_map; - - open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!"; - while () { - my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/; - $lib_id_map{$lib} = $id; - } - - return \%lib_id_map; -} - -# Look up the actor.org_unit.id value for this library name -sub get_library_id { - my $record = shift; - - my $lib_name = $record->field('852')->subfield('b'); - my $lib_id = $lib_id_map->{$lib_name}; - - return $lib_id; -} - -# Get the actor.usr.id value for the given username -sub get_user_id { - my $username = shift; - - my ($result, $evt); - - $result = $U->cstorereq( - 'open-ils.cstore.direct.actor.user.search', - { usrname => $username, deleted => 'f' } - ); - $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result; - - return ($result, $evt); -} - -# Get the biblio.record_entry.id value for the given identifier; note that this -# approach uses a wildcard to match anything that precedes the identifier value -sub map_id_to_bib { - my $record = shift; - - my ($result, $evt); - - my %search = ( - tag => $bibfield, - value => { ilike => '%' . $record } - ); - - if ($bibsubfield) { - $search{'subfield'} = $bibsubfield; - } - - $result = $U->cstorereq( - 'open-ils.cstore.direct.metabib.full_rec.search', \%search - ); - $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $record; - - return ($result, $evt); -} - -__END__ - -=head1 NAME - -marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE -(serial.record_entry) JSON objects - -=head1 SYNOPSIS - -C [B<--config>=I] -[[B<--idfield>=I[ B<--idsubfield>=I]] [B<--start_id>=I] -[B<--user>=I] [B<--marctype>=I] -[[B<--file>=I[, ...]] [B<--libmap>=I] [B<--quiet>=I] -[[B<--bibfield>=I [B<--bibsubfield>=]] - -=head1 DESCRIPTION - -For one or more files containing MFHD records, iterate through the records -and generate SRE (serial.record_entry) JSON objects. - -=head1 OPTIONS - -=over - -=item * B<-c> I, B<--config>=I - -Specifies the OpenSRF configuration file used to connect to the OpenSRF router. -Defaults to F<@sysconfdir@/opensrf_core.xml> - -=item * B<--idfield> I - -Specifies the MFHD field where the identifier of the corresponding -bibliographic record is found. Defaults to '004'. - -=item * B<--idsubfield> I - -Specifies the MFHD subfield, if any, where the identifier of the corresponding -bibliographic record is found. This option is ignored unless it is accompanied -by the B<--idfield> option. Defaults to null. - -=item * B<--bibfield> I - -Specifies the field in the bibliographic record that holds the identifier -value. Defaults to null. - -=item * B<--bibsubfield> I - -Specifies the subfield in the bibliographic record, if any, that holds the -identifier value. This option is ignored unless it is accompanied by the -B<--bibfield> option. Defaults to null. - -=item * B<-u> I, B<--user>=I - -Specifies the Evergreen user that will own these serial records. - -=item * B<-m> I, B<--marctype>=I - -Specifies whether the files containg the MFHD records are in MARC21 ('MARC21') -or MARC21XML ('XML') format. Defaults to MARC21. - -=item * B<-l> I, B<--libmap>=I - -Points to a file to containing a mapping of library names to integers. -The integer represents the actor.org_unit.id value of the library. This enables -us to generate an ingest file that does not subsequently need to manually -manipulated. - -The library name must correspond to the 'b' subfield of the 852 field. -Well, it does not have to, but you will have to modify this script -accordingly. - -The format of the map file should be the name of the library, followed -by a tab, followed by the desired numeric ID of the library. For example: - -BR1 4 -BR2 5 - -=item * B<-q>, B<--quiet> - -Suppresses the record counter output. - -=back - -=head1 EXAMPLES - - marc2sre.pl --idfield 004 --bibfield 035 --bibsubfield a --user cat1 serial_holding.xml - -Processes MFHD records in the B file. The script pulls the -bibliographic record identifier from the 004 control field of the MFHD record -and searches for a matching value in the bibliographic record in data field -035, subfield a. The "cat1" user will own the processed MFHD records. - -=head1 AUTHOR - -Dan Scott - -=head1 COPYRIGHT AND LICENSE - -Copyright 2010-2011 by Dan Scott - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -=cut diff --git a/Open-ILS/src/extras/import/parallel_pg_loader.pl b/Open-ILS/src/extras/import/parallel_pg_loader.pl index 211d7f60e5..43ddaa9ce5 100755 --- a/Open-ILS/src/extras/import/parallel_pg_loader.pl +++ b/Open-ILS/src/extras/import/parallel_pg_loader.pl @@ -2,6 +2,8 @@ use strict; use warnings; +use lib '/openils/lib/perl5/'; + use OpenSRF::System; use OpenSRF::EX qw/:try/; use OpenSRF::Utils::SettingsClient; diff --git a/Open-ILS/src/extras/import/parallel_pg_loader.pl.in b/Open-ILS/src/extras/import/parallel_pg_loader.pl.in deleted file mode 100755 index f276f0b8d9..0000000000 --- a/Open-ILS/src/extras/import/parallel_pg_loader.pl.in +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -use OpenSRF::System; -use OpenSRF::EX qw/:try/; -use OpenSRF::Utils::SettingsClient; -use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::JSON; -use FileHandle; - -use Time::HiRes qw/time/; -use Getopt::Long; - -my @files; -my ($config, $output, @auto, @order, @wipe) = - ('@sysconfdir@/opensrf_core.xml', 'pg_loader-output'); -my $nocommit = 0; - -GetOptions( - 'config=s' => \$config, - 'output=s' => \$output, - 'wipe=s' => \@wipe, - 'autoprimary=s' => \@auto, - 'order=s' => \@order, - 'nocommit=i' => \$nocommit, -); - -my $pwd = `pwd`; -chop($pwd); - -my %lineset; -my %fieldcache; - -OpenSRF::System->bootstrap_client( config_file => $config ); -Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); - -my $main_out = FileHandle->new(">$output.sql") if ($output); - -binmode($main_out,'utf8'); - -$main_out->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n"); -$main_out->print("BEGIN;\n\n"); - -my %out_files; -for my $h (@order) { - $out_files{$h} = FileHandle->new(">$output.$h.sql"); - binmode($out_files{$h},'utf8'); -} - -my $count = 0; -my $starttime = time; -my $after_commit = ''; -while ( my $rec = <> ) { - next unless ($rec); - - my $row; - try { - $row = OpenSRF::Utils::JSON->JSON2perl($rec); - } catch Error with { - my $e = shift; - warn "\n\n !!! Error : $e \n\n at or around line $count\n"; - }; - next unless ($row); - - my $class = $row->class_name; - my $hint = $row->json_hint; - - next unless ( grep /$hint/, @order ); - - if (!$fieldcache{$hint}) { - my @cols = $row->real_fields; - if (grep { $_ eq $hint} @auto) { - @cols = grep { $_ ne $class->Identity } @cols; - } - - $fieldcache{$hint} = - { table => $class->Table, - sequence => $class->Sequence, - pkey => $class->Identity, - fields => \@cols, - }; - - #XXX it burnnnsssessss - $fieldcache{$hint}{table} =~ s/\.full_rec/.real_full_rec/o if ($hint eq 'mfr'); - - my $fields = join(',', @{ $fieldcache{$hint}{fields} }); - $main_out->print( "DELETE FROM $fieldcache{$hint}{table};\n" ) if (grep {$_ eq $hint } @wipe); - # Speed up loading of bib records - $main_out->print( "COPY $fieldcache{$hint}{table} ($fields) FROM '$pwd/$output.$hint.sql';\n" ); - - } - - my $line = [map { $row->$_ } @{ $fieldcache{$hint}{fields} }]; - my @data; - my $x = 0; - for my $d (@$line) { - if (!defined($d)) { - $d = '\N'; - } else { - $d =~ s/\f/\\f/gos; - $d =~ s/\n/\\n/gos; - $d =~ s/\r/\\r/gos; - $d =~ s/\t/\\t/gos; - $d =~ s/\\/\\\\/gos; - } - if ($hint eq 'bre' and $fieldcache{$hint}{fields}[$x] eq 'quality') { - $d = int($d) if ($d ne '\N'); - } - push @data, $d; - $x++; - } - $out_files{$hint}->print( join("\t", @data)."\n" ); - - if (!($count % 500)) { - print STDERR "\r$count\t". $count / (time - $starttime); - } - - $count++; -} - -for my $hint (@order) { - next if (grep { $_ eq $hint} @auto); - next unless ($fieldcache{$hint}{sequence}); - $after_commit .= "SELECT setval('$fieldcache{$hint}{sequence}'::TEXT, (SELECT MAX($fieldcache{$hint}{pkey}) FROM $fieldcache{$hint}{table}), TRUE);\n"; -} - -if (grep /^mfr$/, %out_files) { - $main_out->print("SELECT reporter.enable_materialized_simple_record_trigger();\n"); - $main_out->print("SELECT reporter.disable_materialized_simple_record_trigger();\n"); -} - -$main_out->print("COMMIT;\n\n") unless $nocommit; -$main_out->print($after_commit); -$main_out->close; - diff --git a/configure.ac b/configure.ac index c1f5137f42..88a84dceeb 100644 --- a/configure.ac +++ b/configure.ac @@ -373,19 +373,12 @@ AC_CONFIG_FILES([Makefile Open-ILS/updates/Makefile Open-ILS/xul/staff_client/Makefile Open-ILS/src/extras/eg_config - Open-ILS/src/extras/import/marc2are.pl - Open-ILS/src/extras/import/marc2bre.pl - Open-ILS/src/extras/import/marc2sre.pl - Open-ILS/src/extras/import/parallel_pg_loader.pl + Open-ILS/src/extras/fast-extract Open-ILS/src/perlmods/Makefile Open-ILS/src/perlmods/lib/OpenILS/Utils/Cronscript.pm], [ if test -e "./Open-ILS/src/extras/eg_config"; then chmod 755 Open-ILS/src/extras/eg_config; fi; if test -e "./Open-ILS/src/extras/fast-extract"; then chmod 755 Open-ILS/src/extras/fast-extract; fi; - if test -e "./Open-ILS/src/extras/import/marc2are.pl"; then chmod 755 Open-ILS/src/extras/import/marc2are.pl; fi; - if test -e "./Open-ILS/src/extras/import/marc2bre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2bre.pl; fi; - if test -e "./Open-ILS/src/extras/import/marc2sre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2sre.pl; fi; - if test -e "./Open-ILS/src/extras/import/parallel_pg_loader.pl"; then chmod 755 Open-ILS/src/extras/import/parallel_pg_loader.pl; fi; ]) AC_OUTPUT -- 2.43.2