adding simpler loader scripts
authormiker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Wed, 12 Jul 2006 19:26:09 +0000 (19:26 +0000)
committermiker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Wed, 12 Jul 2006 19:26:09 +0000 (19:26 +0000)
git-svn-id: svn://svn.open-ils.org/ILS/trunk@4960 dcc99617-32d9-48b4-a31d-7c20da2025e4

Open-ILS/src/extras/import/direct_ingest.pl [new file with mode: 0755]
Open-ILS/src/extras/import/direct_loader.pl [new file with mode: 0755]
Open-ILS/src/extras/import/marc2bre.pl [new file with mode: 0755]

diff --git a/Open-ILS/src/extras/import/direct_ingest.pl b/Open-ILS/src/extras/import/direct_ingest.pl
new file mode 100755 (executable)
index 0000000..b077347
--- /dev/null
@@ -0,0 +1,127 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use lib '/openils/lib/perl5/';
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::AppSession;
+use OpenSRF::Application;
+use OpenSRF::MultiSession;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Application::Ingest;
+use OpenILS::Application::AppUtils;
+use OpenILS::Utils::Fieldmapper;
+use Digest::MD5 qw/md5_hex/;
+use JSON;
+use Data::Dumper;
+use FileHandle;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+use MARC::Batch;
+use MARC::File::XML;
+use MARC::Charset;
+
+MARC::Charset->ignore_errors(1);
+
+my ($workers, $config, $prefix) =
+       (1, '/openils/conf/bootstrap.conf', 'marc-out-');
+
+GetOptions(
+       'threads=i'     => \$workers,
+       'config=s'      => \$config,
+       'prefix=s'      => \$prefix,
+);
+
+my @ses;
+
+open NEWERR,     ">&STDERR";
+
+select NEWERR; $| = 1;
+select STDERR; $| = 1;
+select STDOUT; $| = 1;
+
+for (1 .. $workers) {
+       my ($r,$w);
+       pipe($r,$w);
+       if (fork) {
+               push @ses, $w;
+       } else {
+               $0 = "Local Ingest Worker $_";
+               worker($r, $_);
+               exit;
+       }
+}
+$0 = "Local Ingest Master";
+
+sub worker {
+       my $pipe = shift;
+       my $file = shift;
+
+       OpenSRF::System->bootstrap_client( config_file => $config );
+       Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+       OpenILS::Application::Ingest->use;
+
+       my $f = new FileHandle(">${prefix}$file");
+       while (my $rec = <$pipe>) {
+
+               my $bib = JSON->JSON2perl($rec);
+               my $data;
+
+               try {
+                       ($data) = OpenILS::Application::Ingest
+                               ->method_lookup( 'open-ils.ingest.full.biblio.object.readonly' )
+                               ->run( $bib );
+               } catch Error with {
+                       my $e = shift;
+                       warn "Couldn't process record: $e\n >>> $rec\n";
+               };
+
+               next unless $data;
+
+               postprocess(
+                       { bib           => $bib,
+                       worm_data       => $data,
+                       },
+                       $f
+               );
+       }
+}
+
+my $count = 0;
+my $starttime = time;
+while ( my $rec = <> ) {
+       next unless ($rec);
+       my $session_index = $count % $workers;
+
+       $ses[$session_index]->printflush( $rec );
+
+       if (!($count % 20)) {
+               print NEWERR "\r$count\t". $count / (time - $starttime);
+       }
+
+       $count++;
+}
+
+sub postprocess {
+       my $data = shift;
+       my $f = shift;
+
+       my $bib = $data->{bib};
+       my $field_entries = $data->{worm_data}->{field_entries};
+       my $full_rec = $data->{worm_data}->{full_rec};
+       my $fp = $data->{worm_data}->{fingerprint};
+       my $rd = $data->{worm_data}->{descriptor};
+
+       $bib->fingerprint( $fp->{fingerprint} );
+       $bib->quality( $fp->{quality} );
+
+       $f->printflush( JSON->perl2JSON($bib)."\n" );
+       $f->printflush( JSON->perl2JSON($rd)."\n" );
+       $f->printflush( JSON->perl2JSON($_)."\n" ) for (@$field_entries);
+       $f->printflush( JSON->perl2JSON($_)."\n" ) for (@$full_rec);
+}
+
diff --git a/Open-ILS/src/extras/import/direct_loader.pl b/Open-ILS/src/extras/import/direct_loader.pl
new file mode 100755 (executable)
index 0000000..1ed48da
--- /dev/null
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use lib '/openils/lib/perl5/';
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::AppSession;
+use OpenSRF::Application;
+use OpenSRF::MultiSession;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Application::Storage;
+use OpenILS::Application::AppUtils;
+use OpenILS::Utils::Fieldmapper;
+use Digest::MD5 qw/md5_hex/;
+use JSON;
+use Data::Dumper;
+use FileHandle;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+use MARC::Batch;
+use MARC::File::XML;
+use MARC::Charset;
+
+MARC::Charset->ignore_errors(1);
+
+my @files;
+my ($type, $config, $autoprimary) =
+       ('biblio.record_entry', '/openils/conf/bootstrap.conf', 0);
+
+GetOptions(
+       'type=s'        => \$type,
+       'config=s'      => \$config,
+       'autoprimary'   => \$config,
+);
+
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+OpenILS::Application::Storage->use;
+OpenILS::Application::Storage->initialize;
+OpenILS::Application::Storage->child_init || die;
+
+if ($autoprimary) {
+       OpenILS::Application::Storage->autoprimary(1);
+}
+
+my $base = "open-ils.storage.direct.$type.batch.create";
+
+OpenSRF::Application->method_lookup( "$base.start" )->run; 
+
+my $count = 0;
+my $starttime = time;
+while ( my $rec = <> ) {
+       next unless ($rec);
+
+       my $row = JSON->JSON2perl($rec);
+
+       OpenSRF::Application->method_lookup( "$base.push" )->run($row); 
+
+
+       if (!($count % 20)) {
+               print STDERR "\r$count\t". $count / (time - $starttime);
+       }
+
+       $count++;
+}
+OpenSRF::Application->method_lookup( "$base.finish" )->run; 
+
+
diff --git a/Open-ILS/src/extras/import/marc2bre.pl b/Open-ILS/src/extras/import/marc2bre.pl
new file mode 100755 (executable)
index 0000000..19ce90d
--- /dev/null
@@ -0,0 +1,251 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use lib '/openils/lib/perl5/';
+
+#use OpenSRF::System;
+#use OpenILS::Utils::Fieldmapper;
+#use OpenSRF::Utils::SettingsClient;
+#
+#OpenSRF::System->bootstrap_client(config_file =>
+#'/openils/conf/bootstrap.conf');
+#Fieldmapper->import(IDL =>
+#  OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+#
+#  # do this after bootstrapping/importing IDL
+#  require OpenILS::Application::Search;
+#
+#  my $meth = OpenSRF::Application->method_lookup(
+#    'open-ils.search.biblio.metarecord.mods_slim.retrieve');
+#    my @data = $meth->run(1);
+#    my $mods = shift @data;
+#    print "Got mvr: " . $mods->title . "\n";
+
+
+
+use OpenSRF::System;
+use OpenSRF::Application;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::AppSession;
+use OpenSRF::MultiSession;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Application::AppUtils;
+use OpenILS::Utils::Fieldmapper;
+use Digest::MD5 qw/md5_hex/;
+use JSON;
+use Data::Dumper;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+use MARC::Batch;
+use MARC::File::XML;
+use MARC::Charset;
+use UNIVERSAL::require;
+
+MARC::Charset->ignore_errors(1);
+
+my ($id_field, $count, $user, $password, $config, $keyfile,  @files) =
+       ('998', 1, 'admin', 'open-ils', '/openils/conf/bootstrap.conf');
+
+GetOptions(
+       'startid=i'     => \$count,
+       'idfield=s'     => \$id_field,
+       'user=s'        => \$user,
+       'password=s'    => \$password,
+       'keyfile=s'     => \$keyfile,
+       'config=s'      => \$config,
+       'file=s'        => \@files,
+);
+
+@files = @ARGV if (!@files);
+
+my @ses;
+my @req;
+my %processing_cache;
+
+my %source_map = (      
+       o  => 'OCLC',
+       i  => 'ISxN',    
+       l  => 'Local',
+       s  => 'System',  
+       g  => 'Gutenberg',  
+);                              
+
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+$user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id;
+
+my %keymap;
+if ($keyfile) {
+       open F, $keyfile or die "Couldn't open key file $keyfile";
+       while (<F>) {
+               if ( /^(\d+)\|(\S+)/o ) {
+                       $keymap{$1} = $2;
+               }
+       }
+}
+
+select STDERR; $| = 1;
+select STDOUT; $| = 1;
+
+my $batch = new MARC::Batch ( 'USMARC', @files );
+$batch->strict_off();
+$batch->warnings_off();
+
+my $starttime = time;
+while ( my $rec = $batch->next ) {
+
+       my $id = $rec->subfield($id_field => 'a') || $count;
+       if ($id =~ /(\d+)/o) {
+               $id = $1;
+       }
+
+       if ($keyfile) {
+               if (my $tcn = $keymap{$id}) {
+                       $rec->delete_field( $_ ) for ($rec->field('035'));
+                       $rec->append_fields( MARC::Field->new( '035', '', '', 'a', $tcn ) );
+               } else {
+                       $count++;
+                       next;
+               }
+       }
+
+       $rec = preprocess($rec, $id);
+
+       if (!$rec) {
+               next;
+       }
+
+       my $tcn_value = $rec->subfield('039' => 'a');
+       my $tcn_source = $rec->subfield('039' => 'b');
+
+       (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
+       $xml =~ s/^<\?xml.+\?\s*>//go;
+       $xml =~ s/>\s+</></go;
+
+       my $bib = new Fieldmapper::biblio::record_entry;
+       $bib->id($id);
+       $bib->active('t');
+       $bib->deleted('f');
+       $bib->marc($xml);
+       $bib->creator($user);
+       $bib->create_date('now');
+       $bib->editor($user);
+       $bib->edit_date('now');
+       $bib->tcn_source($tcn_source);
+       $bib->tcn_value($tcn_value);
+       $bib->last_xact_id('IMPORT-'.$starttime);
+
+       print JSON->perl2JSON($bib)."\n";
+
+       $count++;
+
+       if (!($count % 20)) {
+               print STDERR "\r$count\t". $count / (time - $starttime);
+       }
+}
+
+sub preprocess {
+       my $rec = shift;
+
+       my ($id, $source, $value);
+
+       if (!$id) {
+               my $f = $rec->field('001');
+               $id = $f->data if ($f);
+       }
+
+       if (!$id) {
+               my $f = $rec->field('000');
+               $id = 'g'.$f->data if ($f);
+               $source = 'g';
+       }
+
+        if (!$id) {
+                my $f = $rec->field('020');
+                $id = $f->subfield('a') if ($f);
+               $source = 'i';
+        }
+
+        if (!$id) {
+                my $f = $rec->field('022');
+                $id = $f->subfield('a') if ($f);
+               $source = 'i';
+        }
+
+        if (!$id) {
+                my $f = $rec->field('010');
+                $id = $f->subfield('a') if ($f);
+               $source = 'l';
+        }
+
+        if (!$id) {
+                my $f = $rec->field('035');
+                $id = $f->subfield('a') if ($f);
+               $source = 's';
+        }
+
+       if (!$id) {
+               $count++;
+               warn "\n !!! Record with no TCN : $count\n".$rec->as_formatted;
+               return undef;
+       }
+
+       $id =~ s/\s*$//o;
+       $id =~ s/^\s*//o;
+       $id =~ s/(\S+)$/$1/o;
+
+       $id = $source.$id if ($source);
+
+       ($source, $value) = $id =~ /^(.)(.+)$/o;
+       if ($id =~ /^o(\d+)$/o) {
+               $id = "ocm$1";
+               $source = 'o';
+       }
+
+       my $tcn = MARC::Field->new(
+               '039',
+               '', '',
+               'a', $id,
+               'b', do { $source_map{$source} || 'System' },
+       );
+
+       $rec->delete_field($_) for ($rec->field('035','948','998'));
+       $rec->append_fields($tcn);
+
+       return $rec;
+}
+
+sub login {        
+       my( $username, $password, $type ) = @_;
+
+       $type |= "staff"; 
+
+       my $seed = OpenILS::Application::AppUtils->simplereq(
+               'open-ils.auth',
+               'open-ils.auth.authenticate.init',
+               $username
+       );
+
+       die("No auth seed. Couldn't talk to the auth server") unless $seed;
+
+       my $response = OpenILS::Application::AppUtils->simplereq(
+               'open-ils.auth',
+               'open-ils.auth.authenticate.complete',
+                {       username => $username,
+                        password => md5_hex($seed . md5_hex($password)),
+                        type => $type });
+
+        die("No auth response returned on login.") unless $response;
+
+        my $authtime = $response->{payload}->{authtime};
+        my $authtoken = $response->{payload}->{authtoken};
+
+       die("Login failed for user $username!") unless $authtoken;
+
+        return $authtoken;
+}       
+