adding simpler loader scripts
[Evergreen.git] / Open-ILS / src / extras / import / direct_ingest.pl
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 use lib '/openils/lib/perl5/';
6
7 use OpenSRF::System;
8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Application;
11 use OpenSRF::MultiSession;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::Ingest;
14 use OpenILS::Application::AppUtils;
15 use OpenILS::Utils::Fieldmapper;
16 use Digest::MD5 qw/md5_hex/;
17 use JSON;
18 use Data::Dumper;
19 use FileHandle;
20
21 use Time::HiRes qw/time/;
22 use Getopt::Long;
23 use MARC::Batch;
24 use MARC::File::XML;
25 use MARC::Charset;
26
# Ask MARC::Charset to ignore errors instead of raising them
# (presumably character-conversion failures -- confirm against the
# MARC::Charset documentation).
MARC::Charset->ignore_errors(1);

# Defaults: a single worker, the stock bootstrap configuration file, and
# output files whose names start with "marc-out-" (worker() appends the
# worker number to this prefix).
my ($workers, $config, $prefix) =
        (1, '/openils/conf/bootstrap.conf', 'marc-out-');

# Stop on unknown or malformed options rather than carrying on with the
# defaults, which would silently write to unexpected files.
GetOptions(
        'threads=i'     => \$workers,
        'config=s'      => \$config,
        'prefix=s'      => \$prefix,
) or die "Usage: $0 [--threads=N] [--config=FILE] [--prefix=STRING] [input-file ...]\n";

# Records are dealt out with "$count % $workers", so a worker count below
# one would only fail later with "Illegal modulus zero" (or with a method
# call on an undefined pipe handle).
die "--threads must be at least 1 (got $workers)\n" if $workers < 1;

# Write ends of the per-worker pipes, in worker order.
my @ses;

# Private duplicate of STDERR, used for the progress counter.
open(NEWERR, '>&', \*STDERR)
        or die "Unable to duplicate STDERR: $!\n";

# Unbuffer all three handles.  STDOUT stays the currently selected
# output handle, exactly as before.
$_->autoflush(1) for (\*NEWERR, \*STDERR, \*STDOUT);
45
# Spawn the ingest workers.  Each worker gets its own pipe: the master
# keeps the write end in @ses and the child reads records from the read
# end inside worker() (defined further down in this file).
my $master_pid = $$;
my @worker_pids;

for my $worker_id (1 .. $workers) {
        pipe(my $r, my $w)
                or die "Unable to create pipe for worker $worker_id: $!\n";

        # fork returns undef on failure; treating that as "child" would make
        # the master run worker() itself and then exit.
        my $pid = fork;
        die "Unable to fork worker $worker_id: $!\n" unless defined $pid;

        if ($pid) {
                # Master: only the write end is needed here.
                close $r;
                push @ses, $w;
                push @worker_pids, $pid;
        } else {
                # Child: drop every write end it inherited (its own and those of
                # the workers forked earlier).  A pipe only reports EOF once all
                # writers are closed, so keeping them open would leave the
                # workers blocked in <$pipe> forever.
                close $w;
                close $_ for @ses;
                @ses = ();

                $0 = "Local Ingest Worker $worker_id";
                worker($r, $worker_id);
                exit;
        }
}
$0 = "Local Ingest Master";

# When the master exits, close the pipes so the workers see EOF, then wait
# for them so the output files are complete before control returns to the
# caller.  The pid guard keeps the children (which run END blocks too when
# they call exit) from executing this.
END {
        if (defined $master_pid && $$ == $master_pid) {
                local $?;       # waitpid must not clobber the master's exit status
                close $_ for grep { defined fileno $_ } @ses;
                for my $pid (@worker_pids) {
                        next if waitpid($pid, 0) != $pid;
                        warn "Ingest worker (pid $pid) ended abnormally (wait status $?)\n" if $?;
                }
        }
}
58
# worker($pipe, $file)
#
# Body of one forked ingest worker.
#
#   $pipe - read handle delivering one JSON-encoded bib record per line
#   $file - suffix appended to the global $prefix to name the output file
#           (the caller passes the worker number)
#
# Connects to OpenSRF using the global $config, loads the Fieldmapper IDL,
# then runs every record through the read-only full ingest method and hands
# the result to postprocess(), which writes it to the output file.  Records
# that cannot be decoded or ingested are reported with warn and skipped.
# Dies if the output file cannot be opened or closed cleanly.
sub worker {
        my ($pipe, $file) = @_;

        OpenSRF::System->bootstrap_client( config_file => $config );
        Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));

        OpenILS::Application::Ingest->use;

        # Explicit mode argument instead of a ">" glued onto the name, and fail
        # right away instead of on the first printflush against undef.
        my $outfile = "${prefix}$file";
        my $f = FileHandle->new($outfile, 'w')
                or die "Unable to open $outfile for writing: $!\n";

        while (my $rec = <$pipe>) {

                my $bib;
                my $data;

                # Decoding happens inside the try as well, so that a line which
                # fails to parse is reported and skipped like any other bad
                # record instead of taking the whole worker down.
                try {
                        $bib = JSON->JSON2perl($rec);
                        ($data) = OpenILS::Application::Ingest
                                ->method_lookup( 'open-ils.ingest.full.biblio.object.readonly' )
                                ->run( $bib );
                } catch Error with {
                        my $e = shift;
                        warn "Couldn't process record: $e\n >>> $rec\n";
                };

                next unless $data;

                postprocess(
                        { bib           => $bib,
                        worm_data       => $data,
                        },
                        $f
                );
        }

        # Buffered write errors only surface at close time.
        $f->close
                or die "Error closing $outfile: $!\n";

        return;
}
93
# Master loop: read records (one per line) from the files named on the
# command line, or from STDIN, and deal them out to the workers round-robin.
# Every 20 records a progress line "<count>\t<records per second>" is
# rewritten in place on NEWERR.
my $count = 0;
my $starttime = time;
while ( my $rec = <> ) {
        next unless ($rec);     # skip reads that evaluate to false
        my $session_index = $count % $workers;

        $ses[$session_index]->printflush( $rec )
                or die "Unable to send record $count to worker " . ($session_index + 1) . ": $!\n";

        if (!($count % 20)) {
                # Guard against a zero elapsed time (possible on the very first
                # record with a coarse clock), which would otherwise die with
                # "Illegal division by zero".
                my $elapsed = time - $starttime;
                my $rate = $elapsed > 0 ? $count / $elapsed : 0;
                print NEWERR "\r$count\t". $rate;
        }

        $count++;
}
108
# postprocess($data, $f)
#
# Writes the ingest result for one bib record to the output handle.
#
#   $data - hashref with two keys:
#             bib       => the bib record object; its fingerprint and
#                          quality accessors are set here
#             worm_data => hashref holding fingerprint (a hashref with
#                          fingerprint and quality), descriptor,
#                          field_entries (arrayref) and full_rec (arrayref)
#   $f    - output handle supporting printflush
#
# Output is one JSON document per line, in this order: the updated bib, the
# record descriptor, each field entry, each full_rec row.  The lines for a
# record are written and flushed together.  Dies if the write fails, so a
# full disk or a closed handle cannot silently truncate the load file.
sub postprocess {
        my ($data, $f) = @_;

        my $bib  = $data->{bib};
        my $worm = $data->{worm_data};
        my $fp   = $worm->{fingerprint};

        $bib->fingerprint( $fp->{fingerprint} );
        $bib->quality( $fp->{quality} );

        # Serialise after the accessors above have run, so the bib line carries
        # the fingerprint and quality.
        my $output = join '',
                map { JSON->perl2JSON($_) . "\n" }
                (
                        $bib,
                        $worm->{descriptor},
                        @{ $worm->{field_entries} || [] },
                        @{ $worm->{full_rec} || [] },
                );

        $f->printflush($output)
                or die "Unable to write ingest output: $!\n";

        return;
}
127