pipeline fixups; integer qualities only, please
[Evergreen.git] / Open-ILS / src / extras / import / direct_ingest.pl
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 use lib '/openils/lib/perl5/';
6
7 use OpenSRF::System;
8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Application;
11 use OpenSRF::MultiSession;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::Ingest;
14 use OpenILS::Application::AppUtils;
15 use OpenILS::Utils::Fieldmapper;
16 use Digest::MD5 qw/md5_hex/;
17 use JSON;
18 use Data::Dumper;
19 use FileHandle;
20
21 use Time::HiRes qw/time/;
22 use Getopt::Long;
23 use MARC::Batch;
24 use MARC::File::XML;
25 use MARC::Charset;
26
# Tell MARC::Charset to ignore errors (argument 1 enables the setting).
MARC::Charset->ignore_errors(1);

# Command-line defaults: one worker, the stock bootstrap config, and the
# output file-name prefix used when more than one worker is running.
my ($workers, $config, $prefix) =
        (1, '/openils/conf/bootstrap.conf', 'marc-out-');

GetOptions(
        'threads=i'     => \$workers,
        'config=s'      => \$config,
        'prefix=s'      => \$prefix,
);

# The master distributes records with "$count % $workers", so a worker
# count below one can never work; fail early with a clear message.
die "--threads must be at least 1\n" if $workers < 1;

# Write ends of the per-worker pipes, in worker order (master side only).
my @ses;

# Separate duplicate of STDERR used for the progress display.
open NEWERR,     ">&STDERR" or die "Cannot dup STDERR: $!\n";

# Turn on autoflush for each handle; STDOUT is left as the selected handle.
select NEWERR; $| = 1;
select STDERR; $| = 1;
select STDOUT; $| = 1;

for my $worker_id (1 .. $workers) {
        pipe(my $r, my $w)
                or die "Cannot create pipe for worker $worker_id: $!\n";

        my $pid = fork;
        # fork returns undef on failure; without this check the master
        # itself would fall into the child branch below.
        die "Cannot fork worker $worker_id: $!\n" unless defined $pid;

        if ($pid) {
                # Master: keep only the write end.
                close $r;
                push @ses, $w;
                next;
        }

        # Child: drop every write end it inherited (its own and those of
        # earlier workers).  A pipe only reports EOF once all copies of the
        # write end are closed, so holding these open would keep the
        # worker's read loop from ever finishing.
        close $w;
        close $_ for @ses;

        $0 = "Local Ingest Worker $worker_id";

        # A lone worker writes to STDOUT (signalled by -1); otherwise each
        # worker writes to its own "${prefix}N" file.
        worker($r, $workers == 1 ? -1 : $worker_id);
        exit;
}
$0 = "Local Ingest Master";
62
# worker($pipe, $file)
#
# Body of one ingest worker process.  Reads one JSON-encoded record per line
# from $pipe, runs it through the
# 'open-ils.ingest.full.biblio.object.readonly' method and hands the result
# to postprocess() for output.
#
#   $pipe - read handle delivering the records
#   $file - output selector: -1 means write to STDOUT, anything else is
#           appended to $prefix to form the output file name
#
# Records that fail to ingest are reported with warn() and skipped.
# Dies if the output cannot be opened.
sub worker {
        my $pipe = shift;
        my $file = shift;

        OpenSRF::System->bootstrap_client( config_file => $config );
        Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));

        # NOTE(review): the reason for this pause is not evident from this
        # file -- presumably it lets the bootstrap settle; confirm.
        sleep 1;

        OpenILS::Application::Ingest->use;

        my $fname = $file == -1 ? '&STDOUT' : "${prefix}$file";

        # Fail now if the output is unusable, rather than on the first
        # printflush() inside postprocess() after a record has already been
        # ingested.
        my $f = FileHandle->new(">$fname")
                or die "Cannot open output '$fname' for writing: $!\n";

        while (my $rec = <$pipe>) {

                my $bib = JSON->JSON2perl($rec);
                my $data;

                try {
                        ($data) = OpenILS::Application::Ingest
                                ->method_lookup( 'open-ils.ingest.full.biblio.object.readonly' )
                                ->run( $bib );
                } catch Error with {
                        my $e = shift;
                        warn "Couldn't process record: $e\n >>> $rec\n";
                };

                # Nothing came back (error or empty result): skip the record.
                next unless $data;

                postprocess(
                        { bib           => $bib,
                        worm_data       => $data,
                        },
                        $f
                );
        }

        $f->close or warn "Error closing output '$fname': $!\n";
}
105
# Master loop: read records (one per line) from the files named on the
# command line, or STDIN, and deal them out to the workers round-robin.
my $count = 0;
my $starttime = time;
while ( my $rec = <> ) {
        next unless ($rec);
        my $session_index = $count % $workers;

        $ses[$session_index]->printflush( $rec );

        # Every 20 records, refresh the "count <TAB> records-per-second"
        # progress display on the duplicated STDERR handle.
        if (!($count % 20)) {
                my $elapsed = time - $starttime;
                # Guard against a zero interval on the very first record.
                my $rate = $elapsed > 0 ? $count / $elapsed : 0;
                print NEWERR "\r$count\t". $rate;
        }

        $count++;
}

# @ses holds the pipe handles themselves, so close each element directly.
# (Using the handle as an index into @ses yields undef and dies.)
$_->close for (@ses);
# postprocess($data, $f)
#
# Writes the ingest results for one record to $f, one JSON document per line.
#
#   $data - hashref with two keys:
#             bib       => the bib record object
#             worm_data => hashref holding field_entries, full_rec,
#                          fingerprint and descriptor
#   $f    - output handle supporting printflush()
#
# The bib object is first updated with the fingerprint and quality taken
# from the fingerprint data, then the output order is: bib, descriptor,
# each field entry, each full_rec row.
sub postprocess {
        my ($data, $f) = @_;

        my $bib           = $data->{bib};
        my $field_entries = $data->{worm_data}->{field_entries};
        my $full_rec      = $data->{worm_data}->{full_rec};
        my $fp            = $data->{worm_data}->{fingerprint};
        my $rd            = $data->{worm_data}->{descriptor};

        $bib->fingerprint( $fp->{fingerprint} );
        $bib->quality( $fp->{quality} );

        # Serialize one object and push it out immediately as a single line.
        my $emit = sub {
                my $obj = shift;
                $f->printflush( JSON->perl2JSON($obj)."\n" );
        };

        $emit->($bib);
        $emit->($rd);

        for my $entry (@$field_entries) {
                $emit->($entry);
        }

        for my $row (@$full_rec) {
                $emit->($row);
        }
}
140