2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require 'oils_header.pl';
26 use vars qw/$apputils $authtoken/;
31 use MARC::File::XML (BinaryEncoding => 'UTF-8');
32 use MARC::File::USMARC;
33 use File::Basename qw/fileparse/;
34 use File::Temp qw/tempfile/;
35 use OpenSRF::AppSession;
36 use OpenSRF::Utils::Logger qw/$logger/;
37 use OpenSRF::Transport::PeerHandle;
38 use OpenSRF::Utils::SettingsClient;
# Script-wide settings and state. NOTE(review): several declarations used
# later in the file (e.g. $verbose, $tempdir, $cur_rec_type) are not visible
# in this excerpt.
41 $Data::Dumper::Indent=0; # compact Dumper output, for logging
43 # This script will always be an entry point for opensrf,
44 # so go ahead and force log client.
45 $ENV{OSRF_LOG_CLIENT} = 1;
47 # these are updated with each new batch of records
53 my $cur_merge_profile; # merge profile object for the record type in process
54 my $bib_merge_profile_obj; # cached bib profile object (see set_merge_profile)
55 my $auth_merge_profile_obj; # cached auth profile object (see set_merge_profile)
59 my $osrf_config = '/openils/conf/opensrf_core.xml'; # default for --osrf-config
66 my $bib_merge_profile; # profile ID from --merge-profile
67 my $auth_merge_profile; # profile ID from --auth-merge-profile
# Per-record-type import strategy flags, filled from the matching
# --bib-* / --auth-* command line switches.
72 my $bib_import_no_match;
73 my $bib_auto_overlay_exact;
74 my $bib_auto_overlay_1match;
75 my $bib_auto_overlay_best_match;
76 my $auth_import_no_match;
77 my $auth_auto_overlay_exact;
78 my $auth_auto_overlay_1match;
79 my $auth_auto_overlay_best_match;
81 # deprecated options; these map to their bib_* equivalents
83 my $auto_overlay_exact;
84 my $auto_overlay_1match;
85 my $auto_overlay_best_match;
90 my $net_server_conf = fileparse($0, '.pl').'.conf'; # default derived from the script name; see --net-server-config
# Command line option specification: option name => destination variable.
# A '=s' suffix takes a string value, '=i' an integer; no suffix is a flag.
# NOTE(review): the call these pairs belong to is not visible in this excerpt.
93 'osrf-config=s' => \$osrf_config,
94 'verbose' => \$verbose,
95 'username=s' => \$username,
96 'password=s' => \$password,
97 'tempdir=s' => \$tempdir,
98 'spoolfile=s' => \$spoolfile,
99 'wait=i' => \$wait_time,
100 'merge-profile=i' => \$bib_merge_profile, # bib profile, despite the un-prefixed name
101 'queue=i' => \$deprecated_queue, # deprecated alias of --bib-queue
102 'bib-queue=i' => \$bib_queue,
103 'source=i' => \$bib_source,
104 'auth-merge-profile=i' => \$auth_merge_profile,
105 'auth-queue=i' => \$auth_queue,
# Deprecated un-prefixed switches; copied onto their bib-* equivalents
# after option parsing.
108 'import-no-match' => \$import_no_match,
109 'auto-overlay-exact' => \$auto_overlay_exact,
110 'auto-overlay-1match' => \$auto_overlay_1match,
111 'auto-overlay-best-match' => \$auto_overlay_best_match,
# Current per-record-type switches.
114 'bib-import-no-match' => \$bib_import_no_match,
115 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
116 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
117 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
118 'auth-import-no-match' => \$auth_import_no_match,
119 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
120 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
121 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
123 'net-server-config=s' => \$net_server_conf,
130 Path to OpenSRF configuration file.
133 Path to Net::Server configuration file. Defaults to $net_server_conf.
134 Only required if --spoolfile is not set.
137 Log additional details
140 Evergreen user account which performs the import actions.
143 Evergreen user account password
146 MARC data received via the network is stored in a temporary
147 file so Vandelay can access it. This must be a directory
148 the open-ils.vandelay service can access. If you want the
149 file deleted after completion, be sure open-ils.vandelay
150 has write access to the directory and the file.
151 This value defaults to the Vandelay data directory, however
152 this configuration value is only accessible when run from
153 the private opensrf domain, which you may not want to do.
156 Path to a MARC file to load. When a --spoolfile is specified,
157 this script will send the file to vandelay for processing,
158 then exit when complete. In other words, it does not stay
159 alive to accept requests from the network.
162 Amount of time in seconds this script will wait after receiving
163 a connection on the socket and before receiving a complete
164 MARC record. This prevents unintentional denial of service by
165 clients connecting and never sending anything.
168 ID of the vandelay bib record merge profile
171 ID of the vandelay bib record queue
177 ID of the vandelay authority record merge profile
180 ID of the vandelay authority record queue
182 --bib-import-no-match
183 --bib-auto-overlay-exact
184 --bib-auto-overlay-1match
185 --bib-auto-overlay-best-match
186 --auth-import-no-match
187 --auth-auto-overlay-exact
188 --auth-auto-overlay-1match
189 --auth-auto-overlay-best-match
191 Bib and auth import options which map directly to Vandelay import
195 Apply import-no-match to bibs and auto-overlay-exact to auths.
197 $0 --bib-import-no-match --auth-auto-overlay-exact
200 Show this help message
# Backward compatibility: each deprecated, un-prefixed option is copied onto
# its bib-* equivalent, with a warning so callers can update their invocation.
207 if ($import_no_match) {
208 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
209 $bib_import_no_match = $import_no_match;
211 if ($auto_overlay_exact) {
212 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
213 $bib_auto_overlay_exact = $auto_overlay_exact;
215 if ($auto_overlay_1match) {
216 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
217 $bib_auto_overlay_1match = $auto_overlay_1match;
219 if ($auto_overlay_best_match) {
220 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
221 $bib_auto_overlay_best_match = $auto_overlay_best_match;
223 if ($deprecated_queue) {
224 warn "\n--queue is deprecated; use --bib-queue\n";
225 $bib_queue = $deprecated_queue;
# Mandatory arguments: both credentials, plus at least one target queue.
229 die "--username AND --password required. --help for more info.\n"
230 unless $username and $password;
231 die "--bib-queue OR --auth-queue required. --help for more info.\n"
232 unless $bib_queue or $auth_queue;
# Fallback for $tempdir: when no value is set yet, read the directory from the
# open-ils.vandelay "importer" setting in the OpenSRF settings.
# NOTE(review): the enclosing sub header is not visible in this excerpt.
235 return if $tempdir; # already read or user provided
236 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
237 qw/apps open-ils.vandelay app_settings databases importer/
241 # Sets cur_rec_type to 'auth' if leader/06 of the first
242 # parseable record is 'z', otherwise 'bib'.
# Also derives $cur_queue and $cur_rec_source from the detected type.
# Takes the path of the MARC file to inspect; dies if it cannot be opened.
243 sub set_record_type {
244 my $file_name = shift;
246 my $marctype = 'USMARC';
# NOTE(review): two-argument open on a bareword handle; a three-argument open
# with a lexical handle would be safer. The matching close is not visible here.
247 open(F, $file_name) or
248 die "Unable to open MARC file $file_name : $!\n";
249 $marctype = 'XML' if (getc(F) =~ /^\D/o); # a non-digit first character selects XML parsing
# NOTE(review): indirect object syntax; MARC::Batch->new(...) is the safer form.
252 my $batch = new MARC::Batch ($marctype, $file_name);
258 eval {$rec = $batch->next}; # trap parser exceptions
259 next if $@; # record parse failure
261 $ldr_06 = substr($rec->leader(), 6, 1) || ''; # leader position 06
265 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
267 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
268 $cur_rec_source = $cur_rec_type eq 'auth' ? '' : $bib_source; # auth records get an empty source
272 # Set vandelay options based on command line options and the type of record
273 # currently in process.
# Reads $cur_rec_type and $cur_merge_profile. The resulting $vl_ops hash is
# what import_queued_records passes to the vandelay import call.
274 sub compile_vandelay_ops {
278 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef # profile ID, or undef when none is selected
281 if ($cur_rec_type eq 'auth') {
282 $vl_ops->{import_no_match} = $auth_import_no_match;
283 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
284 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
285 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
# otherwise: bib settings
287 $vl_ops->{import_no_match} = $bib_import_no_match;
288 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
289 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
290 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
293 # Default to exact match only if no other strategy is selected.
294 $vl_ops->{auto_overlay_exact} = 1
296 $vl_ops->{auto_overlay_1match} or
297 $vl_ops->{auto_overlay_best_match}
300 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
# Spools a MARC file into the vandelay queue for the detected record type and
# collects the content of each response into @rec_ids.
# NOTE(review): the sub header and the return statement are not visible in
# this excerpt; presumably this is process_spool, as called from process_file.
305 my $file_name = shift; # path of the MARC file to spool
307 set_record_type($file_name); # sets $cur_rec_type, $cur_queue and $cur_rec_source
309 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
310 my $req = $ses->request(
311 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
312 $authtoken, undef, # cache key not needed
313 $cur_queue, 'import', $file_name, $cur_rec_source
317 while(my $resp = $req->recv) {
320 $logger->error("Error spooling MARC data: $resp");
322 } elsif($resp->content) {
323 push(@rec_ids, $resp->content);
# Imports previously queued vandelay records, then deletes the successfully
# imported ones from the queue.
#
# Uses $rec_ids (its assignment is not visible in this excerpt; callers pass
# the list produced by process_spool) and the options built by
# compile_vandelay_ops().
#
# Returns a two-element list: the number of records collected for cleanup
# (i.e. imported) and $failed.
330 sub import_queued_records {
332 my $vl_ops = compile_vandelay_ops();
334 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
335 my $req = $ses->request(
336 "open-ils.vandelay.${cur_rec_type}_record.list.import",
337 $authtoken, $rec_ids, $vl_ops
340 # collect the successfully imported vandelay records
343 while(my $resp = $req->recv) {
345 $logger->error("Error importing MARC data: $resp");
347 } elsif(my $data = $resp->content) {
349 if($data->{err_event}) {
351 $logger->error(Dumper($data->{err_event}));
354 } elsif ($data->{no_import}) {
355 # no errors, just didn't import, because of rules.
359 "record failed to satisfy Vandelay merge/quality/etc. ".
360 "requirements: " . ($data->{imported} || ''));
363 push(@cleanup_recs, $data->{imported}) if $data->{imported}; # remember the queued record for cleanup
368 # clean up the successfully imported vandelay records to prevent queue bloat
369 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
371 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
374 my $api = 'open-ils.pcrud.delete.';
375 $api .= $cur_rec_type eq 'auth' ? 'vqar' : 'vqbr'; # delete method suffix depends on the record type
377 foreach (@cleanup_recs) {
379 $pcrud->request($api, $authtoken, $_)->recv;
383 $logger->error("Error deleting queued $cur_rec_type record $_: $@");
388 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err; # commit only when no delete failed
391 $logger->info("imported queued vandelay records: @cleanup_recs");
392 return (scalar(@cleanup_recs), $failed);
397 # Each child needs its own opensrf connection.
# Bootstraps an OpenSRF client from $osrf_config, then loads the Fieldmapper
# IDL from the path stored in the "IDL" setting. Also called directly in
# --spoolfile mode.
398 sub child_init_hook {
399 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
400 Fieldmapper->import(IDL =>
401 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
405 # The core Net::Server method
406 # Reads streams of MARC data from the network, saves the data as a file,
407 # then processes the file via vandelay.
# NOTE(review): the assignment of $self and the code that reads from STDIN
# into $data are not visible in this excerpt.
408 sub process_request {
410 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport}; # "address:port" of the peer, for log messages
412 $logger->info("$client opened a new connection");
414 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
415 if(!$ph->flush_socket()) { # false is treated as a lost opensrf connection
416 $logger->error("We received a request, but we are no longer connected".
417 " to opensrf. Exiting and dropping request from $client");
423 local $SIG{ALRM} = sub { die "alarm\n" }; # turn the alarm into an exception
424 alarm $wait_time; # prevent accidental tie ups of backend processes
425 local $/ = "\x1D"; # MARC record separator
431 $logger->error("reading from STDIN failed or timed out: $@");
435 $logger->info("stream parser read " . length($data) . " bytes");
439 # copy data to a temporary file so vandelay can scoop it up
440 my ($handle, $tempfile) = tempfile("$0_XXXX", DIR => $tempdir)
441 or die "Cannot create tempfile in $tempdir : $!";
443 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
446 process_file($tempfile);
# Selects the merge profile for the current record type ($cur_rec_type) and
# stores it in $cur_merge_profile.
#
# A previously fetched profile object is reused when one is cached for the
# type. Otherwise the profile is retrieved by ID (--merge-profile for bibs,
# --auth-merge-profile for auths) and cached. When no ID was configured for
# the type, $cur_merge_profile is set to undef.
449 sub set_merge_profile {
453 return $cur_merge_profile = $bib_merge_profile_obj
454 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
456 return $cur_merge_profile = $auth_merge_profile_obj
457 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
459 # fetch un-cached profile
461 my $profile_id = $cur_rec_type eq 'bib' ?
462 $bib_merge_profile : $auth_merge_profile;
464 return $cur_merge_profile = undef unless $profile_id; # no profile configured for this type
466 $cur_merge_profile = $apputils->simplereq(
468 'open-ils.pcrud.retrieve.vmp',
469 $authtoken, $profile_id);
471 # cache profile for later
473 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
474 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
# Runs one complete import cycle for a single MARC file: log in, spool the
# file, import the queued records, build a summary message, log out.
# NOTE(review): the sub header, the declaration of $msg and whatever delivers
# $msg are not visible in this excerpt; presumably this is process_file.
480 new_auth_token(); # login
481 my $rec_ids = process_spool($file);
482 my ($imported, $failed) = import_queued_records($rec_ids);
484 if (oils_event_equals($imported, 'NO_SESSION')) {
485 # did the session expire while spooling?
486 new_auth_token(); # retry with new authtoken
487 ($imported, $failed) = import_queued_records($rec_ids);
490 oils_event_die($imported); # helper is defined outside this file; presumably dies when given an event
492 my $profile = $cur_merge_profile ? $cur_merge_profile->name : ''; # empty name when no profile is in use
494 $msg .= "Successfully imported $imported $cur_rec_type records ".
495 "using merge profile '$profile'\n" if $imported;
496 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
497 $msg .= "\x00" unless $spoolfile; # NUL-terminate the message in network mode
500 clear_auth_token(); # logout
503 # The authtoken will time out after the configured inactivity period.
504 # When that happens, get a new one.
# Logs in with the --username / --password credentials as login type 'staff'
# and dies if the login fails.
# NOTE(review): the sub header is not visible in this excerpt; presumably this
# is new_auth_token.
506 oils_login($username, $password, 'staff')
507 or die "Unable to login to Evergreen as user $username";
# Logs out by calling open-ils.auth.session.delete.
# NOTE(review): the remaining simplereq arguments are not visible in this
# excerpt.
510 sub clear_auth_token {
511 $apputils->simplereq(
513 'open-ils.auth.session.delete',
519 # -- execution starts here: one-shot --spoolfile import, or Net::Server mode
522 # individual files are processed in standalone mode.
523 # No Net::Server innards are necessary.
525 child_init_hook(); # force an opensrf connection
526 process_file($spoolfile);
530 # No spoolfile, run in Net::Server mode
534 WARNING: This script provides no security layer. Any client that has
535 access to the server+port can inject MARC records into the system.
540 $args{conf_file} = $net_server_conf if -r $net_server_conf; # use the config file only when it is readable
541 $args{port} = $port if $port; # pass a port only when one is set
543 __PACKAGE__->run(%args); # run() comes from Net::Server::PreFork via 'use base'