2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require 'oils_header.pl';
26 use vars qw/$apputils $authtoken/;
31 use MARC::File::XML (BinaryEncoding => 'UTF-8');
32 use MARC::File::USMARC;
33 use File::Basename qw/fileparse/;
34 use File::Temp qw/tempfile/;
35 use OpenSRF::AppSession;
36 use OpenSRF::Utils::Logger qw/$logger/;
37 use OpenSRF::Transport::PeerHandle;
38 use OpenSRF::Utils::SettingsClient;
41 $Data::Dumper::Indent=0; # for logging
43 # This script will always be an entry point for opensrf,
44 # so go ahead and force log client.
45 $ENV{OSRF_LOG_CLIENT} = 1;
47 # these are updated with each new batch of records
53 my $cur_merge_profile; # this is an object
54 my $bib_merge_profile_obj;
55 my $auth_merge_profile_obj;
59 my $osrf_config = '/openils/conf/opensrf_core.xml';
67 my $bib_merge_profile;
68 my $auth_merge_profile;
73 my $bib_import_no_match;
74 my $bib_auto_overlay_exact;
75 my $bib_auto_overlay_1match;
76 my $bib_auto_overlay_best_match;
77 my $auth_import_no_match;
78 my $auth_auto_overlay_exact;
79 my $auth_auto_overlay_1match;
80 my $auth_auto_overlay_best_match;
82 # deprecated options; these map to their bib_* equivalents
84 my $auto_overlay_exact;
85 my $auto_overlay_1match;
86 my $auto_overlay_best_match;
91 my $net_server_conf = fileparse($0, '.pl').'.conf';
# Getopt::Long option specifications: "=s" takes a string value, "=i" an
# integer value, and a bare name is a boolean flag.
94 'osrf-config=s' => \$osrf_config,
95 'verbose' => \$verbose,
96 'username=s' => \$username,
97 'password=s' => \$password,
# --workstation must take a string value: as a bare flag it would be stored
# as 1 and then handed to oils_login() as the workstation.
98 'workstation=s' => \$workstation,
99 'tempdir=s' => \$tempdir,
100 'spoolfile=s' => \$spoolfile,
101 'wait=i' => \$wait_time,
102 'merge-profile=i' => \$bib_merge_profile,
103 'queue=i' => \$deprecated_queue,
104 'bib-queue=i' => \$bib_queue,
105 'source=i' => \$bib_source,
106 'auth-merge-profile=i' => \$auth_merge_profile,
107 'auth-queue=i' => \$auth_queue,
# Deprecated, type-agnostic flags; they are copied onto the bib-* variables
# after option parsing.
110 'import-no-match' => \$import_no_match,
111 'auto-overlay-exact' => \$auto_overlay_exact,
112 'auto-overlay-1match' => \$auto_overlay_1match,
113 'auto-overlay-best-match' => \$auto_overlay_best_match,
# Per-record-type import strategy flags.
116 'bib-import-no-match' => \$bib_import_no_match,
117 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
118 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
119 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
120 'auth-import-no-match' => \$auth_import_no_match,
121 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
122 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
123 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
125 'net-server-config=s' => \$net_server_conf,
132 Path to OpenSRF configuration file.
135 Path to Net::Server configuration file. Defaults to $net_server_conf.
136 Only required if --spoolfile is not set.
139 Log additional details
142 Evergreen user account which performs the import actions.
145 Evergreen user account password
148 Evergreen workstation
151 MARC data received via the network is stored in a temporary
152 file so Vandelay can access it. This must be a directory
153 the open-ils.vandelay service can access. If you want the
154 file deleted after completion, be sure open-ils.vandelay
155 has write access to the directory and the file.
156 This value defaults to the Vandelay data directory, however
157 this configuration value is only accessible when run from
158 the private opensrf domain, which you may not want to do.
161 Path to a MARC file to load. When a --spoolfile is specified,
162 this script will send the file to vandelay for processing,
163 then exit when complete. In other words, it does not stay
164 alive to accept requests from the network.
167 Amount of time in seconds this script will wait after receiving
168 a connection on the socket and before receiving a complete
169 MARC record. This prevents unintentional denial of service by
170 clients connecting and never sending anything.
173 ID of the vandelay bib record merge profile
176 ID of the vandelay bib record queue
182 ID of the vandelay authority record merge profile
185 ID of the vandelay authority record queue
187 --bib-import-no-match
188 --bib-auto-overlay-exact
189 --bib-auto-overlay-1match
190 --bib-auto-overlay-best-match
191 --auth-import-no-match
192 --auth-auto-overlay-exact
193 --auth-auto-overlay-1match
194 --auth-auto-overlay-best-match
196 Bib and auth import options which map directly to Vandelay import
200 Apply import-no-match to bibs and auto-overlay-exact to auths.
202 $0 --bib-import-no-match --auth-auto-overlay-exact
205 Show this help message
# Deprecated, type-agnostic options are copied onto their bib-* equivalents;
# each use emits a warning that names the replacement option.
212 if ($import_no_match) {
213 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
214 $bib_import_no_match = $import_no_match;
216 if ($auto_overlay_exact) {
217 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
218 $bib_auto_overlay_exact = $auto_overlay_exact;
220 if ($auto_overlay_1match) {
221 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
222 $bib_auto_overlay_1match = $auto_overlay_1match;
224 if ($auto_overlay_best_match) {
225 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
226 $bib_auto_overlay_best_match = $auto_overlay_best_match;
# --queue predates the bib/auth split and is treated as the bib queue.
228 if ($deprecated_queue) {
229 warn "\n--queue is deprecated; use --bib-queue\n";
230 $bib_queue = $deprecated_queue;
# Credentials and at least one destination queue are mandatory.
234 die "--username, --password, AND --workstation required. --help for more info.\n"
235 unless $username and $password and $workstation;
236 die "--bib-queue OR --auth-queue required. --help for more info.\n"
237 unless $bib_queue or $auth_queue;
240 return if $tempdir; # already read or user provided
241 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
242 qw/apps open-ils.vandelay app_settings databases importer/
246 # Sets cur_rec_type to 'auth' if leader/06 of the first
247 # parseable record is 'z', otherwise 'bib'.
# Also selects the queue and record source that go with that type:
# authority records use $auth_queue and an empty source, bib records use
# $bib_queue and $bib_source.
# Takes the path of the MARC file to inspect; dies if it cannot be opened.
248 sub set_record_type {
249 my $file_name = shift;
# Sniff the serialization from the first character: a leading non-digit
# is treated as MARCXML, anything else as binary USMARC.
251 my $marctype = 'USMARC';
# Three-argument open, so that mode characters in the file name are never
# interpreted as an open mode.
252 open(F, '<', $file_name) or
253 die "Unable to open MARC file $file_name : $!\n";
254 $marctype = 'XML' if (getc(F) =~ /^\D/o);
257 my $batch = MARC::Batch->new($marctype, $file_name);
# A record that fails to parse is skipped; the type is taken from the
# first record that can be read.
263 eval {$rec = $batch->next};
264 next if $@; # record parse failure
266 $ldr_06 = substr($rec->leader(), 6, 1) || '';
270 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
272 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
273 $cur_rec_source = $cur_rec_type eq 'auth' ? '' : $bib_source;
277 # Build the Vandelay import options from the command-line options and the
278 # type of record currently being processed ($cur_rec_type).
# The merge profile ID comes from $cur_merge_profile when one is set,
# otherwise it is passed as undef.
279 sub compile_vandelay_ops {
283 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef
# Authority records take the auth-* settings; everything else takes the
# bib-* settings.
286 if ($cur_rec_type eq 'auth') {
287 $vl_ops->{import_no_match} = $auth_import_no_match;
288 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
289 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
290 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
292 $vl_ops->{import_no_match} = $bib_import_no_match;
293 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
294 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
295 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
298 # Default to exact match only if no other strategy is selected.
299 $vl_ops->{auto_overlay_exact} = 1
301 $vl_ops->{auto_overlay_1match} or
302 $vl_ops->{auto_overlay_best_match}
# The compiled options are logged only when --verbose is set.
305 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
310 my $file_name = shift; # filename
312 set_record_type($file_name);
314 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
315 my $req = $ses->request(
316 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
317 $authtoken, undef, # cache key not needed
318 $cur_queue, 'import', $file_name, $cur_rec_source
322 while(my $resp = $req->recv) {
325 $logger->error("Error spooling MARC data: $resp");
327 } elsif($resp->content) {
328 push(@rec_ids, $resp->content);
# Imports previously queued Vandelay records, then deletes the queue
# entries that imported successfully.
# Returns a two-element list: the number of imported records and $failed.
335 sub import_queued_records {
337 my $vl_ops = compile_vandelay_ops();
# The API name depends on the current record type (bib or auth).
339 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
340 my $req = $ses->request(
341 "open-ils.vandelay.${cur_rec_type}_record.list.import",
342 $authtoken, $rec_ids, $vl_ops
345 # collect the successfully imported vandelay records
# Each response either carries an error event, reports a record that was
# not imported, or names an imported queued record.
348 while(my $resp = $req->recv) {
350 $logger->error("Error importing MARC data: $resp");
352 } elsif(my $data = $resp->content) {
354 if($data->{err_event}) {
356 $logger->error(Dumper($data->{err_event}));
359 } elsif ($data->{no_import}) {
360 # no errors, just didn't import, because of rules.
364 "record failed to satisfy Vandelay merge/quality/etc. ".
365 "requirements: " . ($data->{imported} || ''));
# Only responses that carry an "imported" value are queued for cleanup.
368 push(@cleanup_recs, $data->{imported}) if $data->{imported};
373 # clean up the successfully imported vandelay records to prevent queue bloat
# The deletes run inside a single pcrud transaction.
374 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
376 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
# vqar is used for authority records and vqbr for bib records.
379 my $api = 'open-ils.pcrud.delete.';
380 $api .= $cur_rec_type eq 'auth' ? 'vqar' : 'vqbr';
382 foreach (@cleanup_recs) {
384 $pcrud->request($api, $authtoken, $_)->recv;
388 $logger->error("Error deleting queued $cur_rec_type record $_: $@");
# The transaction is committed only when $err is not set.
393 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
396 $logger->info("imported queued vandelay records: @cleanup_recs");
397 return (scalar(@cleanup_recs), $failed);
402 # Each child needs its own opensrf connection.
# Bootstraps an OpenSRF client from $osrf_config and loads the Fieldmapper
# IDL from the path given by the "IDL" configuration value.
# Standalone (--spoolfile) mode also calls this directly to force a
# connection.
403 sub child_init_hook {
404 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
405 Fieldmapper->import(IDL =>
406 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
410 # The core Net::Server method
411 # Reads streams of MARC data from the network, saves the data as a file,
412 # then processes the file via vandelay.
413 sub process_request {
# Peer address and port, used only to identify the client in log messages.
415 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport};
417 $logger->info("$client opened a new connection");
# Check that the connection to opensrf is still usable before reading
# anything from the client.
419 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
420 if(!$ph->flush_socket()) {
421 $logger->error("We received a request, but we are no longer connected".
422 " to opensrf. Exiting and dropping request from $client");
# The read is bounded by $wait_time seconds: the ALRM handler dies, which
# aborts a read that stalls.
428 local $SIG{ALRM} = sub { die "alarm\n" };
429 alarm $wait_time; # prevent accidental tie ups of backend processes
430 local $/ = "\x1D"; # MARC record separator
436 $logger->error("reading from STDIN failed or timed out: $@");
440 $logger->info("stream parser read " . length($data) . " bytes");
444 # copy data to a temporary file so vandelay can scoop it up
# The temp file is created in $tempdir and named after the script ($0).
445 my ($handle, $tempfile) = tempfile("$0_XXXX", DIR => $tempdir)
446 or die "Cannot create tempfile in $tempdir : $!";
448 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
451 process_file($tempfile);
# Points $cur_merge_profile at the merge profile object for the current
# record type. The object is fetched through pcrud on first use and then
# cached per type in $bib_merge_profile_obj / $auth_merge_profile_obj.
# $cur_merge_profile is set to undef when no profile ID was configured for
# the current type.
454 sub set_merge_profile {
# Serve from the per-type cache when the profile was already fetched.
458 return $cur_merge_profile = $bib_merge_profile_obj
459 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
461 return $cur_merge_profile = $auth_merge_profile_obj
462 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
464 # fetch un-cached profile
466 my $profile_id = $cur_rec_type eq 'bib' ?
467 $bib_merge_profile : $auth_merge_profile;
# No profile ID for this record type: clear the current profile and stop.
469 return $cur_merge_profile = undef unless $profile_id;
471 $cur_merge_profile = $apputils->simplereq(
473 'open-ils.pcrud.retrieve.vmp',
474 $authtoken, $profile_id);
476 # cache profile for later
478 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
479 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
485 new_auth_token(); # login
486 my $rec_ids = process_spool($file);
487 my ($imported, $failed) = import_queued_records($rec_ids);
489 if (oils_event_equals($imported, 'NO_SESSION')) {
490 # did the session expire while spooling?
491 new_auth_token(); # retry with new authtoken
492 ($imported, $failed) = import_queued_records($rec_ids);
495 oils_event_die($imported);
497 my $profile = $cur_merge_profile ? $cur_merge_profile->name : '';
499 $msg .= "Successfully imported $imported $cur_rec_type records ".
500 "using merge profile '$profile'\n" if $imported;
501 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
502 $msg .= "\x00" unless $spoolfile;
505 clear_auth_token(); # logout
508 # the authtoken will timeout after the configured inactivity period.
509 # When that happens, get a new one.
511 oils_login($username, $password, 'staff', $workstation)
512 or die "Unable to login to Evergreen as user $username";
# Logs out by calling the open-ils.auth.session.delete API.
515 sub clear_auth_token {
516 $apputils->simplereq(
518 'open-ils.auth.session.delete',
524 # -- execution starts here
527 # individual files are processed in standalone mode.
528 # No Net::Server innards are necessary.
530 child_init_hook(); # force an opensrf connection
531 process_file($spoolfile);
535 # No spoolfile, run in Net::Server mode
539 WARNING: This script provides no security layer. Any client that has
540 access to the server+port can inject MARC records into the system.
545 $args{conf_file} = $net_server_conf if -r $net_server_conf;
546 $args{port} = $port if $port;
548 __PACKAGE__->run(%args);