2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require 'oils_header.pl';
26 use vars qw/$apputils $authtoken/;
31 use MARC::File::XML (BinaryEncoding => 'UTF-8');
32 use MARC::File::USMARC;
33 use File::Basename qw/fileparse/;
34 use File::Temp qw/tempfile/;
35 use OpenSRF::AppSession;
36 use OpenSRF::Utils::Logger qw/$logger/;
37 use OpenSRF::Transport::PeerHandle;
38 use OpenSRF::Utils::SettingsClient;
41 $Data::Dumper::Indent=0; # for logging
43 # This script will always be an entry point for opensrf,
44 # so go ahead and force log client.
45 $ENV{OSRF_LOG_CLIENT} = 1;
47 # these are updated with each new batch of records
53 my $cur_merge_profile; # this is an object
54 my $bib_merge_profile_obj;
55 my $auth_merge_profile_obj;
59 my $osrf_config = '/openils/conf/opensrf_core.xml';
66 my $bib_merge_profile;
67 my $auth_merge_profile;
73 my $bib_import_no_match;
74 my $bib_auto_overlay_exact;
75 my $bib_auto_overlay_1match;
76 my $bib_auto_overlay_best_match;
77 my $auth_import_no_match;
78 my $auth_auto_overlay_exact;
79 my $auth_auto_overlay_1match;
80 my $auth_auto_overlay_best_match;
82 # deprecated options; these map to their bib_* equivalents
84 my $auto_overlay_exact;
85 my $auto_overlay_1match;
86 my $auto_overlay_best_match;
90 my $net_server_conf = fileparse($0, '.pl').'.conf';
93 'osrf-config=s' => \$osrf_config,
94 'verbose' => \$verbose,
95 'username=s' => \$username,
96 'password=s' => \$password,
97 'tempdir=s' => \$tempdir,
98 'spoolfile=s' => \$spoolfile,
99 'wait=i' => \$wait_time,
100 'merge-profile=i' => \$bib_merge_profile,
101 'queue=i' => \$bib_queue,
102 'source=i' => \$bib_source,
103 'auth-merge-profile=i' => \$auth_merge_profile,
104 'auth-queue=i' => \$auth_queue,
105 'auth-source=i' => \$auth_source,
108 'import-no-match' => \$import_no_match,
109 'auto-overlay-exact' => \$auto_overlay_exact,
110 'auto-overlay-1match' => \$auto_overlay_1match,
111 'auto-overlay-best-match' => \$auto_overlay_best_match,
114 'bib-import-no-match' => \$bib_import_no_match,
115 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
116 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
117 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
118 'auth-import-no-match' => \$auth_import_no_match,
119 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
120 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
121 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
123 'net-server-config=s' => \$net_server_conf,
130 Path to OpenSRF configuration file.
133 Path to Net::Server configuration file. Defaults to $net_server_conf.
134 Only required if --spoolfile is not set.
137 Log additional details
140 Evergreen user account which performs the import actions.
143 Evergreen user account password
146 MARC data received via the network is stored in a temporary
147 file so Vandelay can access it. This must be a directory
148 the open-ils.vandelay service can access. If you want the
149 file deleted after completion, be sure open-ils.vandelay
150 has write access to the directory and the file.
151 This value defaults to the Vandelay data directory, however
152 this configuration value is only accessible when run from
153 the private opensrf domain, which you may not want to do.
156 Path to a MARC file to load. When a --spoolfile is specified,
157 this script will send the file to vandelay for processing,
158 then exit when complete. In other words, it does not stay
159 alive to accept requests from the network.
162 Amount of time in seconds this script will wait after receiving
163 a connection on the socket and before receiving a complete
164 MARC record. This prevents unintentional denial of service by
165 clients connecting and never sending anything.
168 ID of the vandelay bib record merge profile
171 ID of the vandelay bib record queue
177 ID of the vandelay authority record merge profile
180 ID of the vandelay authority record queue
183 ID of the bib source for authority records
185 --bib-import-no-match
186 --bib-auto-overlay-exact
187 --bib-auto-overlay-1match
188 --bib-auto-overlay-best-match
189 --auth-import-no-match
190 --auth-auto-overlay-exact
191 --auth-auto-overlay-1match
192 --auth-auto-overlay-best-match
194 Bib and auth import options which map directly to Vandelay import
198 Apply import-no-match to bibs and auto-overlay-exact to auths.
200 $0 --bib-import-no-match --auth-auto-overlay-exact
203 Show this help message
210 if ($import_no_match) {
211 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
212 $bib_import_no_match = $import_no_match;
214 if ($auto_overlay_exact) {
215 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
216 $bib_auto_overlay_exact = $auto_overlay_exact;
218 if ($auto_overlay_1match) {
219 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
220 $bib_auto_overlay_1match = $auto_overlay_1match;
222 if ($auto_overlay_best_match) {
223 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
224 $bib_auto_overlay_best_match = $auto_overlay_best_match;
227 die "--username AND --password required. --help for more info.\n"
228 unless $username and $password;
229 die "--bib-queue OR --auth-queue required. --help for more info.\n"
230 unless $bib_queue or $auth_queue;
233 return if $tempdir; # already read or user provided
234 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
235 qw/apps open-ils.vandelay app_settings databases importer/
239 # Sets cur_rec_type to 'auth' if leader/06 of the first
240 # parseable record is 'z', otherwise 'bib'.
241 sub set_record_type {
242 my $file_name = shift;
244 my $marctype = 'USMARC';
245 open(F, $file_name) or
246 die "Unable to open MARC file $file_name : $!\n";
247 $marctype = 'XML' if (getc(F) =~ /^\D/o);
250 my $batch = new MARC::Batch ($marctype, $file_name);
256 eval {$rec = $batch->next};
257 next if $@; # record parse failure
259 $ldr_06 = substr($rec->leader(), 6, 1) || '';
263 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
265 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
266 $cur_rec_source = $cur_rec_type eq 'auth' ? $auth_source : $bib_source;
270 # set vandelay options based on command line ops and the type of record
271 # currently in process.
# The options carry the id of the current merge profile (undef when no
# profile is set) plus four import flags. The flags are copied from the
# auth-* command-line variables when cur_rec_type is 'auth', and from the
# bib-* variables otherwise.
272 sub compile_vandelay_ops {
276 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef
279 if ($cur_rec_type eq 'auth') {
280 $vl_ops->{import_no_match} = $auth_import_no_match;
281 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
282 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
283 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
285 $vl_ops->{import_no_match} = $bib_import_no_match;
286 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
287 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
288 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
291 # Default to exact match only if no other strategy is selected.
292 $vl_ops->{auto_overlay_exact} = 1
294 $vl_ops->{auto_overlay_1match} or
295 $vl_ops->{auto_overlay_best_match}
# The compiled options are logged only when --verbose was given.
298 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
303 my $file_name = shift; # filename
305 set_record_type($file_name);
307 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
308 my $req = $ses->request(
309 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
310 $authtoken, undef, # cache key not needed
311 $cur_queue, 'import', $file_name, $cur_rec_source
315 while(my $resp = $req->recv) {
318 $logger->error("Error spooling MARC data: $resp");
320 } elsif($resp->content) {
321 push(@rec_ids, $resp->content);
# Asks open-ils.vandelay to import the queued records identified by
# $rec_ids (the value callers pass in), using the options built by
# compile_vandelay_ops(), then deletes the successfully imported queue
# entries through open-ils.pcrud.
# Returns a two-element list: the number of imported (cleaned-up) records
# and $failed.
328 sub import_queued_records {
330 my $vl_ops = compile_vandelay_ops();
332 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
# The API name is chosen by record type: "bib_record" or "auth_record".
333 my $req = $ses->request(
334 "open-ils.vandelay.${cur_rec_type}_record.list.import",
335 $authtoken, $rec_ids, $vl_ops
338 # collect the successfully imported vandelay records
341 while(my $resp = $req->recv) {
343 $logger->error("Error importing MARC data: $resp");
345 } elsif(my $data = $resp->content) {
# A response carrying err_event is logged as an error.
347 if($data->{err_event}) {
349 $logger->error(Dumper($data->{err_event}));
# Only responses that report an "imported" value are queued for cleanup.
353 push(@cleanup_recs, $data->{imported}) if $data->{imported};
358 # clean up the successfully imported vandelay records to prevent queue bloat
359 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
361 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
# NOTE(review): the delete call below always names the vqbr class, even
# when cur_rec_type is 'auth' -- confirm this is intended for authority
# queue records.
364 foreach (@cleanup_recs) {
367 'open-ils.pcrud.delete.vqbr', $authtoken, $_)->recv;
371 $logger->error("Error deleteing queued bib record $_: $@");
# The transaction is committed only when no delete error was recorded.
376 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
379 $logger->info("imported queued vandelay records: @cleanup_recs");
380 return (scalar(@cleanup_recs), $failed);
385 # Each child needs its own opensrf connection.
# Bootstraps an OpenSRF client from the configured $osrf_config file, then
# loads the Fieldmapper classes from the IDL path stored under the "IDL"
# settings key. Also called directly in standalone (--spoolfile) mode.
386 sub child_init_hook {
387 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
388 Fieldmapper->import(IDL =>
389 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
393 # The core Net::Server method
394 # Reads streams of MARC data from the network, saves the data as a file,
395 # then processes the file via vandelay.
# Steps visible in this block: identify the peer, verify the OpenSRF
# connection, read the data under an alarm, write it to a temp file in
# $tempdir, and pass that file to process_file().
396 sub process_request {
# "address:port" of the connected peer, used only in log messages here.
398 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport};
400 $logger->info("$client opened a new connection");
# If flush_socket() reports failure, the request is logged as dropped.
402 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
403 if(!$ph->flush_socket()) {
404 $logger->error("We received a request, but we are no longer connected".
405 " to opensrf. Exiting and dropping request from $client");
# The ALRM handler dies with "alarm\n" once $wait_time seconds elapse.
411 local $SIG{ALRM} = sub { die "alarm\n" };
412 alarm $wait_time; # prevent accidental tie ups of backend processes
413 local $/ = "\x1D"; # MARC record separator
419 $logger->error("reading from STDIN failed or timed out: $@");
423 $logger->info("stream parser read " . length($data) . " bytes");
427 # copy data to a temporary file so vandelay can scoop it up
# NOTE(review): the template is built from $0; if the script is invoked
# with a directory in its path, confirm File::Temp accepts that template
# together with DIR.
428 my ($handle, $tempfile) = tempfile("$0_XXXX", DIR => $tempdir)
429 or die "Cannot create tempfile in $tempdir : $!";
431 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
434 process_file($tempfile);
# Sets $cur_merge_profile to the merge profile object for the current
# record type (bib or auth). A profile object fetched earlier is reused;
# otherwise it is retrieved by ID through pcrud and cached. When no profile
# ID was configured for the record type, $cur_merge_profile becomes undef.
437 sub set_merge_profile {
# Serve from the per-type cache when the object is already loaded.
441 return $cur_merge_profile = $bib_merge_profile_obj
442 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
444 return $cur_merge_profile = $auth_merge_profile_obj
445 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
447 # fetch un-cached profile
# Profile IDs come from --merge-profile (bib) and --auth-merge-profile.
449 my $profile_id = $cur_rec_type eq 'bib' ?
450 $bib_merge_profile : $auth_merge_profile;
452 return $cur_merge_profile = undef unless $profile_id;
454 $cur_merge_profile = $apputils->simplereq(
456 'open-ils.pcrud.retrieve.vmp',
457 $authtoken, $profile_id);
459 # cache profile for later
461 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
462 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
468 new_auth_token(); # login
469 my $rec_ids = process_spool($file);
470 my ($imported, $failed) = import_queued_records($rec_ids);
472 if (oils_event_equals($imported, 'NO_SESSION')) {
473 # did the session expire while spooling?
474 new_auth_token(); # retry with new authtoken
475 ($imported, $failed) = import_queued_records($rec_ids);
478 oils_event_die($imported);
480 my $profile = $cur_merge_profile ? $cur_merge_profile->name : '';
482 $msg .= "Successfully imported $imported $cur_rec_type records ".
483 "using merge profile '$profile'\n" if $imported;
484 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
488 clear_auth_token(); # logout
491 # the authtoken will timeout after the configured inactivity period.
492 # When that happens, get a new one.
494 oils_login($username, $password, 'staff')
495 or die "Unable to login to Evergreen as user $username";
# Logs out by issuing an open-ils.auth.session.delete request through
# $apputils->simplereq.
498 sub clear_auth_token {
499 $apputils->simplereq(
501 'open-ils.auth.session.delete',
507 # -- execution starts here
510 # individual files are processed in standalone mode.
511 # No Net::Server innards are necessary.
513 child_init_hook(); # force an opensrf connection
514 process_file($spoolfile);
518 # No spoolfile, run in Net::Server mode
522 WARNING: This script provides no security layer. Any client that has
523 access to the server+port can inject MARC records into the system.
528 $args{conf_file} = $net_server_conf if -r $net_server_conf;
529 $args{port} = $port if $port;
531 __PACKAGE__->run(%args);