2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require 'oils_header.pl';
26 use vars qw/$apputils $authtoken/;
31 use MARC::File::XML (BinaryEncoding => 'UTF-8');
32 use MARC::File::USMARC;
33 use File::Basename qw/fileparse/;
34 use File::Temp qw/tempfile/;
35 use OpenSRF::AppSession;
36 use OpenSRF::Utils::Logger qw/$logger/;
37 use OpenSRF::Transport::PeerHandle;
38 use OpenSRF::Utils::SettingsClient;
41 $Data::Dumper::Indent=0; # for logging
43 # This script will always be an entry point for opensrf,
44 # so go ahead and force log client.
45 $ENV{OSRF_LOG_CLIENT} = 1;
47 # these are updated with each new batch of records
53 my $cur_merge_profile; # this is an object
54 my $bib_merge_profile_obj;
55 my $auth_merge_profile_obj;
59 my $osrf_config = '/openils/conf/opensrf_core.xml';
66 my $bib_merge_profile;
67 my $auth_merge_profile;
73 my $bib_import_no_match;
74 my $bib_auto_overlay_exact;
75 my $bib_auto_overlay_1match;
76 my $bib_auto_overlay_best_match;
77 my $auth_import_no_match;
78 my $auth_auto_overlay_exact;
79 my $auth_auto_overlay_1match;
80 my $auth_auto_overlay_best_match;
82 # deprecated options; these map to their bib_* equivalents
84 my $auto_overlay_exact;
85 my $auto_overlay_1match;
86 my $auto_overlay_best_match;
91 my $net_server_conf = fileparse($0, '.pl').'.conf';
94 'osrf-config=s' => \$osrf_config,
95 'verbose' => \$verbose,
96 'username=s' => \$username,
97 'password=s' => \$password,
98 'tempdir=s' => \$tempdir,
99 'spoolfile=s' => \$spoolfile,
100 'wait=i' => \$wait_time,
101 'merge-profile=i' => \$bib_merge_profile,
102 'queue=i' => \$deprecated_queue,
103 'bib-queue=i' => \$bib_queue,
104 'source=i' => \$bib_source,
105 'auth-merge-profile=i' => \$auth_merge_profile,
106 'auth-queue=i' => \$auth_queue,
107 'auth-source=i' => \$auth_source,
110 'import-no-match' => \$import_no_match,
111 'auto-overlay-exact' => \$auto_overlay_exact,
112 'auto-overlay-1match' => \$auto_overlay_1match,
113 'auto-overlay-best-match' => \$auto_overlay_best_match,
116 'bib-import-no-match' => \$bib_import_no_match,
117 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
118 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
119 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
120 'auth-import-no-match' => \$auth_import_no_match,
121 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
122 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
123 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
125 'net-server-config=s' => \$net_server_conf,
132 Path to OpenSRF configuration file.
135 Path to Net::Server configuration file. Defaults to $net_server_conf.
136 Only required if --spoolfile is not set.
139 Log additional details
142 Evergreen user account which performs the import actions.
145 Evergreen user account password
148 MARC data received via the network is stored in a temporary
149 file so Vandelay can access it. This must be a directory
150 the open-ils.vandelay service can access. If you want the
151 file deleted after completion, be sure open-ils.vandelay
152 has write access to the directory and the file.
153 This value defaults to the Vandelay data directory, however
154 this configuration value is only accessible when run from
155 the private opensrf domain, which you may not want to do.
158 Path to a MARC file to load. When a --spoolfile is specified,
159 this script will send the file to vandelay for processing,
160 then exit when complete. In other words, it does not stay
161 alive to accept requests from the network.
164 Amount of time in seconds this script will wait after receiving
165 a connection on the socket and before receiving a complete
166 MARC record. This prevents unintentional denial of service by
167 clients connecting and never sending anything.
170 ID of the vandelay bib record merge profile
173 ID of the vandelay bib record queue
179 ID of the vandelay authority record merge profile
182 ID of the vandelay authority record queue
185 ID of the bib source for authority records
187 --bib-import-no-match
188 --bib-auto-overlay-exact
189 --bib-auto-overlay-1match
190 --bib-auto-overlay-best-match
191 --auth-import-no-match
192 --auth-auto-overlay-exact
193 --auth-auto-overlay-1match
194 --auth-auto-overlay-best-match
196 Bib and auth import options which map directly to Vandelay import
200 Apply import-no-match to bibs and auto-overlay-exact to auths.
202 $0 --bib-import-no-match --auth-auto-overlay-exact
205 Show this help message
212 if ($import_no_match) {
213 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
214 $bib_import_no_match = $import_no_match;
216 if ($auto_overlay_exact) {
217 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
218 $bib_auto_overlay_exact = $auto_overlay_exact;
220 if ($auto_overlay_1match) {
221 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
222 $bib_auto_overlay_1match = $auto_overlay_1match;
224 if ($auto_overlay_best_match) {
225 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
226 $bib_auto_overlay_best_match = $auto_overlay_best_match;
228 if ($deprecated_queue) {
229 warn "\n--queue is deprecated; use --bib-queue\n";
230 $bib_queue = $deprecated_queue;
234 die "--username AND --password required. --help for more info.\n"
235 unless $username and $password;
236 die "--bib-queue OR --auth-queue required. --help for more info.\n"
237 unless $bib_queue or $auth_queue;
240 return if $tempdir; # already read or user provided
241 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
242 qw/apps open-ils.vandelay app_settings databases importer/
246 # Sets cur_rec_type to 'auth' if leader/06 of the first
247 # parseable record is 'z', otherwise 'bib'.
248 sub set_record_type {
249 my $file_name = shift;
251 my $marctype = 'USMARC';
252 open(F, $file_name) or
253 die "Unable to open MARC file $file_name : $!\n";
254 $marctype = 'XML' if (getc(F) =~ /^\D/o);
257 my $batch = new MARC::Batch ($marctype, $file_name);
263 eval {$rec = $batch->next};
264 next if $@; # record parse failure
266 $ldr_06 = substr($rec->leader(), 6, 1) || '';
270 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
272 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
273 $cur_rec_source = $cur_rec_type eq 'auth' ? $auth_source : $bib_source;
277 # Set vandelay options based on command-line options and the type of record
278 # currently being processed.
279 sub compile_vandelay_ops {
283 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef
286 if ($cur_rec_type eq 'auth') {
287 $vl_ops->{import_no_match} = $auth_import_no_match;
288 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
289 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
290 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
292 $vl_ops->{import_no_match} = $bib_import_no_match;
293 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
294 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
295 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
298 # Default to exact match only if no other strategy is selected.
299 $vl_ops->{auto_overlay_exact} = 1
301 $vl_ops->{auto_overlay_1match} or
302 $vl_ops->{auto_overlay_best_match}
305 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
310 my $file_name = shift; # filename
312 set_record_type($file_name);
314 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
315 my $req = $ses->request(
316 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
317 $authtoken, undef, # cache key not needed
318 $cur_queue, 'import', $file_name, $cur_rec_source
322 while(my $resp = $req->recv) {
325 $logger->error("Error spooling MARC data: $resp");
327 } elsif($resp->content) {
328 push(@rec_ids, $resp->content);
335 sub import_queued_records {
337 my $vl_ops = compile_vandelay_ops();
339 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
340 my $req = $ses->request(
341 "open-ils.vandelay.${cur_rec_type}_record.list.import",
342 $authtoken, $rec_ids, $vl_ops
345 # collect the successfully imported vandelay records
348 while(my $resp = $req->recv) {
350 $logger->error("Error importing MARC data: $resp");
352 } elsif(my $data = $resp->content) {
354 if($data->{err_event}) {
356 $logger->error(Dumper($data->{err_event}));
359 } elsif ($data->{no_import}) {
360 # no errors, just didn't import, because of rules.
364 "record failed to satisfy Vandelay merge/quality/etc. ".
365 "requirements: " . ($data->{imported} || ''));
368 push(@cleanup_recs, $data->{imported}) if $data->{imported};
373 # clean up the successfully imported vandelay records to prevent queue bloat
374 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
376 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
379 foreach (@cleanup_recs) {
382 'open-ils.pcrud.delete.vqbr', $authtoken, $_)->recv;
386 $logger->error("Error deleteing queued bib record $_: $@");
391 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
394 $logger->info("imported queued vandelay records: @cleanup_recs");
395 return (scalar(@cleanup_recs), $failed);
400 # Each child needs its own opensrf connection.
401 sub child_init_hook {
402 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
403 Fieldmapper->import(IDL =>
404 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
408 # The core Net::Server method
409 # Reads streams of MARC data from the network, saves the data as a file,
410 # then processes the file via vandelay.
411 sub process_request {
413 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport};
415 $logger->info("$client opened a new connection");
417 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
418 if(!$ph->flush_socket()) {
419 $logger->error("We received a request, but we are no longer connected".
420 " to opensrf. Exiting and dropping request from $client");
426 local $SIG{ALRM} = sub { die "alarm\n" };
427 alarm $wait_time; # prevent accidental tie ups of backend processes
428 local $/ = "\x1D"; # MARC record separator
434 $logger->error("reading from STDIN failed or timed out: $@");
438 $logger->info("stream parser read " . length($data) . " bytes");
442 # copy data to a temporary file so vandelay can scoop it up
443 my ($handle, $tempfile) = tempfile("$0_XXXX", DIR => $tempdir)
444 or die "Cannot create tempfile in $tempdir : $!";
446 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
449 process_file($tempfile);
452 sub set_merge_profile {
456 return $cur_merge_profile = $bib_merge_profile_obj
457 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
459 return $cur_merge_profile = $auth_merge_profile_obj
460 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
462 # fetch un-cached profile
464 my $profile_id = $cur_rec_type eq 'bib' ?
465 $bib_merge_profile : $auth_merge_profile;
467 return $cur_merge_profile = undef unless $profile_id;
469 $cur_merge_profile = $apputils->simplereq(
471 'open-ils.pcrud.retrieve.vmp',
472 $authtoken, $profile_id);
474 # cache profile for later
476 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
477 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
483 new_auth_token(); # login
484 my $rec_ids = process_spool($file);
485 my ($imported, $failed) = import_queued_records($rec_ids);
487 if (oils_event_equals($imported, 'NO_SESSION')) {
488 # did the session expire while spooling?
489 new_auth_token(); # retry with new authtoken
490 ($imported, $failed) = import_queued_records($rec_ids);
493 oils_event_die($imported);
495 my $profile = $cur_merge_profile ? $cur_merge_profile->name : '';
497 $msg .= "Successfully imported $imported $cur_rec_type records ".
498 "using merge profile '$profile'\n" if $imported;
499 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
503 clear_auth_token(); # logout
506 # the authtoken will time out after the configured inactivity period.
507 # When that happens, get a new one.
509 oils_login($username, $password, 'staff')
510 or die "Unable to login to Evergreen as user $username";
513 sub clear_auth_token {
514 $apputils->simplereq(
516 'open-ils.auth.session.delete',
522 # -- execution starts here
525 # individual files are processed in standalone mode.
526 # No Net::Server innards are necessary.
528 child_init_hook(); # force an opensrf connection
529 process_file($spoolfile);
533 # No spoolfile, run in Net::Server mode
537 WARNING: This script provides no security layer. Any client that has
538 access to the server+port can inject MARC records into the system.
543 $args{conf_file} = $net_server_conf if -r $net_server_conf;
544 $args{port} = $port if $port;
546 __PACKAGE__->run(%args);