2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require "$FindBin::Bin/oils_header.pl";
27 use vars qw/$apputils $authtoken/;
32 use MARC::File::XML (BinaryEncoding => 'UTF-8');
33 use MARC::File::USMARC;
34 use File::Basename qw/fileparse/;
35 use File::Temp qw/tempfile/;
36 use OpenSRF::AppSession;
37 use OpenSRF::Utils::Logger qw/$logger/;
38 use OpenSRF::Transport::PeerHandle;
39 use OpenSRF::Utils::SettingsClient;
42 $Data::Dumper::Indent=0; # for logging
44 # This script will always be an entry point for opensrf,
45 # so go ahead and force log client.
46 $ENV{OSRF_LOG_CLIENT} = 1;
48 # these are updated with each new batch of records
54 my $cur_merge_profile; # this is an object
55 my $bib_merge_profile_obj;
56 my $auth_merge_profile_obj;
60 my $osrf_config = '@sysconfdir@/opensrf_core.xml';
68 my $bib_merge_profile;
69 my $auth_merge_profile;
74 my $bib_import_no_match;
75 my $bib_auto_overlay_exact;
76 my $bib_auto_overlay_1match;
77 my $bib_auto_overlay_best_match;
78 my $auth_import_no_match;
79 my $auth_auto_overlay_exact;
80 my $auth_auto_overlay_1match;
81 my $auth_auto_overlay_best_match;
83 # deprecated options; these map to their bib_* equivalents
85 my $auto_overlay_exact;
86 my $auto_overlay_1match;
87 my $auto_overlay_best_match;
92 my $net_server_conf = (-r "@sysconfdir@/marc_stream_importer.conf") ? "@sysconfdir@/marc_stream_importer.conf" : undef;
95 'osrf-config=s' => \$osrf_config,
96 'verbose' => \$verbose,
97 'username=s' => \$username,
98 'password=s' => \$password,
99 'workstation=s' => \$workstation,
100 'tempdir=s' => \$tempdir,
101 'spoolfile=s' => \$spoolfile,
102 'wait=i' => \$wait_time,
103 'merge-profile=i' => \$bib_merge_profile,
104 'queue=i' => \$deprecated_queue,
105 'bib-queue=i' => \$bib_queue,
106 'source=i' => \$bib_source,
107 'auth-merge-profile=i' => \$auth_merge_profile,
108 'auth-queue=i' => \$auth_queue,
111 'import-no-match' => \$import_no_match,
112 'auto-overlay-exact' => \$auto_overlay_exact,
113 'auto-overlay-1match' => \$auto_overlay_1match,
114 'auto-overlay-best-match' => \$auto_overlay_best_match,
117 'bib-import-no-match' => \$bib_import_no_match,
118 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
119 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
120 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
121 'auth-import-no-match' => \$auth_import_no_match,
122 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
123 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
124 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
126 'net-server-config=s' => \$net_server_conf,
133 Path to OpenSRF configuration file.
136 Path to Net::Server configuration file. Defaults to $net_server_conf.
137 Only required if --spoolfile is not set.
140 Log additional details
143 Evergreen user account which performs the import actions.
146 Evergreen user account password
149 Evergreen workstation
152 MARC data received via the network is stored in a temporary
153 file so Vandelay can access it. This must be a directory
154 the open-ils.vandelay service can access. If you want the
155 file deleted after completion, be sure open-ils.vandelay
156 has write access to the directory and the file.
157 This value defaults to the Vandelay data directory, however
158 this configuration value is only accessible when run from
159 the private opensrf domain, which you may not want to do.
162 Path to a MARC file to load. When a --spoolfile is specified,
163 this script will send the file to vandelay for processing,
164 then exit when complete. In other words, it does not stay
165 alive to accept requests from the network.
168 Amount of time in seconds this script will wait after receiving
169 a connection on the socket and before receiving a complete
170 MARC record. This prevents unintentional denial of service by
171 clients connecting and never sending anything.
174 ID of the vandelay bib record merge profile
177 ID of the vandelay bib record queue
183 ID of the vandelay authority record merge profile
186 ID of the vandelay authority record queue
188 --bib-import-no-match
189 --bib-auto-overlay-exact
190 --bib-auto-overlay-1match
191 --bib-auto-overlay-best-match
192 --auth-import-no-match
193 --auth-auto-overlay-exact
194 --auth-auto-overlay-1match
195 --auth-auto-overlay-best-match
197 Bib and auth import options which map directly to Vandelay import
201 Apply import-no-match to bibs and auto-overlay-exact to auths.
203 $0 --bib-import-no-match --auth-auto-overlay-exact
206 Show this help message
213 if ($import_no_match) {
214 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
215 $bib_import_no_match = $import_no_match;
217 if ($auto_overlay_exact) {
218 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
219 $bib_auto_overlay_exact = $auto_overlay_exact;
221 if ($auto_overlay_1match) {
222 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
223 $bib_auto_overlay_1match = $auto_overlay_1match;
225 if ($auto_overlay_best_match) {
226 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
227 $bib_auto_overlay_best_match = $auto_overlay_best_match;
229 if ($deprecated_queue) {
230 warn "\n--queue is deprecated; use --bib-queue\n";
231 $bib_queue = $deprecated_queue;
235 die "--username, --password, AND --workstation required. --help for more info.\n"
236 unless $username and $password and $workstation;
237 die "--bib-queue OR --auth-queue required. --help for more info.\n"
238 unless $bib_queue or $auth_queue;
241 return if $tempdir; # already read or user provided
242 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
243 qw/apps open-ils.vandelay app_settings databases importer/
247 # Sets cur_rec_type to 'auth' if leader/06 of the first
248 # parseable record is 'z', otherwise 'bib'.
249 sub set_record_type {
250 my $file_name = shift;
252 my $marctype = 'USMARC';
253 open(F, $file_name) or
254 die "Unable to open MARC file $file_name : $!\n";
255 $marctype = 'XML' if (getc(F) =~ /^\D/o);
258 my $batch = new MARC::Batch ($marctype, $file_name);
264 eval {$rec = $batch->next};
265 next if $@; # record parse failure
267 $ldr_06 = substr($rec->leader(), 6, 1) || '';
271 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
273 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
274 $cur_rec_source = $cur_rec_type eq 'auth' ? '' : $bib_source;
278 # set vandelay options based on command line ops and the type of record
279 # currently in process.
280 sub compile_vandelay_ops {
284 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef
287 if ($cur_rec_type eq 'auth') {
288 $vl_ops->{import_no_match} = $auth_import_no_match;
289 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
290 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
291 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
293 $vl_ops->{import_no_match} = $bib_import_no_match;
294 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
295 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
296 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
299 # Default to exact match only if no other strategy is selected.
300 $vl_ops->{auto_overlay_exact} = 1
302 $vl_ops->{auto_overlay_1match} or
303 $vl_ops->{auto_overlay_best_match}
306 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
311 my $file_name = shift; # filename
313 set_record_type($file_name);
315 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
316 my $req = $ses->request(
317 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
318 $authtoken, undef, # cache key not needed
319 $cur_queue, 'import', $file_name, $cur_rec_source
323 while(my $resp = $req->recv) {
326 $logger->error("Error spooling MARC data: $resp");
328 } elsif($resp->content) {
329 push(@rec_ids, $resp->content);
336 sub import_queued_records {
338 my $vl_ops = compile_vandelay_ops();
340 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
341 my $req = $ses->request(
342 "open-ils.vandelay.${cur_rec_type}_record.list.import",
343 $authtoken, $rec_ids, $vl_ops
346 # collect the successfully imported vandelay records
349 while(my $resp = $req->recv) {
351 $logger->error("Error importing MARC data: $resp");
353 } elsif(my $data = $resp->content) {
355 if($data->{err_event}) {
357 $logger->error(Dumper($data->{err_event}));
360 } elsif ($data->{no_import}) {
361 # no errors, just didn't import, because of rules.
365 "record failed to satisfy Vandelay merge/quality/etc. ".
366 "requirements: " . ($data->{imported} || ''));
369 push(@cleanup_recs, $data->{imported}) if $data->{imported};
374 # clean up the successfully imported vandelay records to prevent queue bloat
375 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
377 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
380 my $api = 'open-ils.pcrud.delete.';
381 $api .= $cur_rec_type eq 'auth' ? 'vqar' : 'vqbr';
383 foreach (@cleanup_recs) {
385 $pcrud->request($api, $authtoken, $_)->recv;
389 $logger->error("Error deleting queued $cur_rec_type record $_: $@");
394 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
397 $logger->info("imported queued vandelay records: @cleanup_recs");
398 return (scalar(@cleanup_recs), $failed);
403 # Each child needs its own opensrf connection.
404 sub child_init_hook {
405 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
406 Fieldmapper->import(IDL =>
407 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
411 # The core Net::Server method
412 # Reads streams of MARC data from the network, saves the data as a file,
413 # then processes the file via vandelay.
414 sub process_request {
416 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport};
418 $logger->info("$client opened a new connection");
420 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
421 if(!$ph->flush_socket()) {
422 $logger->error("We received a request, but we are no longer connected".
423 " to opensrf. Exiting and dropping request from $client");
429 local $SIG{ALRM} = sub { die "alarm\n" };
430 alarm $wait_time; # prevent accidental tie ups of backend processes
431 local $/ = "\x1D"; # MARC record separator
437 $logger->error("reading from STDIN failed or timed out: $@");
441 $logger->info("stream parser read " . length($data) . " bytes");
445 # copy data to a temporary file so vandelay can scoop it up
446 my ($handle, $tempfile) = tempfile("$0_XXXX", DIR => $tempdir)
447 or die "Cannot create tempfile in $tempdir : $!";
449 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
452 process_file($tempfile);
455 sub set_merge_profile {
459 return $cur_merge_profile = $bib_merge_profile_obj
460 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
462 return $cur_merge_profile = $auth_merge_profile_obj
463 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
465 # fetch un-cached profile
467 my $profile_id = $cur_rec_type eq 'bib' ?
468 $bib_merge_profile : $auth_merge_profile;
470 return $cur_merge_profile = undef unless $profile_id;
472 $cur_merge_profile = $apputils->simplereq(
474 'open-ils.pcrud.retrieve.vmp',
475 $authtoken, $profile_id);
477 # cache profile for later
479 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
480 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
486 new_auth_token(); # login
487 my $rec_ids = process_spool($file);
488 my ($imported, $failed) = import_queued_records($rec_ids);
490 if (oils_event_equals($imported, 'NO_SESSION')) {
491 # did the session expire while spooling?
492 new_auth_token(); # retry with new authtoken
493 ($imported, $failed) = import_queued_records($rec_ids);
496 oils_event_die($imported);
498 my $profile = $cur_merge_profile ? $cur_merge_profile->name : '';
500 $msg .= "Successfully imported $imported $cur_rec_type records ".
501 "using merge profile '$profile'\n" if $imported;
502 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
503 $msg .= "\x00" unless $spoolfile;
506 clear_auth_token(); # logout
509 # the authtoken will time out after the configured inactivity period.
510 # When that happens, get a new one.
512 oils_login($username, $password, 'staff', $workstation)
513 or die "Unable to login to Evergreen as user $username";
516 sub clear_auth_token {
517 $apputils->simplereq(
519 'open-ils.auth.session.delete',
525 # -- execution starts here
528 # individual files are processed in standalone mode.
529 # No Net::Server innards are necessary.
531 child_init_hook(); # force an opensrf connection
532 process_file($spoolfile);
536 # No spoolfile, run in Net::Server mode
540 WARNING: This script provides no security layer. Any client that has
541 access to the server+port can inject MARC records into the system.
546 $args{conf_file} = $net_server_conf if -r $net_server_conf;
547 $args{port} = $port if $port;
549 __PACKAGE__->run(%args);