2 # Copyright (C) 2008-2014 Equinox Software, Inc.
3 # Copyright (C) 2014 King County Library System
4 # Author: Bill Erickson <berickxx@gmail.com>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 # Sends MARC records, either from a file or from data delivered
18 # via the network, to open-ils.vandelay to be imported.
19 # ---------------------------------------------------------------
22 use Net::Server::PreFork;
23 use base qw/Net::Server::PreFork/;
25 require "$FindBin::Bin/oils_header.pl";
27 use vars qw/$apputils $authtoken/;
32 use MARC::File::XML (BinaryEncoding => 'UTF-8');
33 use MARC::File::USMARC;
34 use File::Basename qw/fileparse/;
35 use File::Temp qw/tempfile/;
36 use OpenSRF::AppSession;
37 use OpenSRF::Utils::Logger qw/$logger/;
38 use OpenSRF::Transport::PeerHandle;
39 use OpenSRF::Utils::SettingsClient;
42 $Data::Dumper::Indent=0; # for logging
44 # This script will always be an entry point for opensrf,
45 # so go ahead and force log client.
46 $ENV{OSRF_LOG_CLIENT} = 1;
48 # these are updated with each new batch of records
54 my $cur_merge_profile; # this is an object
# Merge profile objects cached per record type by set_merge_profile so a
# profile that was already fetched can be reused.
55 my $bib_merge_profile_obj;
56 my $auth_merge_profile_obj;
# Default OpenSRF config path; --osrf-config overrides it.
# NOTE(review): the sysconfdir token looks like a build-time substitution
# placeholder -- confirm this file is a template processed before install.
60 my $osrf_config = '@sysconfdir@/opensrf_core.xml';
# Merge profile IDs, set from --merge-profile and --auth-merge-profile.
68 my $bib_merge_profile;
69 my $auth_merge_profile;
# Per-record-type import flags, set from the --bib-* / --auth-* options and
# copied into the Vandelay options hash by compile_vandelay_ops.
74 my $bib_import_no_match;
75 my $bib_auto_overlay_exact;
76 my $bib_auto_overlay_1match;
77 my $bib_auto_overlay_best_match;
78 my $bib_match_quality_ratio;
79 my $auth_import_no_match;
80 my $auth_auto_overlay_exact;
81 my $auth_auto_overlay_1match;
82 my $auth_auto_overlay_best_match;
83 my $auth_match_quality_ratio;
85 # deprecated options; these map to their bib_* equivalents
87 my $auto_overlay_exact;
88 my $auto_overlay_1match;
89 my $auto_overlay_best_match;
# Net::Server config file: the default path when that file is readable,
# otherwise undef; --net-server-config overrides it.
94 my $net_server_conf = (-r "@sysconfdir@/marc_stream_importer.conf") ? "@sysconfdir@/marc_stream_importer.conf" : undef;
# Command-line option specs, each bound by reference to a file-level
# variable. NOTE(review): the enclosing call is not visible in this excerpt;
# the "name=type" strings look like Getopt::Long specs -- confirm.
97 'osrf-config=s' => \$osrf_config,
98 'verbose' => \$verbose,
99 'username=s' => \$username,
100 'password=s' => \$password,
101 'workstation=s' => \$workstation,
102 'tempdir=s' => \$tempdir,
103 'spoolfile=s' => \$spoolfile,
104 'wait=i' => \$wait_time,
# --merge-profile and --source apply to bib records; --queue is the
# deprecated spelling of --bib-queue and is copied over after parsing.
105 'merge-profile=i' => \$bib_merge_profile,
106 'queue=i' => \$deprecated_queue,
107 'bib-queue=i' => \$bib_queue,
108 'source=i' => \$bib_source,
109 'auth-merge-profile=i' => \$auth_merge_profile,
110 'auth-queue=i' => \$auth_queue,
# Deprecated un-prefixed flags; after parsing they are copied onto their
# bib-* equivalents with a deprecation warning.
113 'import-no-match' => \$import_no_match,
114 'auto-overlay-exact' => \$auto_overlay_exact,
115 'auto-overlay-1match' => \$auto_overlay_1match,
116 'auto-overlay-best-match' => \$auto_overlay_best_match,
# Per-record-type import flags passed on to Vandelay.
119 'bib-import-no-match' => \$bib_import_no_match,
120 'bib-auto-overlay-exact' => \$bib_auto_overlay_exact,
121 'bib-auto-overlay-1match' => \$bib_auto_overlay_1match,
122 'bib-auto-overlay-best-match' => \$bib_auto_overlay_best_match,
123 'bib-match-quality-ratio=f' => \$bib_match_quality_ratio,
124 'auth-import-no-match' => \$auth_import_no_match,
125 'auth-auto-overlay-exact' => \$auth_auto_overlay_exact,
126 'auth-auto-overlay-1match' => \$auth_auto_overlay_1match,
127 'auth-auto-overlay-best-match' => \$auth_auto_overlay_best_match,
128 'auth-match-quality-ratio=f' => \$auth_match_quality_ratio,
130 'net-server-config=s' => \$net_server_conf,
137 Path to OpenSRF configuration file.
140 Path to Net::Server configuration file. Defaults to $net_server_conf.
141 Only required if --spoolfile is not set.
144 Log additional details
147 Evergreen user account which performs the import actions.
150 Evergreen user account password
153 Evergreen workstation
156 MARC data received via the network is stored in a temporary
157 file so Vandelay can access it. This must be a directory
158 the open-ils.vandelay service can access. If you want the
159 file deleted after completion, be sure open-ils.vandelay
160 has write access to the directory and the file.
161 This value defaults to the Vandelay data directory, however
162 this configuration value is only accessible when run from
163 the private opensrf domain, which you may not want to do.
166 Path to a MARC file to load. When a --spoolfile is specified,
167 this script will send the file to vandelay for processing,
168 then exit when complete. In other words, it does not stay
169 alive to accept requests from the network.
172 Amount of time in seconds this script will wait after receiving
173 a connection on the socket and before receiving a complete
174 MARC record. This prevents unintentional denial of service by
175 clients connecting and never sending anything.
178 ID of the vandelay bib record merge profile
181 ID of the vandelay bib record queue
187 ID of the vandelay authority record merge profile
190 ID of the vandelay authority record queue
192 --bib-import-no-match
193 --bib-auto-overlay-exact
194 --bib-auto-overlay-1match
195 --bib-auto-overlay-best-match
196 --bib-match-quality-ratio
197 --auth-import-no-match
198 --auth-auto-overlay-exact
199 --auth-auto-overlay-1match
200 --auth-auto-overlay-best-match
201 --auth-match-quality-ratio
203 Bib and auth import options which map directly to Vandelay import
207 Apply import-no-match to bibs and auto-overlay-exact to auths.
209 $0 --bib-import-no-match --auth-auto-overlay-exact
212 Show this help message
# Backward compatibility: each deprecated option emits a warning and then
# overwrites its bib-* replacement, so the deprecated flag wins when both
# spellings were supplied.
219 if ($import_no_match) {
220 warn "\nimport-no-match is deprecated; use bib-import-no-match\n";
221 $bib_import_no_match = $import_no_match;
223 if ($auto_overlay_exact) {
224 warn "\nauto-overlay-exact is deprecated; use bib-auto-overlay-exact\n";
225 $bib_auto_overlay_exact = $auto_overlay_exact;
227 if ($auto_overlay_1match) {
228 warn "\nauto-overlay-1match is deprecated; use bib-auto-overlay-1match\n";
229 $bib_auto_overlay_1match = $auto_overlay_1match;
231 if ($auto_overlay_best_match) {
232 warn "\nauto-overlay-best-match is deprecated; use bib-auto-overlay-best-match\n";
233 $bib_auto_overlay_best_match = $auto_overlay_best_match;
235 if ($deprecated_queue) {
236 warn "\n--queue is deprecated; use --bib-queue\n";
237 $bib_queue = $deprecated_queue;
# Required arguments: credentials plus workstation, and at least one target
# queue. NOTE(review): these are truthiness tests, so a value of 0 is treated
# as missing.
241 die "--username, --password, AND --workstation required. --help for more info.\n"
242 unless $username and $password and $workstation;
243 die "--bib-queue OR --auth-queue required. --help for more info.\n"
244 unless $bib_queue or $auth_queue;
# Resolves $tempdir lazily: an existing value is kept, otherwise the
# open-ils.vandelay "importer" setting is read through the OpenSRF settings
# client. NOTE(review): the enclosing sub header is not visible in this
# excerpt.
247 return if $tempdir; # already read or user provided
248 $tempdir = OpenSRF::Utils::SettingsClient->new->config_value(
249 qw/apps open-ils.vandelay app_settings databases importer/
253 # Sets cur_rec_type to 'auth' if leader/06 of the first
254 # parseable record is 'z', otherwise 'bib'.
# Also selects the queue and record source that go with that type.
# Parameter: $file_name - path of the MARC file to inspect.
# Dies when the file cannot be opened.
255 sub set_record_type {
256 my $file_name = shift;
# Format sniffing: the file is assumed to be binary USMARC unless its first
# character is a non-digit, in which case it is treated as MARC XML.
258 my $marctype = 'USMARC';
# NOTE(review): two-argument open on a bareword handle; a three-argument
# open with a lexical handle would keep mode characters in $file_name from
# being interpreted.
259 open(F, $file_name) or
260 die "Unable to open MARC file $file_name : $!\n";
261 $marctype = 'XML' if (getc(F) =~ /^\D/o);
# NOTE(review): indirect-object constructor syntax; MARC::Batch->new(...)
# is the unambiguous spelling.
264 my $batch = new MARC::Batch ($marctype, $file_name);
270 eval {$rec = $batch->next};
271 next if $@; # record parse failure
# Character at offset 6 of the leader; a false-valued character becomes ''.
273 $ldr_06 = substr($rec->leader(), 6, 1) || '';
277 $cur_rec_type = $ldr_06 eq 'z' ? 'auth' : 'bib';
# Authority records use the auth queue and an empty source; everything else
# uses the bib queue and the --source value.
279 $cur_queue = $cur_rec_type eq 'auth' ? $auth_queue : $bib_queue;
280 $cur_rec_source = $cur_rec_type eq 'auth' ? '' : $bib_source;
284 # set vandelay options based on command line ops and the type of record
285 # currently in process.
# Reads $cur_rec_type and $cur_merge_profile plus the bib_* / auth_* flags.
# NOTE(review): the return statement is not visible in this excerpt;
# import_queued_records assigns the result to $vl_ops, so this presumably
# returns the options hashref.
286 sub compile_vandelay_ops {
# merge_profile is the profile's id when a profile is set, otherwise undef.
290 merge_profile => $cur_merge_profile ? $cur_merge_profile->id : undef
# Copy the flag set that matches the current record type.
293 if ($cur_rec_type eq 'auth') {
294 $vl_ops->{import_no_match} = $auth_import_no_match;
295 $vl_ops->{auto_overlay_exact} = $auth_auto_overlay_exact;
296 $vl_ops->{auto_overlay_1match} = $auth_auto_overlay_1match;
297 $vl_ops->{auto_overlay_best_match} = $auth_auto_overlay_best_match;
298 $vl_ops->{match_quality_ratio} = $auth_match_quality_ratio;
300 $vl_ops->{import_no_match} = $bib_import_no_match;
301 $vl_ops->{auto_overlay_exact} = $bib_auto_overlay_exact;
302 $vl_ops->{auto_overlay_1match} = $bib_auto_overlay_1match;
303 $vl_ops->{auto_overlay_best_match} = $bib_auto_overlay_best_match;
304 $vl_ops->{match_quality_ratio} = $bib_match_quality_ratio;
307 # Default to exact match only if no other strategy is selected.
308 $vl_ops->{auto_overlay_exact} = 1
310 $vl_ops->{auto_overlay_1match} or
311 $vl_ops->{auto_overlay_best_match}
# The compiled options are logged only when --verbose is set.
314 $logger->info("VL options: ".Dumper($vl_ops)) if $verbose;
# Spools a MARC file into the Vandelay queue for its record type and collects
# what the service streams back in @rec_ids.
# NOTE(review): the sub header is not visible in this excerpt; the file-level
# caller invokes process_spool($file), which is presumably this sub.
319 my $file_name = shift; # filename
# Detect bib vs. auth first; this sets $cur_rec_type, $cur_queue and
# $cur_rec_source, all of which are used in the request below.
321 set_record_type($file_name);
323 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
324 my $req = $ses->request(
325 "open-ils.vandelay.$cur_rec_type.process_spool.stream_results",
326 $authtoken, undef, # cache key not needed
327 $cur_queue, 'import', $file_name, $cur_rec_source
# Drain the response stream: error responses are logged (the error test
# itself is not visible here) and any non-empty content is collected.
331 while(my $resp = $req->recv) {
334 $logger->error("Error spooling MARC data: $resp");
336 } elsif($resp->content) {
337 push(@rec_ids, $resp->content);
# Imports the queued Vandelay records in $rec_ids using the options from
# compile_vandelay_ops(), then deletes the successfully imported queued
# records through pcrud.
# Returns a two-element list: (number of imported records, $failed).
# NOTE(review): the line that unpacks $rec_ids from the arguments is not
# visible in this excerpt.
344 sub import_queued_records {
346 my $vl_ops = compile_vandelay_ops();
348 my $ses = OpenSRF::AppSession->create('open-ils.vandelay');
349 my $req = $ses->request(
350 "open-ils.vandelay.${cur_rec_type}_record.list.import",
351 $authtoken, $rec_ids, $vl_ops
354 # collect the successfully imported vandelay records
357 while(my $resp = $req->recv) {
359 $logger->error("Error importing MARC data: $resp");
361 } elsif(my $data = $resp->content) {
363 if($data->{err_event}) {
365 $logger->error(Dumper($data->{err_event}));
368 } elsif ($data->{no_import}) {
369 # no errors, just didn't import, because of rules.
373 "record failed to satisfy Vandelay merge/quality/etc. ".
374 "requirements: " . ($data->{imported} || ''));
377 push(@cleanup_recs, $data->{imported}) if $data->{imported};
382 # clean up the successfully imported vandelay records to prevent queue bloat
383 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
385 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
# The pcrud delete method name depends on the record type: the suffix is
# vqar for auth records and vqbr for everything else.
388 my $api = 'open-ils.pcrud.delete.';
389 $api .= $cur_rec_type eq 'auth' ? 'vqar' : 'vqbr';
391 foreach (@cleanup_recs) {
393 $pcrud->request($api, $authtoken, $_)->recv;
397 $logger->error("Error deleting queued $cur_rec_type record $_: $@");
# The commit is skipped when $err is set (presumably by a failed delete;
# the assignment is not visible here).
402 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
405 $logger->info("imported queued vandelay records: @cleanup_recs");
406 return (scalar(@cleanup_recs), $failed);
411 # Each child needs its own opensrf connection.
# Bootstraps an OpenSRF client from $osrf_config and imports the Fieldmapper
# using the "IDL" setting reported by the settings client.
# The script also calls this directly in --spoolfile mode to force a
# connection. NOTE(review): presumably invoked per child by
# Net::Server::PreFork, which this package inherits from -- confirm.
412 sub child_init_hook {
413 OpenSRF::System->bootstrap_client(config_file => $osrf_config);
414 Fieldmapper->import(IDL =>
415 OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
419 # The core Net::Server method
420 # Reads streams of MARC data from the network, saves the data as a file,
421 # then processes the file via vandelay.
422 sub process_request {
# Peer identity as address:port, used in the log messages below.
424 my $client = $self->{server}->{peeraddr}.':'.$self->{server}->{peerport};
426 $logger->info("$client opened a new connection");
# Check the OpenSRF transport before reading; per the log message, a failed
# flush means this process is no longer connected to opensrf.
428 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
429 if(!$ph->flush_socket()) {
430 $logger->error("We received a request, but we are no longer connected".
431 " to opensrf. Exiting and dropping request from $client");
# Read timeout: the ALRM handler dies with "alarm\n" once --wait seconds
# have elapsed.
437 local $SIG{ALRM} = sub { die "alarm\n" };
438 alarm $wait_time; # prevent accidental tie ups of backend processes
439 local $/ = "\x1D"; # MARC record separator
445 $logger->error("reading from STDIN failed or timed out: $@");
449 $logger->info("stream parser read " . length($data) . " bytes");
453 # copy data to a temporary file so vandelay can scoop it up
# The temp file name is the script's base name (".pl" stripped) followed by
# a generated suffix, created inside $tempdir.
454 my $base = fileparse($0, qw(.pl));
# NOTE(review): File::Temp's tempfile is documented to croak on failure, so
# the trailing "or die" is probably never reached -- confirm.
455 my ($handle, $tempfile) = tempfile("${base}_XXXX", DIR => $tempdir)
456 or die "Cannot create tempfile in $tempdir : $!";
458 print $handle $data or die "Error writing to tempfile $tempfile : $!\n";
# From here the spooled data follows the same path as a --spoolfile run.
461 process_file($tempfile);
# Sets $cur_merge_profile for the current record type and returns early when
# it can: the cached profile object is used if one exists; otherwise the
# profile is retrieved by ID through pcrud and cached. When no profile ID is
# configured for the type, $cur_merge_profile is set to undef.
464 sub set_merge_profile {
# Serve from the per-type cache when possible.
468 return $cur_merge_profile = $bib_merge_profile_obj
469 if $bib_merge_profile_obj and $cur_rec_type eq 'bib';
471 return $cur_merge_profile = $auth_merge_profile_obj
472 if $auth_merge_profile_obj and $cur_rec_type eq 'auth';
474 # fetch un-cached profile
# Any record type other than 'bib' falls through to the auth profile ID.
476 my $profile_id = $cur_rec_type eq 'bib' ?
477 $bib_merge_profile : $auth_merge_profile;
# No profile configured for this record type.
479 return $cur_merge_profile = undef unless $profile_id;
481 $cur_merge_profile = $apputils->simplereq(
483 'open-ils.pcrud.retrieve.vmp',
484 $authtoken, $profile_id);
486 # cache profile for later
488 $auth_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'auth';
489 $bib_merge_profile_obj = $cur_merge_profile if $cur_rec_type eq 'bib';
# Per-file workflow: log in, spool the file into Vandelay, import the spooled
# records, build a summary message, then log out.
# NOTE(review): the enclosing sub header is not visible in this excerpt;
# callers invoke process_file(...), which is presumably this sub.
495 new_auth_token(); # login
496 my $rec_ids = process_spool($file);
497 my ($imported, $failed) = import_queued_records($rec_ids);
# Retry once with a fresh token if $imported is a NO_SESSION event.
# NOTE(review): the visible return of import_queued_records is always a
# record count -- confirm an event can actually reach this check.
499 if (oils_event_equals($imported, 'NO_SESSION')) {
500 # did the session expire while spooling?
501 new_auth_token(); # retry with new authtoken
502 ($imported, $failed) = import_queued_records($rec_ids);
# NOTE(review): oils_event_die is not defined in this excerpt; presumably it
# dies when $imported is an event -- confirm.
505 oils_event_die($imported);
507 my $profile = $cur_merge_profile ? $cur_merge_profile->name : '';
# Summary lines are appended only for non-zero counts.
509 $msg .= "Successfully imported $imported $cur_rec_type records ".
510 "using merge profile '$profile'\n" if $imported;
511 $msg .= "Failed to import $failed $cur_rec_type records\n" if $failed;
# Without --spoolfile (network mode) the message ends with a NUL byte,
# presumably as an end-of-message marker for the client -- confirm.
512 $msg .= "\x00" unless $spoolfile;
515 clear_auth_token(); # logout
518 # The authtoken will time out after the configured inactivity period.
519 # When that happens, get a new one.
# Logs in with the 'staff' login type at the given workstation and dies when
# the login call returns false.
# NOTE(review): oils_login is not defined in this excerpt; it presumably
# comes from oils_header.pl and sets the global $authtoken -- confirm.
521 oils_login($username, $password, 'staff', $workstation)
522 or die "Unable to login to Evergreen as user $username";
# Logs out by calling open-ils.auth.session.delete through $apputils.
# NOTE(review): the remaining request arguments are not visible in this
# excerpt.
525 sub clear_auth_token {
526 $apputils->simplereq(
528 'open-ils.auth.session.delete',
534 # -- execution starts here
537 # individual files are processed in standalone mode.
538 # No Net::Server innards are necessary.
540 child_init_hook(); # force an opensrf connection
541 process_file($spoolfile);
545 # No spoolfile, run in Net::Server mode
549 WARNING: This script provides no security layer. Any client that has
550 access to the server+port can inject MARC records into the system.
555 $args{conf_file} = $net_server_conf if -r $net_server_conf; # NOTE(review): $net_server_conf may be undef here; -r on undef warns under "use warnings" -- confirm
556 $args{port} = $port if $port; # an explicit port is passed to the server only when set
558 __PACKAGE__->run(%args); # starts the server; this package inherits from Net::Server::PreFork