2 # Copyright (C) 2008-2010 Equinox Software, Inc.
3 # Author: Bill Erickson <erickson@esilibrary.com>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
16 use strict; use warnings;
17 use Net::Server::PreFork;
18 use base qw/Net::Server::PreFork/;
21 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
22 use MARC::File::USMARC;
25 use File::Basename qw/fileparse/;
27 use Getopt::Long qw(:DEFAULT GetOptionsFromArray);
31 use OpenSRF::Utils::Logger qw/$logger/;
32 use OpenSRF::AppSession;
33 use OpenSRF::EX qw/:try/;
34 use OpenILS::Utils::Cronscript;
35 use OpenSRF::Transport::PeerHandle;
36 require 'oils_header.pl';
37 use vars qw/$apputils/;
45 'merge-profile=i' => 0,
47 # 'osrf-config=s' => '/openils/conf/opensrf_core.xml',
57 'import-by-queue' => 0,
58 'import-no-match' => 0,
59 'auto-overlay-exact' => 0,
60 'auto-overlay-1match' => 0,
61 'auto-overlay-best-match' => 0
64 $OpenILS::Utils::Cronscript::debug=1 if $debug;
65 $Getopt::Long::debug=1 if $debug > 1;
66 my $o = OpenILS::Utils::Cronscript->new(\%defaults);
70 if (grep {$_ eq '--'} @ARGV) {
71 print "Splitting options into groups\n" if $debug;
74 $_ eq '--' and last; # stop at the first --
75 push @script_args, $_;
82 print "Calling MyGetOptions ",
83 (@script_args ? "with options: " . join(' ', @script_args) : 'without options from command line'),
86 my $real_opts = $o->MyGetOptions(\@script_args);
88 # GetOptionsFromArray(\@script_args, \%defaults, %defaults); # similar to
90 $real_opts->{tempdir} ||= tempdir_setting(); # This doesn't go in defaults because it reads config, must come after bootstrap
92 my $bufsize = $real_opts->{buffsize};
93 my $bib_source = $real_opts->{source};
94 my $osrf_config = $real_opts->{'osrf-config'};
95 my $oils_username = $real_opts->{user};
96 my $oils_password = $real_opts->{password};
97 my $help = $real_opts->{help};
98 my $merge_profile = $real_opts->{'merge-profile'};
99 my $queue_id = $real_opts->{queue};
100 my $tempdir = $real_opts->{tempdir};
101 my $import_by_queue = $real_opts->{'import-by-queue'};
102 $debug += $real_opts->{debug};
104 foreach (keys %$real_opts) {
105 print("real_opt->{$_} = ", $real_opts->{$_}, "\n") if $real_opts->{debug} or $debug;
107 my $wait_time = $real_opts->{wait};
110 # DEFAULTS for Net::Server
111 my $filename = fileparse($0, '.pl');
112 my $conf_file = (-r "$filename.conf") ? "$filename.conf" : undef;
113 # $conf_file is the Net::Server config for THIS script (not EG), if it exists and is readable
118 pod2usage(1) if $help;
119 unless ($oils_password) {
120 print STDERR "\nERROR: password option required for session login\n\n";
124 print Dumper($o) if $debug;
127 foreach my $ref (qw/bufsize bib_source osrf_config oils_username oils_password help conf_file debug/) {
129 printf "%16s => %s\n", $ref, (eval("\$$ref") || '');
134 print Dumper($real_opts);
138 sub tempdir_setting {
139 my $ret = $apputils->simplereq( qw# opensrf.settings opensrf.settings.xpath.get
140 /opensrf/default/apps/open-ils.vandelay/app_settings/databases/importer # );
141 return $ret->[0] || '/tmp';
147 WARNING: This script provides no security layer. Any client that has
148 access to the server+port can inject MARC records into the system.
154 return $apputils->simplereq(
156 'open-ils.cat.biblio.record.xml.import',
161 sub old_process_batch_data {
162 my $data = shift or $logger->error("process_batch_data called without any data");
170 open $handle, '<', \$data;
173 my $batch = MARC::Batch->new('USMARC', $handle);
184 eval { $rec = $batch->next; };
187 $logger->error("Failed parsing MARC record $index");
191 last unless $rec; # The only way out
193 my $resp = xml_import($authtoken, $rec->as_xml_record, $bib_source);
195 # has the session timed out?
196 if (oils_event_equals($resp, 'NO_SESSION')) {
198 $resp = xml_import($authtoken, $rec->as_xml_record, $bib_source); # try again w/ new token
200 oils_event_die($resp);
204 return ($imported, $failed);
207 sub process_spool { # filename
209 my $marcfile = shift;
212 if($import_by_queue) {
214 # don't collect the record IDs, just spool the queue
216 $apputils->simplereq(
218 'open-ils.vandelay.bib.process_spool',
229 # collect the newly queued record IDs for processing
231 my $req = $vl_ses->request(
232 'open-ils.vandelay.bib.process_spool.stream_results',
234 undef, # cache key not needed
241 while(my $resp = $req->recv) {
244 $logger->error("Error spooling MARC data: $resp");
246 } elsif($resp->content) {
247 push(@rec_ids, $resp->content);
255 sub bib_queue_import {
257 my $extra = {report_all => 1};
258 $extra->{merge_profile} = $merge_profile if $merge_profile;
259 $extra->{import_no_match} = 1 if $real_opts->{'import-no-match'};
260 $extra->{auto_overlay_1match} = 1 if $real_opts->{'auto-overlay-1match'};
261 $extra->{auto_overlay_best_match} = 1 if $real_opts->{'auto-overlay-best-match'};
263 # default to exact match if no strategy is chosen
264 $extra->{auto_overlay_exact} = 1
265 if $real_opts->{'auto-overlay-exact'} or
266 not ($extra->{auto_overlay_1match} or $extra->{auto_overlay_best_match});
271 if($import_by_queue) {
274 $req = $vl_ses->request(
275 'open-ils.vandelay.bib_queue.import',
282 # import explicit record IDs
284 $req = $vl_ses->request(
285 'open-ils.vandelay.bib_record.list.import',
292 # collect the successfully imported vandelay records
294 while(my $resp = $req->recv) {
296 $logger->error("Error importing MARC data: $resp");
298 } elsif(my $data = $resp->content) {
300 if($data->{err_event}) {
302 $logger->error(Dumper($data->{err_event}));
306 push(@cleanup_recs, $data->{imported}) if $data->{imported};
311 # clean up the successfully imported vandelay records to prevent queue bloat
312 my $pcrud = OpenSRF::AppSession->create('open-ils.pcrud');
314 $pcrud->request('open-ils.pcrud.transaction.begin', $authtoken)->recv;
317 foreach (@cleanup_recs) {
321 $pcrud->request('open-ils.pcrud.delete.vqbr', $authtoken, $_)->recv;
325 $logger->error("Error deleteing queued bib record $_: $err");
329 $pcrud->request('open-ils.pcrud.transaction.commit', $authtoken)->recv unless $err;
332 $logger->info("imported queued vandelay records: @cleanup_recs");
333 return (scalar(@cleanup_recs), $failed);
336 sub process_batch_data {
337 my $data = shift or $logger->error("process_batch_data called without any data");
341 $vl_ses = OpenSRF::AppSession->create('open-ils.vandelay');
343 my ($handle, $tempfile);
345 ($handle, $tempfile) = File::Temp->tempfile("$0_XXXX", DIR => $tempdir) or die "Cannot write tempfile in $tempdir";
352 $logger->info("Calling process_spool on tempfile $tempfile (queue: $queue_id; source: $bib_source)");
353 my $rec_ids = process_spool($tempfile);
355 if (oils_event_equals($rec_ids, 'NO_SESSION')) { # has the session timed out?
357 $rec_ids = process_spool($tempfile); # try again w/ new token
360 my ($imported, $failed) = bib_queue_import($rec_ids);
362 if (oils_event_equals($imported, 'NO_SESSION')) { # has the session timed out?
364 ($imported, $failed) = bib_queue_import(); # try again w/ new token
367 oils_event_die($imported);
369 return ($imported, $failed);
372 sub process_request { # The core Net::Server method
374 my $client = $self->{server}->{client};
376 my $sockname = getpeername($client);
377 my ($port, $ip_addr) = unpack_sockaddr_in($sockname);
378 $logger->info("stream parser received contact from ".inet_ntoa($ip_addr));
380 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
381 if(!$ph->flush_socket()) {
382 $logger->error("We received a request, bu we are no longer connected to opensrf. ".
383 "Exiting and dropping request from $client");
389 local $SIG{ALRM} = sub { die "alarm\n" };
390 alarm $wait_time; # prevent accidental tie ups of backend processes
391 local $/ = "\x1D"; # MARC record separator
397 $logger->error("reading from STDIN failed or timed out: $@");
401 $logger->info("stream parser read " . length($data) . " bytes");
403 my ($imported, $failed) = (0, 0);
405 new_auth_token(); # login
407 if ($real_opts->{noqueue}) {
408 ($imported, $failed) = old_process_batch_data($data);
410 ($imported, $failed) = process_batch_data($data);
413 my $profile = (!$merge_profile) ? '' :
414 $apputils->simplereq(
416 'open-ils.pcrud.retrieve.vmp',
418 $merge_profile)->name;
421 $msg .= "Successfully imported $imported records using merge profile '$profile'\n" if $imported;
422 $msg .= "Failed to import $failed records\n" if $failed;
426 clear_auth_token(); # logout
429 sub standalone_process_request { # The command line version
432 $logger->info("stream parser received file processing request for $file");
434 my $ph = OpenSRF::Transport::PeerHandle->retrieve;
435 if(!$ph->flush_socket()) {
436 $logger->error("We received a request, bu we are no longer connected to opensrf. ".
437 "Exiting and dropping request for $file");
441 my ($imported, $failed) = (0, 0);
443 new_auth_token(); # login
445 if ($real_opts->{noqueue}) {
446 ($imported, $failed) = old_process_batch_data($file, 1);
448 ($imported, $failed) = process_batch_data($file, 1);
451 my $profile = (!$merge_profile) ? '' :
452 $apputils->simplereq(
454 'open-ils.pcrud.retrieve.vmp',
456 $merge_profile)->name;
459 $msg .= "Successfully imported $imported records using merge profile '$profile'\n" if $imported;
460 $msg .= "Failed to import $failed records\n" if $failed;
464 clear_auth_token(); # logout
468 # The authtoken will time out after the configured inactivity period.
469 # When that happens, get a new one.
471 $authtoken = oils_login($oils_username, $oils_password, 'staff')
472 or die "Unable to login to Evergreen as user $oils_username";
476 sub clear_auth_token {
477 $apputils->simplereq(
479 'open-ils.auth.session.delete',
486 osrf_connect($osrf_config);
487 if ($real_opts->{nodaemon}) {
488 if (!$real_opts->{spoolfile}) {
489 print " --nodaemon mode requested, but no --spoolfile supplied!\n";
492 standalone_process_request($real_opts->{spoolfile});
494 print "Calling Net::Server run ", (@ARGV ? "with command-line options: " . join(' ', @ARGV) : ''), "\n";
495 __PACKAGE__->run(conf_file => $conf_file);
502 marc_stream_importer.pl - Import MARC records via bare socket connection.
506 ./marc_stream_importer.pl [common opts ...] [script opts ...] -- [Net::Server opts ...] &
508 This script uses the EG common options from B<Cronscript>. See --help output for those.
510 Run C<perldoc marc_stream_importer.pl> for full documentation.
512 Note the extra C<--> that separates options for the script wrapper from
513 options for the underlying L<Net::Server>.
515 Note: this script has to be run in the same directory as B<oils_header.pl>.
517 Typical server-style execution will include a trailing C<&> to run in the background.
521 This script is a L<Net::Server::PreFork> instance for shoving records into Evergreen from a remote system.
525 The only required option is --password
527 --password =<eg_password>
528 --user =<eg_username> default: admin
529 --source =<bib_source> default: 1 Integer
530 --merge-profile =<i> default: 0
531 --tempdir =</temp/dir/> default: from L<opensrf.conf> <open-ils.vandelay/app_settings/databases/importer>
532 --source =<i> default: 1
533 --import-by-queue default: OFF Import the entire queue instead of only the newly spooled record IDs
534 --spoolfile =<import_file> default: NONE File to import in --nodaemon mode
535 --nodaemon default: OFF When used with --spoolfile, turns off Net::Server mode and runs this utility in the foreground
538 =head2 Old style: --noqueue and associated options
540 To bypass Vandelay queue processing and push records directly into the database (the old style), use:
542 --noqueue default: OFF
543 --buffsize =<i> default: 4096 Buffer size. Only used by --noqueue
544 --wait =<i> default: 5 Seconds to wait while reading from the socket before timing out. Applies to every socket request, with or without --noqueue
546 =head2 Net::Server Options
548 By default, the script will use the Net::Server configuration file B<marc_stream_importer.conf>. You can
549 override this by passing a filepath with the --conf_file option.
551 Other Net::Server options include: --port=<port> --min_servers=<X> --max_servers=<Y> and --log_file=[path/to/file]
553 See L<Net::Server> for a complete list.
557 =head3 OCLC Connexion
559 To use this script with OCLC Connexion, configure the client as follows:
561 Under Tools -> Options -> Export (tab)
562 Create -> Choose Connection -> OK -> Leave translation at "None"
563 -> Create -> Create -> choose TCP/IP (internet)
564 -> Enter hostname and Port, leave 'Use Telnet Protocol' checked
565 -> Create/OK your way out of the dialogs
566 Record Characteristics (button) -> Choose 'UTF-8 Unicode' for the Character Set
569 OCLC and Connexion are trademarks/service marks of OCLC Online Computer Library Center, Inc.
573 WARNING: This script provides no inherent security layer. Any client that has
574 access to the server+port can inject MARC records into the system.
575 Use the available options (like allow/deny) in the Net::Server config file
576 or via the command line to restrict access as necessary.
580 ./marc_stream_importer.pl \
581 admin open-ils connexion --port 5555 --min_servers 2 \
582 --max_servers=20 --log_file=/openils/var/log/marc_net_importer.log &
584 ./marc_stream_importer.pl \
585 admin open-ils connexion --port 5555 --min_servers 2 \
586 --max_servers=20 --log_file=/openils/var/log/marc_net_importer.log &
590 L<Net::Server::PreFork>, L<marc_stream_importer.conf>
594 Bill Erickson <erickson@esilibrary.com>
595 Joe Atzberger <jatzberger@esilibrary.com>
596 Mike Rylander <miker@esilibrary.com> (nodaemon+spoolfile mode)