2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
# Defaults for the command-line options. The option table further down
# stores parsed values into these variables by reference.
29 my $opt_service = undef;
# NOTE(review): @CONF_DIR@ and @PID_DIR@ look like build-time substitution
# placeholders rather than Perl arrays -- confirm against the build system.
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
# Shutdown flags: graceful/fast/immediate are dispatched as TERM/INT/KILL;
# the *_all variants go through do_stop_all() instead of do_stop().
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
# Upper bound of the per-PID wait loop that checks whether a signaled
# process has exited.
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_force_clean_process = 0;
54 my $opt_diagnostic = 0;
# Host name: the OSRF_HOSTNAME environment variable wins when it is set to a
# true value; otherwise the name reported by Net::Domain::hostfqdn() is used.
58 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Option table: maps each command-line flag to the variable that receives
# its value. Specs ending in "=s" / "=i" require a string / integer
# argument; specs without a suffix are boolean flags.
61 'service=s' => \$opt_service,
62 'config=s' => \$opt_config,
63 'pid-dir=s' => \$opt_pid_dir,
64 'no-daemon' => \$opt_no_daemon,
65 'settings-startup-pause=i' => \$opt_settings_pause,
66 'localhost' => \$opt_localhost,
68 'quiet' => \$opt_quiet,
69 'graceful-shutdown' => \$opt_shutdown_graceful,
70 'fast-shutdown' => \$opt_shutdown_fast,
71 'immediate-shutdown' => \$opt_shutdown_immediate,
72 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
73 'fast-shutdown-all' => \$opt_shutdown_fast_all,
74 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
75 'kill-with-fire' => \$opt_kill_with_fire,
76 'force-clean-process' => \$opt_force_clean_process,
# --signal-timeout carries a number of seconds, so it needs the "=i" suffix.
# Declared as a bare flag, Getopt::Long would store 1 in the variable and
# leave the number the user typed unparsed in @ARGV.
77 'signal-timeout=i' => \$opt_signal_timeout,
78 'signal=s' => \$opt_signal,
79 'signal-all' => \$opt_signal_all,
80 'start' => \$opt_start,
82 'start-all' => \$opt_start_all,
83 'stop-all' => \$opt_stop_all,
84 'restart' => \$opt_restart,
85 'restart-all' => \$opt_restart_all,
86 'diagnostic' => \$opt_diagnostic
# Force the host name to 'localhost' and export it through OSRF_HOSTNAME.
# NOTE(review): the condition guarding these two assignments is not visible
# in this listing; the usage text ties the behavior to --localhost -- confirm.
90 $hostname = 'localhost';
91 $ENV{OSRF_HOSTNAME} = $hostname;
# Command prefixes for the C and Python service controllers. The service
# start code appends "-a start -s <service>" before running them.
94 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
95 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
# Send $signal to every PID known for $service. PIDs come from the service's
# PID file first and from the process table as a fallback.
# NOTE(review): the enclosing sub header and some closing braces are not
# present in this listing; call sites use do_signal_send($service, $signal)
# -- confirm against the full file.
101 my @pids = get_service_pids_from_file($service);
104 # no PID files exist. see if the service is running anyway
106 @pids = get_service_pids_from_ps($service);
108 msg("cannot signal $service : no pid file or running process");
113 for my $pid (@pids) {
# kill() returns the number of processes signaled; 0 means the PID no longer
# exists, in which case the service's PID file is deleted.
114 if (kill($signal, $pid) == 0) { # no process was signaled.
115 msg("cannot signal $service: process $pid is not running");
116 my $pidfile = get_pid_file($service);
117 unlink $pidfile if $pidfile;
121 msg("sending $signal signal to pid=$pid $service");
# Wait for the processes recorded in a service's PID file to exit.
# NOTE(review): the enclosing sub header, the delay between polls and the
# return statements are not present in this listing; call sites compare the
# result of do_signal_wait($service) with 2 -- confirm against the full file.
127 # returns 2 if a process should have gone away but did not
128 # in the case of multiple PIDs (e.g. router), return the
129 # status of any failures, but not the successes.
132 my @pids = get_service_pids_from_file($service);
135 for my $pid (@pids) {
137 # to determine whether a process has died, we have to send
138 # a no-op signal to the PID and check the success of that signal
# at most $opt_signal_timeout polls per PID; leave the loop as soon as
# kill(0, ...) reports that no process was reached
140 for my $i (1..$opt_signal_timeout) {
141 $sig_count = kill(0, $pid);
142 last unless $sig_count;
147 msg("timed out waiting on $service pid=$pid to die");
152 # cleanup successful. remove the PID file
153 my $pidfile = get_pid_file($service);
154 unlink $pidfile if $pidfile;
# PID file path for a service: "<pid dir>/<service>.pid".
# NOTE(review): the sub header and argument unpacking are not present in
# this listing; callers use get_pid_file($service).
162 return "$opt_pid_dir/$service.pid";
165 # services usually only have 1 pid, but the router will have at least 2
# Reads the PID file of a service, one entry per line. Returns the empty
# list when the file does not exist.
# NOTE(review): argument unpacking and the final return are not present in
# this listing.
166 sub get_service_pids_from_file {
168 my $pid_file = get_pid_file($service);
169 return () unless -e $pid_file;
170 my @pids = `cat $pid_file`;
# strip leading whitespace and newlines from every entry
171 s/^\s*|\n//g for @pids;
# Finds the PIDs of a service in `ps ax` output instead of the PID file.
# The router is matched by the text 'OpenSRF Router'; any other service by
# 'OpenSRF Listener [<service>]'.
# NOTE(review): argument unpacking, the execution of $ps and the final
# return are not present in this listing.
175 sub get_service_pids_from_ps {
178 my $ps = ($service eq 'router') ?
179 "ps ax | grep 'OpenSRF Router'" :
180 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
# drop the grep process itself, trim leading blanks and keep only the first
# space-separated column of each line
182 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
184 s/^\s*|\n//g for @pids;
# Diagnostic report. Collects service names from the PID files and from the
# configuration, then for each service compares the PIDs in its PID file
# with the PIDs found via ps and prints a status line plus ERR lines for
# every mismatch.
# NOTE(review): the enclosing sub header, several declarations ($len,
# %services, @conf_services), closing braces and else-branches are not
# present in this listing; the dispatcher calls this as do_diagnostic().
191 my $alive = do_init(1);
193 my @services = get_service_list_from_files(1);
196 @conf_services = (@perl_services,
197 map {$_->{service}} @nonperl_services);
198 push(@services, @conf_services);
# widest service name, used below to left-align the name column
203 for my $svc (@services) {
204 $len = length($svc) if length($svc) > $len;
208 for my $svc (sort keys %services) {
209 my @pf_pids = get_service_pids_from_file($svc);
210 my @ps_pids = get_service_pids_from_ps($svc);
211 my $svc_str = sprintf("%-${len}s ", $svc);
214 unless(@ps_pids or @pf_pids) {
215 msg("$svc_str is not running");
219 for my $pid (@ps_pids) {
222 my $str = "$svc_str [$pid] ";
# elapsed time and CPU time of the process, without column headers
223 my $times = `ps -o etime=,cputime= $pid`;
224 $times =~ s/^\s+|\s+$//g;
225 my @times = split(/ /, $times);
226 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
228 if ($svc eq 'router') {
231 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
232 $str .= "#drones=".scalar(@drones);
234 msg("\tERR $svc has no running drones.") unless @drones;
237 msg("\tERR $svc [$pid] NOT configured for this host.")
238 unless grep {$_ eq $svc} @conf_services
241 msg("\tERR $svc [$pid] NOT found in PID file.")
242 unless grep {$_ eq $pid} @pf_pids;
# reverse check over the PID file entries
245 for my $pid (@pf_pids) {
247 msg("\tERR $svc Has PID file entry [$pid], ".
248 "which matches no running $svc processes");
# Starts the router through the opensrf_router command, then collects the
# PIDs of all opensrf_router processes and opens the router PID file for
# writing. Dies if the PID file cannot be opened.
# NOTE(review): the loop that writes the PIDs and closes the handle is only
# partly present in this listing.
255 sub do_start_router {
256 `opensrf_router $opt_config routers`;
258 sleep 2; # give the router time to fork
# "-o pid=" prints only the PID column, without a header line
259 my @pids = `ps -C opensrf_router -o pid=`;
260 s/^\s*|\n//g for @pids;
262 my $pidfile = get_pid_file('router');
263 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
266 msg("starting service pid=$_ router");
272 # stop a specific service
# Arguments: the service name followed by an optional list of signal names.
# Without a list, TERM, INT and KILL are tried in that order. The loop ends
# as soon as do_signal() returns anything other than 2.
# NOTE(review): the sub header and the return value are not present in
# this listing.
274 my ($service, @signals) = @_;
275 @signals = qw/TERM INT KILL/ unless @signals;
276 for my $sig (@signals) {
277 last unless do_signal($service, $sig) == 2;
# Bootstraps an OpenSRF client from $opt_config, reads the list of active
# applications from the settings client and sorts each one into
# @perl_services or @nonperl_services according to its configured language.
# NOTE(review): the enclosing sub header, the origin of $fail_ok and the
# final return value are not present in this listing; callers use do_init()
# and do_init(1).
285 OpenSRF::System->bootstrap_client(config_file => $opt_config);
# no peer handle means the bootstrap failed: return 0 when $fail_ok is
# set, otherwise abort
287 if (!OpenSRF::Transport::PeerHandle->retrieve) {
288 return 0 if $fail_ok;
289 die "Unable to bootstrap client for requests\n";
292 load_settings(); # load the settings config if we can
294 my $sclient = OpenSRF::Utils::SettingsClient->new;
295 my $apps = $sclient->config_value("activeapps", "appname");
297 # disconnect the top-level network handle
298 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
# a single configured app comes back as a plain scalar; normalize to an
# array reference
301 $apps = [$apps] unless ref $apps;
302 for my $app (@$apps) {
303 if (!$sclient->config_value('apps', $app)) {
304 msg("Service '$app' is listed for this host, ".
305 "but there is no configuration for it in $opt_config");
# a missing language setting is treated as the empty string
308 my $lang = $sclient->config_value('apps', $app, 'language') || '';
309 if ($lang =~ /perl/i) {
310 push(@perl_services, $app);
312 push(@nonperl_services, {service => $app, lang => $lang});
319 # start a specific service
# Checks the PID file and the process table before starting anything, then
# hands off to the router starter, the in-process Perl runner, or the C /
# Python controller command.
# NOTE(review): the enclosing sub header, argument unpacking and several
# branches/closing braces are not present in this listing.
323 my @pf_pids = get_service_pids_from_file($service);
324 my @ps_pids = get_service_pids_from_ps($service);
326 if (@pf_pids) { # had pidfile
329 msg("service $service already running : @ps_pids");
332 } else { # stale pidfile
334 my $pidfile = get_pid_file($service);
335 msg("removing stale pid file $pidfile");
339 } elsif (@ps_pids) { # orphan process
341 if ($opt_force_clean_process) {
342 msg("service $service pid=@ps_pids is running with no pidfile");
343 do_signal($service, 'KILL');
345 msg("service $service pid=@ps_pids is running with no pidfile! ".
346 "use --force-clean-process to automatically kill orphan processes");
# the router has its own start routine
351 return do_start_router() if $service eq 'router';
353 load_settings() if $service eq 'opensrf.settings';
# Perl services: when do_daemon() returns false this call returns without
# running the service; otherwise the service is run in this process
355 if(grep { $_ eq $service } @perl_services) {
356 return unless do_daemon($service);
357 OpenSRF::System->run_service($service, $opt_pid_dir);
360 # note: we don't daemonize non-perl services, but instead
361 # assume the controller for other languages manages that.
362 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
# NOTE(review): /c/i is unanchored and matches any language name that
# contains the letter "c" -- confirm this is intended.
364 if ($svc->{lang} =~ /c/i) {
365 system("$C_COMMAND -a start -s $service");
367 } elsif ($svc->{lang} =~ /python/i) {
368 system("$PY_COMMAND -a start -s $service");
374 msg("$service is not configured to run on $hostname");
# Starts every configured service: opensrf.settings first (when it is among
# the Perl services), then the remaining Perl services, then the non-Perl
# services one at a time.
# NOTE(review): the enclosing sub header, a few statements and the return
# value are not present in this listing; the dispatcher calls this as
# do_start_all().
379 msg("starting all services for $hostname");
382 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
383 do_start('opensrf.settings');
384 # in batch mode, give opensrf.settings plenty of time to start
385 # before any non-Perl services try to connect
386 sleep $opt_settings_pause if $opt_settings_pause;
389 # start Perl services
390 for my $service (@perl_services) {
391 do_start($service) unless $service eq 'opensrf.settings';
394 # start each non-perl service individually instead of using the native
395 # start-all command. this allows us to test for existing pid files
396 # and/or running processes on each service before starting.
397 # it also means each service has to connect-fetch_setting-disconnect
398 # from jabber, which makes startup slightly slower than native start-all
399 do_start($_->{service}) for @nonperl_services;
404 # signal a single service
# Delegates to do_signal_all() with a one-element service list and passes
# its return value through.
# NOTE(review): the sub header and argument unpacking are not present in
# this listing; callers use do_signal($service, $signal).
408 return do_signal_all($signal, $service);
411 # returns the list of running services based on presence of PID files.
412 # the 'router' service is not included by default, since it's
413 # usually treated special.
# Argument: a true value to keep 'router' in the returned list.
414 sub get_service_list_from_files {
415 my $include_router = shift;
# errors from ls (e.g. no matching files) are discarded
416 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
417 s/^\s*|\n//g for @services;
# reduce "<dir>/<name>.pid" to "<name>"
418 s|.*/(.*)\.pid$|$1| for @services;
419 return @services if $include_router;
420 return grep { $_ ne 'router' } @services;
# Sends $signal to the given services, or to every service that has a PID
# file (router excluded) when no services are given, then waits for each of
# them to exit unless the signal is HUP, USR1 or USR2.
# NOTE(review): the sub header and the final return value are not present
# in this listing.
424 my ($signal, @services) = @_;
425 @services = get_service_list_from_files() unless @services;
427 do_signal_send($_, $signal) for @services;
429 # if user passed a known non-shutdown signal, we're done.
430 return if $signal =~ /HUP|USR1|USR2/;
432 do_signal_wait($_) for @services;
435 # pull all opensrf listener and drone PIDs from 'ps',
436 # kill them all, and remove all pid files
# NOTE(review): the loop headers, the kill call and the unlink call are not
# present in this listing.
437 sub do_kill_with_fire {
438 msg("killing with fire");
440 my @pids = get_running_pids();
# skip entries that contain no digit
442 next unless $_ =~ /\d+/;
# command line of the process, used only for the log message
443 my $proc = `ps -p $_ -o cmd=`;
445 msg("killing with fire pid=$_ $proc");
449 # remove all of the pid files
450 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
451 s/^\s*|\n//g for @files;
453 msg("removing pid file $_");
# Collects PIDs from three `ps ax` searches: listeners, drones and routers,
# appended to @pids in that order.
# NOTE(review): the declarations of @pids and @greps, the execution of each
# command and the final return are not present in this listing.
458 sub get_running_pids {
461 # start with the listeners, then drones, then routers
463 "ps ax | grep 'OpenSRF Listener' ",
464 "ps ax | grep 'OpenSRF Drone' ",
465 "ps ax | grep 'OpenSRF Router' "
# every search gets the same tail: drop the grep process itself, trim
# leading blanks, keep the first space-separated column
468 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
470 for my $grep (@greps) {
472 s/^\s*|\n//g for @spids;
473 push (@pids, @spids);
# Walks every PID file (router included) and reports those containing a PID
# that is not among the currently running OpenSRF processes.
# NOTE(review): the statement that removes the file and the closing braces
# are not present in this listing.
479 sub clear_stale_pids {
480 my @pidfile_services = get_service_list_from_files(1);
481 my @running_pids = get_running_pids();
483 for my $svc (@pidfile_services) {
484 my @pids = get_service_pids_from_file($svc);
485 for my $pid (@pids) {
# a PID that is still running is not stale
486 next if grep { $_ eq $pid } @running_pids;
487 my $pidfile = get_pid_file($svc);
488 msg("removing stale pid file $pidfile");
# Stops every service that has a PID file (router excluded), escalating
# through the signal list (TERM, INT, KILL by default), and finally stops
# the router with the first signal of the list.
# NOTE(review): the enclosing sub header, argument unpacking, the handling
# of @redo between rounds and the return value are not present in this
# listing; the dispatcher calls do_stop_all() and do_stop_all(<signal>).
497 msg("stopping all services for $hostname");
499 my @services = get_service_list_from_files();
500 @signals = qw/TERM INT KILL/ unless @signals;
502 for my $signal (@signals) {
505 # send the signal to all PIDs
506 do_signal_send($_, $signal) for @services;
508 # then wait for them to go away
509 for my $service (@services) {
# 2 means the process is still alive; remember the service for later
510 push(@redo, $service) if do_signal_wait($service) == 2;
514 last unless @services;
517 # graceful shutdown requires the presence of the router, so stop the
518 # router last. See if it's running first to avoid unnecessary warnings.
519 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
524 # daemonize us. return true if we're the child, false if parent
# With --no-daemon nothing is forked and 1 is returned immediately.
# NOTE(review): the enclosing sub header, the parent/child branch and the
# return statements after the fork are not present in this listing.
526 return 1 if $opt_no_daemon;
528 my $pid_file = get_pid_file($service);
529 my $pid = OpenSRF::Utils::safe_fork();
531 msg("starting service pid=$pid $service");
# redirect the standard streams to /dev/null
539 open STDIN, '</dev/null';
540 open STDOUT, '>/dev/null';
541 open STDERR, '>/dev/null';
# record this process's PID in the service's PID file
542 `echo $$ > $pid_file`;
546 # parses the local settings file
# Reads the settings file named by the bootstrap configuration and stores
# the configuration for this host in
# $OpenSRF::Utils::SettingsClient::host_config. Does nothing when no
# settings file is configured.
# NOTE(review): the enclosing sub header is not present in this listing;
# callers use load_settings().
548 my $conf = OpenSRF::Utils::Config->current;
549 my $cfile = $conf->bootstrap->settings_config;
550 return unless $cfile;
551 my $parser = OpenSRF::Utils::SettingsParser->new();
552 $parser->initialize( $cfile );
553 $OpenSRF::Utils::SettingsClient::host_config =
554 $parser->get_server_config($conf->env->hostname);
# Prints $m to STDOUT prefixed with "* ", unless --quiet was given.
# NOTE(review): the sub header and argument unpacking are not present in
# this listing; callers use msg(<text>).
559 print "* $m\n" unless $opt_quiet;
565 Usage: $0 --localhost --start-all
567 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
568 OpenSRF configuration file
570 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
571 Directory where process-specific PID files are kept
573 --settings-startup-pause
574 How long to give the opensrf.settings server to start up when running
575 in batch mode (start_all). The purpose is to give plenty of time for
576 the settings server to be up and active before any non-Perl services
580 Force the hostname to be 'localhost', instead of the fully qualified
581 domain name for the machine.
584 Specifies which OpenSRF service to control
587 Do not print informational messages to STDOUT
590 Do not detach and run as a daemon process. Useful for debugging.
591 Only works for Perl services and only when starting a single service.
594 Print this help message
597 Print information about running services
599 ==== starting services =====
602 Start the router and all services
605 Start the service specified by --service
608 Restart the router and all services
611 Restart the service specified by --service
613 --force-clean-process
614 When starting a service, if a service process is already running
615 but no pidfile exists, kill the service process before starting
618 ==== stopping services =====
621 Stop the router and all services. Services are sent the TERM signal,
622 followed by the INT signal, followed by the KILL signal. With each
623 iteration, the script pauses up to --signal-timeout seconds waiting
624 for each process to die before sending the next signal.
627 Stop the service specified by --service. See also --stop-all.
628 If the requested service does not have a matching PID file, an
629 attempt to locate the PID via 'ps' will be made.
631 --graceful-shutdown-all
632 Send TERM signal to all services + router
635 Send TERM signal to the service specified by --service
638 Send INT signal to all services + router
641 Send INT signal to the service specified by --service
643 --immediate-shutdown-all
644 Send KILL signal to all services + router
647 Send KILL signal to the service specified by --service
650 Send KILL signal to all running services + routers, regardless of
651 the presence of a PID file, and remove all PID files indiscriminately.
653 ==== signaling services =====
656 Send signal to all services
659 Name of signal to send. If --signal-all is not specified, the
660 signal will be sent to the service specified by --service.
663 Seconds to wait for a process to die after sending a shutdown signal.
664 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
# Action dispatch. Each statement runs when its flag was given, so several
# actions can be combined in one invocation. In the "and" chains a later
# step runs only if the previous call returned a true value.
671 do_init() and do_start($opt_service) if $opt_start;
672 do_init() and do_stop($opt_service) and do_start($opt_service) if $opt_restart;
673 do_init() and do_start_all() if $opt_start_all;
674 do_init() and do_stop_all() and do_start_all() if $opt_restart_all;
# stop actions: without an explicit signal, do_stop()/do_stop_all() fall
# back to their default TERM, INT, KILL sequence
677 do_stop($opt_service) if $opt_stop;
678 do_stop_all() if $opt_stop_all;
679 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
680 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
681 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
682 do_stop_all('TERM') if $opt_shutdown_graceful_all;
683 do_stop_all('INT') if $opt_shutdown_fast_all;
684 do_stop_all('KILL') if $opt_shutdown_immediate_all;
685 do_kill_with_fire() if $opt_kill_with_fire;
# arbitrary signal delivery
688 do_signal($opt_service, $opt_signal) if $opt_signal;
689 do_signal_all($opt_signal) if $opt_signal_all;
692 do_diagnostic() if $opt_diagnostic;
695 # show help if no action was requested
696 do_help() if $opt_help or not (
705 $opt_shutdown_graceful or
706 $opt_shutdown_graceful_all or
707 $opt_shutdown_fast or
708 $opt_shutdown_fast_all or
709 $opt_shutdown_immediate or
710 $opt_shutdown_immediate_all or
711 $opt_kill_with_fire or