2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_force_clean_process = 0;
54 my $opt_diagnostic = 0;
58 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
61 'service=s' => \$opt_service,
62 'config=s' => \$opt_config,
63 'pid-dir=s' => \$opt_pid_dir,
64 'no-daemon' => \$opt_no_daemon,
65 'settings-startup-pause=i' => \$opt_settings_pause,
66 'localhost' => \$opt_localhost,
68 'quiet' => \$opt_quiet,
69 'graceful-shutdown' => \$opt_shutdown_graceful,
70 'fast-shutdown' => \$opt_shutdown_fast,
71 'immediate-shutdown' => \$opt_shutdown_immediate,
72 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
73 'fast-shutdown-all' => \$opt_shutdown_fast_all,
74 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
75 'kill-with-fire' => \$opt_kill_with_fire,
76 'force-clean-process' => \$opt_force_clean_process,
77 'signal-timeout' => \$opt_signal_timeout,
78 'signal=s' => \$opt_signal,
79 'signal-all' => \$opt_signal_all,
80 'start' => \$opt_start,
82 'start-all' => \$opt_start_all,
83 'stop-all' => \$opt_stop_all,
84 'restart' => \$opt_restart,
85 'restart-all' => \$opt_restart_all,
86 'diagnostic' => \$opt_diagnostic
90 $hostname = 'localhost';
91 $ENV{OSRF_HOSTNAME} = $hostname;
94 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
95 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
101 my @pids = get_service_pids_from_file($service);
104 # no PID files exist. see if the service is running anyway
106 @pids = get_service_pids_from_ps($service);
108 msg("cannot signal $service : no pid file or running process");
113 for my $pid (@pids) {
114 if (kill($signal, $pid) == 0) { # no process was signaled.
115 msg("cannot signal $service: process $pid is not running");
116 my $pidfile = get_pid_file($service);
117 unlink $pidfile if $pidfile;
121 msg("sending $signal signal to pid=$pid $service");
127 # returns 2 if a process should have gone away but did not
128 # in the case of multiple PIDs (e.g. router), return the
129 # status of any failures, but not the successes.
132 my @pids = get_service_pids_from_file($service);
135 for my $pid (@pids) {
137 # to determine whether a process has died, we have to send
138 # a no-op signal to the PID and check the success of that signal
140 for my $i (1..$opt_signal_timeout) {
141 $sig_count = kill(0, $pid);
142 last unless $sig_count;
147 msg("timed out waiting on $service pid=$pid to die");
152 # cleanup successful. remove the PID file
153 my $pidfile = get_pid_file($service);
154 unlink $pidfile if $pidfile;
162 return "$opt_pid_dir/$service.pid";
165 # services usually only have 1 pid, but the router will have at least 2
166 sub get_service_pids_from_file {
168 my $pid_file = get_pid_file($service);
169 return () unless -e $pid_file;
170 my @pids = `cat $pid_file`;
171 s/^\s*|\n//g for @pids;
175 sub get_service_pids_from_ps {
178 my $ps = ($service eq 'router') ?
179 "ps ax | grep 'OpenSRF Router'" :
180 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
182 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
184 s/^\s*|\n//g for @pids;
191 my $alive = do_init(1);
193 my @services = get_service_list_from_files(1);
196 @conf_services = (@perl_services,
197 map {$_->{service}} @nonperl_services);
198 push(@services, @conf_services);
203 for my $svc (@services) {
204 $len = length($svc) if length($svc) > $len;
208 for my $svc (sort keys %services) {
209 my @pf_pids = get_service_pids_from_file($svc);
210 my @ps_pids = get_service_pids_from_ps($svc);
211 my $svc_str = sprintf("%-${len}s ", $svc);
215 $err .= "NOT configured for this host! [@ps_pids]"
216 unless $svc eq 'router' or
217 grep {$_ eq $svc} @conf_services;
219 $err .= "NOT running! ";
223 if (scalar(@pf_pids) == scalar(@ps_pids)) {
224 # we could use Array::Compare, but that would require a new dependency.
225 for my $pfpid (@pf_pids) {
226 unless (grep {$_ == $pfpid} @ps_pids) {
231 } else { $matching = 0 }
234 $err .= "Process list does not match PID files!";
235 $err .= "\n\tPS=@ps_pids / PID=@pf_pids";
239 msg("$svc_str $err");
243 for my $pid (@ps_pids) {
244 my $str = "$svc_str OK [$pid] ";
246 my $etime = `ps -o etime= $pid`;
247 my $cputime = `ps -o cputime= $pid`;
248 $etime =~ s/^\s*|\s*$//g;
249 $cputime =~ s/^\s*|\s*$//g;
250 $str .= sprintf("uptime=%-11s cputime=%-11s ", $etime, $cputime);
252 if ($svc ne 'router') {
253 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
255 $str .= "#drones=".scalar(@drones);
257 $str .= "NO Running Drones!";
268 sub do_start_router {
269 `opensrf_router $opt_config routers`;
271 sleep 2; # give the router time to fork
272 my @pids = `ps -C opensrf_router -o pid=`;
273 s/^\s*|\n//g for @pids;
275 my $pidfile = get_pid_file('router');
276 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
279 msg("starting service pid=$_ router");
285 # stop a specific service
287 my ($service, @signals) = @_;
288 @signals = qw/TERM INT KILL/ unless @signals;
289 for my $sig (@signals) {
290 last unless do_signal($service, $sig) == 2;
298 OpenSRF::System->bootstrap_client(config_file => $opt_config);
300 if (!OpenSRF::Transport::PeerHandle->retrieve) {
301 return 0 if $fail_ok;
302 die "Unable to bootstrap client for requests\n";
305 load_settings(); # load the settings config if we can
307 my $sclient = OpenSRF::Utils::SettingsClient->new;
308 my $apps = $sclient->config_value("activeapps", "appname");
310 # disconnect the top-level network handle
311 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
314 $apps = [$apps] unless ref $apps;
315 for my $app (@$apps) {
316 if (!$sclient->config_value('apps', $app)) {
317 msg("Service '$app' is listed for this host, ".
318 "but there is no configuration for it in $opt_config");
321 my $lang = $sclient->config_value('apps', $app, 'language') || '';
322 if ($lang =~ /perl/i) {
323 push(@perl_services, $app);
325 push(@nonperl_services, {service => $app, lang => $lang});
332 # start a specific service
336 my @pf_pids = get_service_pids_from_file($service);
337 my @ps_pids = get_service_pids_from_ps($service);
339 if (@pf_pids) { # had pidfile
342 msg("service $service already running : @ps_pids");
345 } else { # stale pidfile
347 my $pidfile = get_pid_file($service);
348 msg("removing stale pid file $pidfile");
352 } elsif (@ps_pids) { # orphan process
354 if ($opt_force_clean_process) {
355 msg("service $service pid=@ps_pids is running with no pidfile");
356 do_signal($service, 'KILL');
358 msg("service $service pid=@ps_pids is running with no pidfile! ".
359 "use --force-clean-process to automatically kill orphan processes");
364 return do_start_router() if $service eq 'router';
366 load_settings() if $service eq 'opensrf.settings';
368 if(grep { $_ eq $service } @perl_services) {
369 return unless do_daemon($service);
370 OpenSRF::System->run_service($service, $opt_pid_dir);
373 # note: we don't daemonize non-perl services, but instead
374 # assume the controller for other languages manages that.
375 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
377 if ($svc->{lang} =~ /c/i) {
378 system("$C_COMMAND -a start -s $service");
380 } elsif ($svc->{lang} =~ /python/i) {
381 system("$PY_COMMAND -a start -s $service");
387 msg("$service is not configured to run on $hostname");
392 msg("starting all services for $hostname");
395 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
396 do_start('opensrf.settings');
397 # in batch mode, give opensrf.settings plenty of time to start
398 # before any non-Perl services try to connect
399 sleep $opt_settings_pause if $opt_settings_pause;
402 # start Perl services
403 for my $service (@perl_services) {
404 do_start($service) unless $service eq 'opensrf.settings';
407 # start each non-perl service individually instead of using the native
408 # start-all command. this allows us to test for existing pid files
409 # and/or running processes on each service before starting.
410 # it also means each service has to connect-fetch_setting-disconnect
411 # from jabber, which makes startup slightly slower than native start-all
412 do_start($_->{service}) for @nonperl_services;
417 # signal a single service
421 return do_signal_all($signal, $service);
424 # returns the list of running services based on presence of PID files.
425 # the 'router' service is not included by default, since it's
426 # usually treated specially.
427 sub get_service_list_from_files {
428 my $include_router = shift;
429 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
430 s/^\s*|\n//g for @services;
431 s|.*/(.*)\.pid$|$1| for @services;
432 return @services if $include_router;
433 return grep { $_ ne 'router' } @services;
437 my ($signal, @services) = @_;
438 @services = get_service_list_from_files() unless @services;
440 do_signal_send($_, $signal) for @services;
442 # if user passed a known non-shutdown signal, we're done.
443 return if $signal =~ /HUP|USR1|USR2/;
445 do_signal_wait($_) for @services;
448 # pull all opensrf listener and drone PIDs from 'ps',
449 # kill them all, and remove all pid files
450 sub do_kill_with_fire {
451 msg("killing with fire");
453 my @pids = get_running_pids();
455 next unless $_ =~ /\d+/;
456 my $proc = `ps -p $_ -o cmd=`;
458 msg("killing with fire pid=$_ $proc");
462 # remove all of the pid files
463 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
464 s/^\s*|\n//g for @files;
466 msg("removing pid file $_");
471 sub get_running_pids {
474 # start with the listeners, then drones, then routers
476 "ps ax | grep 'OpenSRF Listener' ",
477 "ps ax | grep 'OpenSRF Drone' ",
478 "ps ax | grep 'OpenSRF Router' "
481 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
483 for my $grep (@greps) {
485 s/^\s*|\n//g for @spids;
486 push (@pids, @spids);
492 sub clear_stale_pids {
493 my @pidfile_services = get_service_list_from_files(1);
494 my @running_pids = get_running_pids();
496 for my $svc (@pidfile_services) {
497 my @pids = get_service_pids_from_file($svc);
498 for my $pid (@pids) {
499 next if grep { $_ eq $pid } @running_pids;
500 my $pidfile = get_pid_file($svc);
501 msg("removing stale pid file $pidfile");
510 msg("stopping all services for $hostname");
512 my @services = get_service_list_from_files();
513 @signals = qw/TERM INT KILL/ unless @signals;
515 for my $signal (@signals) {
518 # send the signal to all PIDs
519 do_signal_send($_, $signal) for @services;
521 # then wait for them to go away
522 for my $service (@services) {
523 push(@redo, $service) if do_signal_wait($service) == 2;
527 last unless @services;
530 # graceful shutdown requires the presence of the router, so stop the
531 # router last. See if it's running first to avoid unnecessary warnings.
532 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
537 # daemonize us. return true if we're the child, false if parent
539 return 1 if $opt_no_daemon;
541 my $pid_file = get_pid_file($service);
542 my $pid = OpenSRF::Utils::safe_fork();
544 msg("starting service pid=$pid $service");
552 open STDIN, '</dev/null';
553 open STDOUT, '>/dev/null';
554 open STDERR, '>/dev/null';
555 `echo $$ > $pid_file`;
559 # parses the local settings file
561 my $conf = OpenSRF::Utils::Config->current;
562 my $cfile = $conf->bootstrap->settings_config;
563 return unless $cfile;
564 my $parser = OpenSRF::Utils::SettingsParser->new();
565 $parser->initialize( $cfile );
566 $OpenSRF::Utils::SettingsClient::host_config =
567 $parser->get_server_config($conf->env->hostname);
572 print "* $m\n" unless $opt_quiet;
578 Usage: $0 --localhost --start-all
580 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
581 OpenSRF configuration file
583 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
584 Directory where process-specific PID files are kept
586 --settings-startup-pause
587 How long to give the opensrf.settings server to start up when running
588 in batch mode (start_all). The purpose is to give plenty of time for
589 the settings server to be up and active before any non-Perl services
593 Force the hostname to be 'localhost', instead of the fully qualified
594 domain name for the machine.
597 Specifies which OpenSRF service to control
600 Do not print informational messages to STDOUT
603 Do not detach and run as a daemon process. Useful for debugging.
604 Only works for Perl services and only when starting a single service.
607 Print this help message
610 Print information about running services
612 ==== starting services =====
615 Start the router and all services
618 Start the service specified by --service
621 Restart the router and all services
624 Restart the service specified by --service
626 --force-clean-process
627 When starting a service, if a service process is already running
628 but no pidfile exists, kill the service process before starting
631 ==== stopping services =====
634 Stop the router and all services. Services are sent the TERM signal,
635 followed by the INT signal, followed by the KILL signal. With each
636 iteration, the script pauses up to --signal-timeout seconds waiting
637 for each process to die before sending the next signal.
640 Stop the service specified by --service. See also --stop-all.
641 If the requested service does not have a matching PID file, an
642 attempt to locate the PID via 'ps' will be made.
644 --graceful-shutdown-all
645 Send TERM signal to all services + router
648 Send TERM signal to the service specified by --service
651 Send INT signal to all services + router
654 Send INT signal to the service specified by --service
656 --immediate-shutdown-all
657 Send KILL signal to all services + router
660 Send KILL signal to the service specified by --service
663 Send KILL signal to all running services + routers, regardless of
664 the presence of a PID file, and remove all PID files indiscriminately.
666 ==== signaling services =====
669 Send signal to all services
672 Name of signal to send. If --signal-all is not specified, the
673 signal will be sent to the service specified by --service.
676 Seconds to wait for a process to die after sending a shutdown signal.
677 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
684 do_init() and do_start($opt_service) if $opt_start;
685 do_init() and do_stop($opt_service) and do_start($opt_service) if $opt_restart;
686 do_init() and do_start_all() if $opt_start_all;
687 do_init() and do_stop_all() and do_start_all() if $opt_restart_all;
690 do_stop($opt_service) if $opt_stop;
691 do_stop_all() if $opt_stop_all;
692 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
693 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
694 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
695 do_stop_all('TERM') if $opt_shutdown_graceful_all;
696 do_stop_all('INT') if $opt_shutdown_fast_all;
697 do_stop_all('KILL') if $opt_shutdown_immediate_all;
698 do_kill_with_fire() if $opt_kill_with_fire;
701 do_signal($opt_service, $opt_signal) if $opt_signal;
702 do_signal_all($opt_signal) if $opt_signal_all;
705 do_diagnostic() if $opt_diagnostic;
708 # show help if no action was requested
709 do_help() if $opt_help or not (
718 $opt_shutdown_graceful or
719 $opt_shutdown_graceful_all or
720 $opt_shutdown_fast or
721 $opt_shutdown_fast_all or
722 $opt_shutdown_immediate or
723 $opt_shutdown_immediate_all or
724 $opt_kill_with_fire or