2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_force_clean_process = 0;
54 my $opt_diagnostic = 0;
58 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
61 'service=s' => \$opt_service,
62 'config=s' => \$opt_config,
63 'pid-dir=s' => \$opt_pid_dir,
64 'no-daemon' => \$opt_no_daemon,
65 'settings-startup-pause=i' => \$opt_settings_pause,
66 'localhost' => \$opt_localhost,
68 'quiet' => \$opt_quiet,
69 'graceful-shutdown' => \$opt_shutdown_graceful,
70 'fast-shutdown' => \$opt_shutdown_fast,
71 'immediate-shutdown' => \$opt_shutdown_immediate,
72 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
73 'fast-shutdown-all' => \$opt_shutdown_fast_all,
74 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
75 'kill-with-fire' => \$opt_kill_with_fire,
76 'force-clean-process' => \$opt_force_clean_process,
77 'signal-timeout=i' => \$opt_signal_timeout, # requires an integer value: seconds to wait for a signaled process to die
78 'signal=s' => \$opt_signal,
79 'signal-all' => \$opt_signal_all,
80 'start' => \$opt_start,
82 'start-all' => \$opt_start_all,
83 'stop-all' => \$opt_stop_all,
84 'restart' => \$opt_restart,
85 'restart-all' => \$opt_restart_all,
86 'diagnostic' => \$opt_diagnostic
90 $hostname = 'localhost';
91 $ENV{OSRF_HOSTNAME} = $hostname;
94 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
95 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
99 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
101 return 1 unless $service;
102 return 1 if grep { $_ eq $service } @services;
103 msg("$service is not configured to run on $hostname");
105 msg("No services are configured to run on $hostname");
107 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
115 my @pids = get_service_pids_from_file($service);
118 # no PID files exist. see if the service is running anyway
120 @pids = get_service_pids_from_ps($service);
122 msg("cannot signal $service : no pid file or running process");
127 for my $pid (@pids) {
128 if (kill($signal, $pid) == 0) { # no process was signaled.
129 msg("cannot signal $service: process $pid is not running");
130 my $pidfile = get_pid_file($service);
131 unlink $pidfile if $pidfile;
135 msg("sending $signal signal to pid=$pid $service");
141 # returns 2 if a process should have gone away but did not
142 # in the case of multiple PIDs (e.g. router), return the
143 # status of any failures, but not the successes.
146 my @pids = get_service_pids_from_file($service);
149 for my $pid (@pids) {
151 # to determine whether a process has died, we have to send
152 # a no-op signal to the PID and check the success of that signal
154 for my $i (1..$opt_signal_timeout) {
155 $sig_count = kill(0, $pid);
156 last unless $sig_count;
161 msg("timed out waiting on $service pid=$pid to die");
166 # cleanup successful. remove the PID file
167 my $pidfile = get_pid_file($service);
168 unlink $pidfile if $pidfile;
176 return "$opt_pid_dir/$service.pid";
179 # services usually only have 1 pid, but the router will have at least 2
180 sub get_service_pids_from_file {
182 my $pid_file = get_pid_file($service);
183 return () unless -e $pid_file;
184 my @pids = `cat $pid_file`;
185 s/^\s*|\n//g for @pids;
189 sub get_service_pids_from_ps {
192 my $ps = ($service eq 'router') ?
193 "ps ax | grep 'OpenSRF Router'" :
194 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
196 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
198 s/^\s*|\n//g for @pids;
205 my $alive = do_init(1);
207 my @services = get_service_list_from_files(1);
210 @conf_services = (@perl_services,
211 map {$_->{service}} @nonperl_services);
212 push(@services, @conf_services);
217 for my $svc (@services) {
218 $len = length($svc) if length($svc) > $len;
222 for my $svc (sort keys %services) {
223 my @pf_pids = get_service_pids_from_file($svc);
224 my @ps_pids = get_service_pids_from_ps($svc);
225 my $svc_str = sprintf("%-${len}s ", $svc);
228 unless(@ps_pids or @pf_pids) {
229 msg("$svc_str is not running");
233 for my $pid (@ps_pids) {
236 my $str = "$svc_str [$pid] ";
237 my $times = `ps -o etime=,cputime= $pid`;
238 $times =~ s/^\s+|\s+$//g;
239 my @times = split(/ /, $times);
240 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
242 if ($svc eq 'router') {
245 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
246 $str .= "#drones=".scalar(@drones);
248 msg("\tERR $svc has no running drones.") unless @drones;
251 msg("\tERR $svc [$pid] NOT configured for this host.")
252 unless grep {$_ eq $svc} @conf_services
255 msg("\tERR $svc [$pid] NOT found in PID file.")
256 unless grep {$_ eq $pid} @pf_pids;
259 for my $pid (@pf_pids) {
261 msg("\tERR $svc Has PID file entry [$pid], ".
262 "which matches no running $svc processes");
269 sub do_start_router {
270 `opensrf_router $opt_config routers`;
272 sleep 2; # give the router time to fork
273 my @pids = `ps -C opensrf_router -o pid=`;
274 s/^\s*|\n//g for @pids;
276 my $pidfile = get_pid_file('router');
277 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
280 msg("starting service pid=$_ router");
286 # stop a specific service
288 my ($service, @signals) = @_;
289 @signals = qw/TERM INT KILL/ unless @signals;
290 for my $sig (@signals) {
291 last unless do_signal($service, $sig) == 2;
299 OpenSRF::System->bootstrap_client(config_file => $opt_config);
301 if (!OpenSRF::Transport::PeerHandle->retrieve) {
302 return 0 if $fail_ok;
303 die "Unable to bootstrap client for requests\n";
306 load_settings(); # load the settings config if we can
308 my $sclient = OpenSRF::Utils::SettingsClient->new;
309 my $apps = $sclient->config_value("activeapps", "appname");
311 # disconnect the top-level network handle
312 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
315 $apps = [$apps] unless ref $apps;
316 for my $app (@$apps) {
317 if (!$sclient->config_value('apps', $app)) {
318 msg("Service '$app' is listed for this host, ".
319 "but there is no configuration for it in $opt_config");
322 my $lang = $sclient->config_value('apps', $app, 'language') || '';
323 if ($lang =~ /perl/i) {
324 push(@perl_services, $app);
326 push(@nonperl_services, {service => $app, lang => $lang});
333 # start a specific service
337 my @pf_pids = get_service_pids_from_file($service);
338 my @ps_pids = get_service_pids_from_ps($service);
340 if (@pf_pids) { # had pidfile
343 msg("service $service already running : @ps_pids");
346 } else { # stale pidfile
348 my $pidfile = get_pid_file($service);
349 msg("removing stale pid file $pidfile");
353 } elsif (@ps_pids) { # orphan process
355 if ($opt_force_clean_process) {
356 msg("service $service pid=@ps_pids is running with no pidfile");
357 do_signal($service, 'KILL');
359 msg("service $service pid=@ps_pids is running with no pidfile! ".
360 "use --force-clean-process to automatically kill orphan processes");
365 return do_start_router() if $service eq 'router';
367 load_settings() if $service eq 'opensrf.settings';
369 if(grep { $_ eq $service } @perl_services) {
370 return unless do_daemon($service);
371 OpenSRF::System->run_service($service, $opt_pid_dir);
374 # note: we don't daemonize non-perl services, but instead
375 # assume the controller for other languages manages that.
376 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
378 if ($svc->{lang} =~ /c/i) {
379 system("$C_COMMAND -a start -s $service");
381 } elsif ($svc->{lang} =~ /python/i) {
382 system("$PY_COMMAND -a start -s $service");
388 # should not get here
393 msg("starting all services for $hostname");
396 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
397 do_start('opensrf.settings');
398 # in batch mode, give opensrf.settings plenty of time to start
399 # before any non-Perl services try to connect
400 sleep $opt_settings_pause if $opt_settings_pause;
403 # start Perl services
404 for my $service (@perl_services) {
405 do_start($service) unless $service eq 'opensrf.settings';
408 # start each non-perl service individually instead of using the native
409 # start-all command. this allows us to test for existing pid files
410 # and/or running processes on each service before starting.
411 # it also means each service has to connect-fetch_setting-disconnect
412 # from jabber, which makes startup slightly slower than native start-all
413 do_start($_->{service}) for @nonperl_services;
418 # signal a single service
422 return do_signal_all($signal, $service);
425 # returns the list of running services based on presence of PID files.
426 # the 'router' service is not included by default, since it's
427 # usually treated specially.
428 sub get_service_list_from_files {
429 my $include_router = shift;
430 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
431 s/^\s*|\n//g for @services;
432 s|.*/(.*)\.pid$|$1| for @services;
433 return @services if $include_router;
434 return grep { $_ ne 'router' } @services;
438 my ($signal, @services) = @_;
439 @services = get_service_list_from_files() unless @services;
441 do_signal_send($_, $signal) for @services;
443 # if user passed a known non-shutdown signal, we're done.
444 return if $signal =~ /HUP|USR1|USR2/;
446 do_signal_wait($_) for @services;
449 # pull all opensrf listener and drone PIDs from 'ps',
450 # kill them all, and remove all pid files
451 sub do_kill_with_fire {
452 msg("killing with fire");
454 my @pids = get_running_pids();
456 next unless $_ =~ /\d+/;
457 my $proc = `ps -p $_ -o cmd=`;
459 msg("killing with fire pid=$_ $proc");
463 # remove all of the pid files
464 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
465 s/^\s*|\n//g for @files;
467 msg("removing pid file $_");
472 sub get_running_pids {
475 # start with the listeners, then drones, then routers
477 "ps ax | grep 'OpenSRF Listener' ",
478 "ps ax | grep 'OpenSRF Drone' ",
479 "ps ax | grep 'OpenSRF Router' "
482 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
484 for my $grep (@greps) {
486 s/^\s*|\n//g for @spids;
487 push (@pids, @spids);
493 sub clear_stale_pids {
494 my @pidfile_services = get_service_list_from_files(1);
495 my @running_pids = get_running_pids();
497 for my $svc (@pidfile_services) {
498 my @pids = get_service_pids_from_file($svc);
499 for my $pid (@pids) {
500 next if grep { $_ eq $pid } @running_pids;
501 my $pidfile = get_pid_file($svc);
502 msg("removing stale pid file $pidfile");
511 msg("stopping all services for $hostname");
513 my @services = get_service_list_from_files();
514 @signals = qw/TERM INT KILL/ unless @signals;
516 for my $signal (@signals) {
519 # send the signal to all PIDs
520 do_signal_send($_, $signal) for @services;
522 # then wait for them to go away
523 for my $service (@services) {
524 push(@redo, $service) if do_signal_wait($service) == 2;
528 last unless @services;
531 # graceful shutdown requires the presence of the router, so stop the
532 # router last. See if it's running first to avoid unnecessary warnings.
533 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
538 # daemonize us. return true if we're the child, false if parent
540 return 1 if $opt_no_daemon;
542 my $pid_file = get_pid_file($service);
543 my $pid = OpenSRF::Utils::safe_fork();
545 msg("starting service pid=$pid $service");
553 open STDIN, '</dev/null';
554 open STDOUT, '>/dev/null';
555 open STDERR, '>/dev/null';
556 `echo $$ > $pid_file`;
560 # parses the local settings file
562 my $conf = OpenSRF::Utils::Config->current;
563 my $cfile = $conf->bootstrap->settings_config;
564 return unless $cfile;
565 my $parser = OpenSRF::Utils::SettingsParser->new();
566 $parser->initialize( $cfile );
567 $OpenSRF::Utils::SettingsClient::host_config =
568 $parser->get_server_config($conf->env->hostname);
573 print "* $m\n" unless $opt_quiet;
579 Usage: $0 --localhost --start-all
581 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
582 OpenSRF configuration file
584 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
585 Directory where process-specific PID files are kept
587 --settings-startup-pause
588 How long to give the opensrf.settings server to start up when running
589 in batch mode (start_all). The purpose is to give plenty of time for
590 the settings server to be up and active before any non-Perl services
594 Force the hostname to be 'localhost', instead of the fully qualified
595 domain name for the machine.
598 Specifies which OpenSRF service to control
601 Do not print informational messages to STDOUT
604 Do not detach and run as a daemon process. Useful for debugging.
605 Only works for Perl services and only when starting a single service.
608 Print this help message
611 Print information about running services
613 ==== starting services =====
616 Start the router and all services
619 Start the service specified by --service
622 Restart the router and all services
625 Restart the service specified by --service
627 --force-clean-process
628 When starting a service, if a service process is already running
629 but no pidfile exists, kill the service process before starting
632 ==== stopping services =====
635 Stop the router and all services. Services are sent the TERM signal,
636 followed by the INT signal, followed by the KILL signal. With each
637 iteration, the script pauses up to --signal-timeout seconds waiting
638 for each process to die before sending the next signal.
641 Stop the service specified by --service. See also --stop-all.
642 If the requested service does not have a matching PID file, an
643 attempt to locate the PID via 'ps' will be made.
645 --graceful-shutdown-all
646 Send TERM signal to all services + router
649 Send TERM signal to the service specified by --service
652 Send INT signal to all services + router
655 Send INT signal to the service specified by --service
657 --immediate-shutdown-all
658 Send KILL signal to all services + router
661 Send KILL signal to the service specified by --service
664 Send KILL signal to all running services + routers, regardless of
665 the presence of a PID file, and remove all PID files indiscriminately.
667 ==== signaling services =====
670 Send signal to all services
673 Name of signal to send. If --signal-all is not specified, the
674 signal will be sent to the service specified by --service.
677 Seconds to wait for a process to die after sending a shutdown signal.
678 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
684 # we do not verify services for stop/signal actions, since those may
685 # legitimately be used against services not (or no longer) configured
686 # to run on the selected host.
687 do_init() and verify_services($opt_service) if
693 # starting services. do_init() handled above
694 do_start($opt_service) if $opt_start;
695 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
696 do_start_all() if $opt_start_all;
697 do_stop_all() and do_start_all() if $opt_restart_all;
700 do_stop($opt_service) if $opt_stop;
701 do_stop_all() if $opt_stop_all;
702 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
703 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
704 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
705 do_stop_all('TERM') if $opt_shutdown_graceful_all;
706 do_stop_all('INT') if $opt_shutdown_fast_all;
707 do_stop_all('KILL') if $opt_shutdown_immediate_all;
708 do_kill_with_fire() if $opt_kill_with_fire;
711 do_signal($opt_service, $opt_signal) if $opt_signal;
712 do_signal_all($opt_signal) if $opt_signal_all;
715 do_diagnostic() if $opt_diagnostic;
718 # show help if no action was requested
719 do_help() if $opt_help or not (
728 $opt_shutdown_graceful or
729 $opt_shutdown_graceful_all or
730 $opt_shutdown_fast or
731 $opt_shutdown_fast_all or
732 $opt_shutdown_immediate or
733 $opt_shutdown_immediate_all or
734 $opt_kill_with_fire or