2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
# Defaults for the command-line options. Each variable is bound to its
# switch in the option specification table further down.

# target service, config file and PID directory.
# NOTE(review): @CONF_DIR@ / @PID_DIR@ look like build-time substitution
# placeholders -- confirm they are replaced before this script is installed.
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
# seconds to sleep after starting opensrf.settings in batch start-up
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
# shutdown flavours: graceful sends TERM, fast sends INT, immediate sends KILL;
# the *_all variants act on all services plus the router
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
# upper bound (in seconds, per the usage text) on the wait for a signalled process to die
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
55 my $opt_force_clean_process = 0;
# router (de-)registration requests are translated into USR1 / USR2 signals
56 my $opt_router_de_register = 0;
57 my $opt_router_de_register_all = 0;
58 my $opt_router_re_register = 0;
59 my $opt_router_re_register_all = 0;
# reload requests are translated into a HUP signal
61 my $opt_reload_all = 0;
63 my $opt_diagnostic = 0;
64 my $opt_ignore_orphans = 0;
# host name used in messages and controller commands; the OSRF_HOSTNAME
# environment variable takes precedence over the detected FQDN
69 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Option specification table: maps each command-line switch to the variable
# that receives it. A "=s" suffix takes a string value, "=i" an integer
# value, and a bare name is a boolean flag.
# NOTE(review): the call that consumes this list is not visible in this
# listing -- presumably Getopt::Long's GetOptions(); confirm.
72 'service=s' => \$opt_service,
73 'config=s' => \$opt_config,
74 'pid-dir=s' => \$opt_pid_dir,
75 'no-daemon' => \$opt_no_daemon,
76 'settings-startup-pause=i' => \$opt_settings_pause,
77 'localhost' => \$opt_localhost,
79 'quiet' => \$opt_quiet,
80 'graceful-shutdown' => \$opt_shutdown_graceful,
81 'fast-shutdown' => \$opt_shutdown_fast,
82 'immediate-shutdown' => \$opt_shutdown_immediate,
83 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
84 'fast-shutdown-all' => \$opt_shutdown_fast_all,
85 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
86 'kill-with-fire' => \$opt_kill_with_fire,
87 'force-clean-process' => \$opt_force_clean_process,
# the timeout is a number of seconds, so the switch must take an integer
# value; as a bare flag, "--signal-timeout 60" would set the timeout to 1
# and leave "60" behind as a stray argument
88 'signal-timeout=i' => \$opt_signal_timeout,
89 'signal=s' => \$opt_signal,
90 'signal-all' => \$opt_signal_all,
91 'start' => \$opt_start,
93 'start-all' => \$opt_start_all,
94 'stop-all' => \$opt_stop_all,
95 'restart' => \$opt_restart,
96 'restart-all' => \$opt_restart_all,
97 'start-services' => \$opt_start_services,
98 'stop-services' => \$opt_stop_services,
99 'restart-services' => \$opt_restart_services,
100 'router-de-register' => \$opt_router_de_register,
101 'router-de-register-all' => \$opt_router_de_register_all,
102 'router-re-register' => \$opt_router_re_register,
103 'router-re-register-all' => \$opt_router_re_register_all,
104 'reload' => \$opt_reload,
105 'reload-all' => \$opt_reload_all,
106 'diagnostic' => \$opt_diagnostic,
107 'ignore-orphans' => \$opt_ignore_orphans
# --localhost overrides the detected host name and exports the override
# through the OSRF_HOSTNAME environment variable
110 if ($opt_localhost) {
111 $hostname = 'localhost';
112 $ENV{OSRF_HOSTNAME} = $hostname;
# base command lines for the non-Perl service controllers; callers append
# the action to run (e.g. "-a start -s <service>" for C services)
115 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
# the Python controller gets -l when --localhost was requested
116 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
# Checks a service name against the services configured for this host
# before a start/restart action is attempted.
# NOTE(review): several lines of this sub are not visible in this listing
# (the unpacking of $service and the conditionals around the msg() calls),
# so the comments below describe only the statements shown.
118 sub verify_services {
# the router is accepted without consulting the configured service lists
120 return 1 if $service and $service eq 'router';
# every configured service name: the Perl services plus the {service}
# field of each non-Perl service entry
121 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
# no specific service was requested, so there is nothing to look up
123 return 1 unless $service;
# the requested service is configured
124 return 1 if grep { $_ eq $service } @services;
125 msg("$service is not configured to run on $hostname");
127 msg("No services are configured to run on $hostname");
# hint shown only when --localhost was not given
129 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
137 my @pids = get_service_pids_from_file($service);
140 # no PID files exist. see if the service is running anyway
142 @pids = get_service_pids_from_ps($service);
144 msg("cannot signal $service : no pid file or running process");
149 for my $pid (@pids) {
150 if (kill($signal, $pid) == 0) { # no process was signaled.
151 msg("cannot signal $service: process $pid is not running");
152 my $pidfile = get_pid_file($service);
153 unlink $pidfile if $pidfile;
157 msg("sending $signal signal to pid=$pid $service");
163 # returns 2 if a process should have gone away but did not
164 # in the case of multiple PIDs (e.g. router), return the
165 # status of any failures, but not the successes.
168 my @pids = get_service_pids_from_file($service);
171 for my $pid (@pids) {
173 # to determine whether a process has died, we have to send
174 # a no-op signal to the PID and check the success of that signal
176 for my $i (1..$opt_signal_timeout) {
177 $sig_count = kill(0, $pid);
178 last unless $sig_count;
183 msg("timed out waiting on $service pid=$pid to die");
188 # cleanup successful. remove the PID file
189 my $pidfile = get_pid_file($service);
190 unlink $pidfile if $pidfile;
198 return "$opt_pid_dir/$service.pid";
201 # services usually only have 1 pid, but the router will have at least 2
# Reads the PIDs recorded in a service's PID file.
# Returns an empty list when the PID file does not exist.
# NOTE(review): the line that unpacks $service and the final return
# statement are not visible in this listing.
202 sub get_service_pids_from_file {
204 my $pid_file = get_pid_file($service);
205 return () unless -e $pid_file;
# in list context the backticks yield one element per line of the file
206 my @pids = `cat $pid_file`;
# strip leading whitespace and newlines from every entry
207 s/^\s*|\n//g for @pids;
# Builds a shell pipeline that finds a service's PIDs in `ps x` output,
# independently of any PID file.
# NOTE(review): the lines that unpack $service, execute $ps into @pids and
# return the result are not visible in this listing.
211 sub get_service_pids_from_ps {
# the router is matched on 'OpenSRF Router'; any other service on its
# listener entry, 'OpenSRF Listener [<service>]'
214 my $ps = ($service eq 'router') ?
215 "ps x | grep 'OpenSRF Router'" :
216 "ps x | grep 'OpenSRF Listener \\[$service\\]'";
# drop the grep processes themselves, trim leading whitespace and keep only
# the first space-separated column
218 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
# strip leading whitespace and newlines from every entry
220 s/^\s*|\n//g for @pids;
227 my $alive = do_init(1);
229 my @services = get_service_list_from_files(1);
232 @conf_services = (@perl_services,
233 map {$_->{service}} @nonperl_services);
234 push(@services, @conf_services);
239 for my $svc (@services) {
240 $len = length($svc) if length($svc) > $len;
244 for my $svc (sort keys %services) {
245 my @pf_pids = get_service_pids_from_file($svc);
246 my @ps_pids = get_service_pids_from_ps($svc);
247 my $svc_str = sprintf("%-${len}s ", $svc);
250 unless(@ps_pids or @pf_pids) {
251 msg("$svc_str is not running");
255 for my $pid (@ps_pids) {
258 my $str = "$svc_str [$pid] ";
259 my $times = `ps -o etime=,cputime= $pid`;
260 $times =~ s/^\s+|\s+$//g;
261 my @times = split(/ /, $times);
262 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
264 if ($svc eq 'router') {
267 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
268 my $dcount = scalar(@drones);
269 my $dmax = $max_children_map{$svc};
270 if (defined($dmax) && $dmax > 0) {
271 $str .= "#drones=$dcount/$dmax ";
272 $str .= sprintf('%3d%%', (int(($dcount / $dmax) * 100)));
274 $str .= "#drones=$dcount";
277 msg("\tERR $svc has no running drones.") unless @drones;
280 msg("\tERR $svc [$pid] NOT configured for this host.")
281 unless grep {$_ eq $svc} @conf_services
284 msg("\tERR $svc [$pid] NOT found in PID file.")
285 unless grep {$_ eq $pid} @pf_pids;
288 for my $pid (@pf_pids) {
290 msg("\tERR $svc Has PID file entry [$pid], ".
291 "which matches no running $svc processes");
# Starts the router by running the external opensrf_router command with the
# config file, the literal argument "routers" and the router PID file path.
# NOTE(review): the end of this sub (any return value) is not visible in
# this listing.
298 sub do_start_router {
300 my $pidfile = get_pid_file('router');
301 `opensrf_router $opt_config routers $pidfile`;
303 sleep 2; # give the router time to fork (probably not needed now but w/e)
306 # stop a specific service
308 my ($service, @signals) = @_;
309 @signals = qw/TERM INT KILL/ unless @signals;
310 for my $sig (@signals) {
311 last unless do_signal($service, $sig) == 2;
319 OpenSRF::System->bootstrap_client(config_file => $opt_config);
321 if (!OpenSRF::Transport::PeerHandle->retrieve) {
322 return 0 if $fail_ok;
323 die "Unable to bootstrap client for requests\n";
326 load_settings(); # load the settings config if we can
328 my $sclient = OpenSRF::Utils::SettingsClient->new;
329 my $apps = $sclient->config_value("activeapps", "appname");
331 # disconnect the top-level network handle
332 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
335 $apps = [$apps] unless ref $apps;
336 for my $app (@$apps) {
337 if (!$sclient->config_value('apps', $app)) {
338 msg("Service '$app' is listed for this host, ".
339 "but there is no configuration for it in $opt_config");
342 my $lang = $sclient->config_value('apps', $app, 'language') || '';
344 $max_children_map{$app} = $sclient->config_value(
345 'apps', $app, 'unix_config', 'max_children');
347 if ($lang =~ /perl/i) {
348 push(@perl_services, $app);
350 push(@nonperl_services, {service => $app, lang => $lang});
357 # start a specific service
361 my @pf_pids = get_service_pids_from_file($service);
362 my @ps_pids = get_service_pids_from_ps($service);
364 if (@pf_pids) { # had pidfile
367 msg("service $service already running : @ps_pids");
370 } else { # stale pidfile
372 my $pidfile = get_pid_file($service);
373 msg("removing stale pid file $pidfile");
377 } elsif (@ps_pids and not $opt_ignore_orphans) { # orphan process
379 if ($opt_force_clean_process) {
380 msg("service $service pid=@ps_pids is running with no pidfile");
381 do_signal($service, 'KILL');
383 msg("service $service pid=@ps_pids is running with no pidfile! ".
384 "use --force-clean-process to automatically kill orphan processes");
389 return do_start_router() if $service eq 'router';
391 load_settings() if $service eq 'opensrf.settings';
393 if(grep { $_ eq $service } @perl_services) {
394 return unless do_daemon($service);
395 OpenSRF::System->run_service($service, $opt_pid_dir);
398 # note: we don't daemonize non-perl services, but instead
399 # assume the controller for other languages manages that.
400 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
402 if ($svc->{lang} =~ /c/i) {
403 system("$C_COMMAND -a start -s $service");
409 # should not get here
415 msg("starting router and services for $hostname");
417 return do_start_services();
# Starts the services configured for this host in a fixed order:
# opensrf.settings first (if configured as a Perl service), then the other
# Perl services, then each non-Perl service.
# NOTE(review): the closing lines of this sub (including any return value)
# are not visible in this listing.
420 sub do_start_services {
421 msg("starting services for $hostname");
423 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
424 do_start('opensrf.settings');
425 # in batch mode, give opensrf.settings plenty of time to start
426 # before any non-Perl services try to connect
427 sleep $opt_settings_pause if $opt_settings_pause;
430 # start Perl services
431 for my $service (@perl_services) {
# opensrf.settings was already started above
432 do_start($service) unless $service eq 'opensrf.settings';
435 # start each non-perl service individually instead of using the native
436 # start-all command. this allows us to test for existing pid files
437 # and/or running processes on each service before starting.
438 # it also means each service has to connect-fetch_setting-disconnect
439 # from jabber, which makes startup slightly slower than native start-all
440 do_start($_->{service}) for @nonperl_services;
445 # signal a single service
449 do_signal_send($service, $signal);
451 # if user passed a known non-shutdown signal, we're done.
452 return 1 if $signal =~ /HUP|USR1|USR2/;
454 return do_signal_wait($service);
457 # returns the list of running services based on presence of PID files.
458 # the 'router' service is not included by default, since it's
459 # usually treated specially.
# pass a true $include_router to keep 'router' in the returned list.
460 sub get_service_list_from_files {
461 my $include_router = shift;
# one entry per *.pid path in the PID directory; error output from ls
# is discarded
462 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every entry
463 s/^\s*|\n//g for @services;
# reduce each path to the bare service name: drop the directory part
# and the .pid suffix
464 s|.*/(.*)\.pid$|$1| for @services;
465 return @services if $include_router;
466 return grep { $_ ne 'router' } @services;
470 my ($signal, @services) = @_;
471 @services = get_service_list_from_files() unless @services;
473 do_signal_send($_, $signal) for @services;
475 # if user passed a known non-shutdown signal, we're done.
476 return if $signal =~ /HUP|USR1|USR2/;
478 do_signal_wait($_) for @services;
481 # pull all opensrf listener and drone PIDs from 'ps',
482 # kill them all, and remove all pid files
# NOTE(review): the loop headers, the kill call and the unlink call are not
# visible in this listing; only the surrounding statements are shown.
483 sub do_kill_with_fire {
484 msg("killing with fire");
# PIDs come from the process table, not from PID files
486 my @pids = get_running_pids();
# skip entries that contain no digits
488 next unless $_ =~ /\d+/;
# look up the command line of the process, used only in the message below
489 my $proc = `ps -p $_ -o cmd=`;
491 msg("killing with fire pid=$_ $proc");
495 # remove all of the pid files
496 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every path
497 s/^\s*|\n//g for @files;
499 msg("removing pid file $_");
# Collects the PIDs of OpenSRF listener, drone and router processes found
# in `ps x` output.
# NOTE(review): the declarations of @greps and @pids, the execution of each
# pipeline into @spids and the return statement are not visible in this
# listing.
504 sub get_running_pids {
507 # start with the listeners, then drones, then routers
509 "ps x | grep 'OpenSRF Listener' ",
510 "ps x | grep 'OpenSRF Drone' ",
511 "ps x | grep 'OpenSRF Router' "
# complete every pipeline: drop the grep processes themselves, trim leading
# whitespace and keep only the first space-separated column
514 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
516 for my $grep (@greps) {
# strip leading whitespace and newlines, then append to the combined list
518 s/^\s*|\n//g for @spids;
519 push (@pids, @spids);
# Compares the PIDs recorded in every PID file (router included) with the
# OpenSRF processes that are actually running and reports PID files whose
# recorded PID is no longer running.
# NOTE(review): the statement that removes the stale file and the closing
# lines of this sub are not visible in this listing.
525 sub clear_stale_pids {
# passing 1 keeps the router's PID file in the list
526 my @pidfile_services = get_service_list_from_files(1);
527 my @running_pids = get_running_pids();
529 for my $svc (@pidfile_services) {
530 my @pids = get_service_pids_from_file($svc);
531 for my $pid (@pids) {
# the recorded PID belongs to a running process: not stale
532 next if grep { $_ eq $pid } @running_pids;
533 my $pidfile = get_pid_file($svc);
534 msg("removing stale pid file $pidfile");
# Stops every service that has a PID file (the router is excluded),
# trying each signal in turn; the default sequence is TERM, INT, KILL.
# NOTE(review): the unpacking of @signals, the declaration of @redo and the
# lines between the wait loop and the final check are not visible in this
# listing -- presumably @services is narrowed to @redo before the next
# signal is tried; confirm.
540 sub do_stop_services {
542 @signals = qw/TERM INT KILL/ unless @signals;
544 msg("stopping services for $hostname");
# called without an argument, so 'router' is filtered out of the list
545 my @services = get_service_list_from_files();
547 for my $signal (@signals) {
550 # send the signal to all PIDs
551 do_signal_send($_, $signal) for @services;
553 # then wait for them to go away
554 for my $service (@services) {
# a return value of 2 is compared against here; elsewhere in this file it
# is described as "a process should have gone away but did not"
555 push(@redo, $service) if do_signal_wait($service) == 2;
# nothing left to stop, so no further signals are needed
559 last unless @services;
567 @signals = qw/TERM INT KILL/ unless @signals;
569 do_stop_services(@signals);
571 # graceful shutdown requires the presence of the router, so stop the
572 # router last. See if it's running first to avoid unnecessary warnings.
573 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
578 # daemonize us. return true if we're the child, false if parent
580 return 1 if $opt_no_daemon;
582 my $pid_file = get_pid_file($service);
583 my $pid = OpenSRF::Utils::safe_fork();
585 msg("starting service pid=$pid $service");
593 open STDIN, '</dev/null';
594 open STDOUT, '>/dev/null';
595 open STDERR, '>/dev/null';
596 `echo $$ > $pid_file`;
600 # parses the local settings file
602 my $conf = OpenSRF::Utils::Config->current;
603 my $cfile = $conf->bootstrap->settings_config;
604 return unless $cfile;
605 my $parser = OpenSRF::Utils::SettingsParser->new();
606 $parser->initialize( $cfile );
607 $OpenSRF::Utils::SettingsClient::host_config =
608 $parser->get_server_config($conf->env->hostname);
613 print "* $m\n" unless $opt_quiet;
619 Usage: $0 --localhost --start-all
621 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
622 OpenSRF configuration file
624 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
625 Directory where process-specific PID files are kept
627 --settings-startup-pause
628 How long to give the opensrf.settings server to start up when running
629 in batch mode (start_all). The purpose is to give plenty of time for
630 the settings server to be up and active before any non-Perl services
634 Force the hostname to be 'localhost', instead of the fully qualified
635 domain name for the machine.
638 Specifies which OpenSRF service to control
641 Do not print informational messages to STDOUT
644 Do not detach and run as a daemon process. Useful for debugging.
645 Only works for Perl services and only when starting a single service.
648 Print this help message
651 Print information about running services
653 ==== starting services =====
656 Start the router and all services
659 Start the service specified by --service
662 Start all services but do not start any routers
665 Restart the router and all services
668 Restart the service specified by --service
671 Restart all services but do not restart any routers
673 --force-clean-process
674 When starting a service, if a service process is already running
675 but no pidfile exists, kill the service process before starting
676 the new one. This applies to routers too.
679 When starting a service, if a service process is already running but
680 no pidfile exists, ignore the existing process and carry on starting
681 the new one (i.e., ignore orphans). This applies to routers too.
683 ==== stopping services =====
686 Stop the router and all services. Services are sent the TERM signal,
687 followed by the INT signal, followed by the KILL signal. With each
688 iteration, the script pauses up to --signal-timeout seconds waiting
689 for each process to die before sending the next signal.
692 Stop the service specified by --service. See also --stop-all.
693 If the requested service does not have a matching PID file, an
694 attempt to locate the PID via 'ps' will be made.
697 Stop all services but do not stop any routers. See also --stop-all.
699 --graceful-shutdown-all
700 Send TERM signal to all services + router
703 Send TERM signal to the service specified by --service
706 Send INT signal to all services + router
709 Send INT signal to the service specified by --service
711 --immediate-shutdown-all
712 Send KILL signal to all services + router
715 Send KILL signal to the service specified by --service
718 Send KILL signal to all running services + routers, regardless of
719 the presence of a PID file, and remove all PID files indiscriminately.
721 ==== signaling services =====
724 Send signal to all services
727 Name of signal to send. If --signal-all is not specified, the
728 signal will be sent to the service specified by --service.
731 Seconds to wait for a process to die after sending a shutdown signal.
732 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
734 ==== special signals ====
737 --router-de-register-all
738 Sends a SIGUSR1 signal to the selected service(s), which causes each
739 service's listener process to send an "unregister" command to all
740 registered routers. The --all variant sends the signal to all
741 running listeners. The non-(--all) variant requires a --service.
744 --router-re-register-all
745 Sends a SIGUSR2 signal to the selected service(s), which causes each
746 service's listener process to send a "register" command to all
747 configured routers. The --all variant sends the signal to all
748 running listeners. The non-(--all) variant requires a --service.
752 Sends a SIGHUP signal to the selected service(s). SIGHUP causes
753 each listener process to reload its opensrf_core.xml config file
754 and gracefully re-launch drone processes. The -all variant sends
755 the signal to all services. The non-(-all) variant requires a
761 # we do not verify services for stop/signal actions, since those may
762 # legitimately be used against services not (or no longer) configured
763 # to run on the selected host.
764 do_init() and verify_services($opt_service) if
767 $opt_start_services or
770 $opt_restart_services) and (
771 not defined $opt_service or $opt_service ne 'router'
774 # starting services. do_init() handled above
775 do_start($opt_service) if $opt_start;
776 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
777 do_start_all() if $opt_start_all;
778 do_start_services() if $opt_start_services;
779 do_stop_all() and do_start_all() if $opt_restart_all;
780 do_stop_services() and do_start_services() if $opt_restart_services;
783 do_stop($opt_service) if $opt_stop;
784 do_stop_all() if $opt_stop_all;
785 do_stop_services() if $opt_stop_services;
786 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
787 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
788 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
789 do_stop_all('TERM') if $opt_shutdown_graceful_all;
790 do_stop_all('INT') if $opt_shutdown_fast_all;
791 do_stop_all('KILL') if $opt_shutdown_immediate_all;
792 do_kill_with_fire() if $opt_kill_with_fire;
795 $opt_signal = 'USR1' if $opt_router_de_register or $opt_router_de_register_all;
796 $opt_signal = 'USR2' if $opt_router_re_register or $opt_router_re_register_all;
797 $opt_signal = 'HUP' if $opt_reload or $opt_reload_all;
799 do_signal($opt_service, $opt_signal) if $opt_signal and $opt_service;
800 do_signal_all($opt_signal) if
803 $opt_router_de_register_all or
804 $opt_router_re_register_all;
807 do_diagnostic() if $opt_diagnostic;
810 # show help if no action was requested
811 do_help() if $opt_help or not (
814 $opt_start_services or
817 $opt_stop_services or
820 $opt_restart_services or
823 $opt_shutdown_graceful or
824 $opt_shutdown_graceful_all or
825 $opt_shutdown_fast or
826 $opt_shutdown_fast_all or
827 $opt_shutdown_immediate or
828 $opt_shutdown_immediate_all or
829 $opt_kill_with_fire or