2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
55 my $opt_force_clean_process = 0;
56 my $opt_router_de_register = 0;
57 my $opt_router_de_register_all = 0;
58 my $opt_router_re_register = 0;
59 my $opt_router_re_register_all = 0;
61 my $opt_reload_all = 0;
63 my $opt_diagnostic = 0;
64 my $opt_ignore_orphans = 0;
69 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Command-line option specs.  A '=s' suffix takes a string argument, '=i'
# takes an integer argument, and a bare name is a boolean flag set to 1.
72 'service=s' => \$opt_service,
73 'config=s' => \$opt_config,
74 'pid-dir=s' => \$opt_pid_dir,
75 'no-daemon' => \$opt_no_daemon,
76 'settings-startup-pause=i' => \$opt_settings_pause,
77 'localhost' => \$opt_localhost,
79 'quiet' => \$opt_quiet,
80 'graceful-shutdown' => \$opt_shutdown_graceful,
81 'fast-shutdown' => \$opt_shutdown_fast,
82 'immediate-shutdown' => \$opt_shutdown_immediate,
83 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
84 'fast-shutdown-all' => \$opt_shutdown_fast_all,
85 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
86 'kill-with-fire' => \$opt_kill_with_fire,
87 'force-clean-process' => \$opt_force_clean_process,
# number of seconds; without '=i' the option would be a boolean flag, the
# timeout would become 1 and the supplied value would be left unparsed
88 'signal-timeout=i' => \$opt_signal_timeout,
89 'signal=s' => \$opt_signal,
90 'signal-all' => \$opt_signal_all,
91 'start' => \$opt_start,
93 'start-all' => \$opt_start_all,
94 'stop-all' => \$opt_stop_all,
95 'restart' => \$opt_restart,
96 'restart-all' => \$opt_restart_all,
97 'start-services' => \$opt_start_services,
98 'stop-services' => \$opt_stop_services,
99 'restart-services' => \$opt_restart_services,
100 'router-de-register' => \$opt_router_de_register,
101 'router-de-register-all' => \$opt_router_de_register_all,
102 'router-re-register' => \$opt_router_re_register,
103 'router-re-register-all' => \$opt_router_re_register_all,
104 'reload' => \$opt_reload,
105 'reload-all' => \$opt_reload_all,
106 'diagnostic' => \$opt_diagnostic,
107 'ignore-orphans' => \$opt_ignore_orphans
110 if ($opt_localhost) {
111 $hostname = 'localhost';
112 $ENV{OSRF_HOSTNAME} = $hostname;
115 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
# Confirms that $service can be started on this host.  Returns 1 when the
# service is acceptable; otherwise an explanation is printed via msg().
117 sub verify_services {
# the router is accepted without consulting the configured service lists
119 return 1 if $service and $service eq 'router';
# service names from @perl_services plus the 'service' field of each
# @nonperl_services entry
120 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
# no particular service was requested
122 return 1 unless $service;
# the requested service is among the configured ones
123 return 1 if grep { $_ eq $service } @services;
124 msg("$service is not configured to run on $hostname");
126 msg("No services are configured to run on $hostname");
# suggest --localhost unless it is already in effect
128 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
136 my @pids = get_service_pids_from_file($service);
139 # no PID files exist. see if the service is running anyway
141 @pids = get_service_pids_from_ps($service);
143 msg("cannot signal $service : no pid file or running process");
148 for my $pid (@pids) {
149 if (kill($signal, $pid) == 0) { # no process was signaled.
150 msg("cannot signal $service: process $pid is not running");
151 my $pidfile = get_pid_file($service);
152 unlink $pidfile if $pidfile;
156 msg("sending $signal signal to pid=$pid $service");
162 # returns 2 if a process should have gone away but did not
163 # in the case of multiple PIDs (e.g. router), return the
164 # status of any failures, but not the successes.
167 my @pids = get_service_pids_from_file($service);
170 for my $pid (@pids) {
172 # to determine whether a process has died, we have to send
173 # a no-op signal to the PID and check the success of that signal
175 for my $i (1..$opt_signal_timeout) {
176 $sig_count = kill(0, $pid);
177 last unless $sig_count;
182 msg("timed out waiting on $service pid=$pid to die");
187 # cleanup successful. remove the PID file
188 my $pidfile = get_pid_file($service);
189 unlink $pidfile if $pidfile;
197 return "$opt_pid_dir/$service.pid";
200 # services usually only have 1 pid, but the router will have at least 2
# Reads the PIDs recorded in the PID file for $service (one per line of the
# file).  An empty list is returned when the PID file does not exist.
201 sub get_service_pids_from_file {
203 my $pid_file = get_pid_file($service);
204 return () unless -e $pid_file;
# backticks in list context: one entry per line of the file
205 my @pids = `cat $pid_file`;
# strip leading whitespace and newlines from every entry
206 s/^\s*|\n//g for @pids;
# Looks for the processes of $service in the output of 'ps x' instead of
# relying on a PID file.
210 sub get_service_pids_from_ps {
# the router is matched by its 'OpenSRF Router' process title, any other
# service by the title 'OpenSRF Listener [<service>]'
213 my $ps = ($service eq 'router') ?
214 "ps x | grep 'OpenSRF Router'" :
215 "ps x | grep 'OpenSRF Listener \\[$service\\]'";
# drop the grep process itself, trim leading whitespace, and keep only the
# first space-delimited field of each remaining line
217 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
# strip leading whitespace and newlines from every entry of @pids
219 s/^\s*|\n//g for @pids;
226 my $alive = do_init(1);
228 my @services = get_service_list_from_files(1);
231 @conf_services = (@perl_services,
232 map {$_->{service}} @nonperl_services);
233 push(@services, @conf_services);
238 for my $svc (@services) {
239 $len = length($svc) if length($svc) > $len;
243 for my $svc (sort keys %services) {
244 my @pf_pids = get_service_pids_from_file($svc);
245 my @ps_pids = get_service_pids_from_ps($svc);
246 my $svc_str = sprintf("%-${len}s ", $svc);
249 unless(@ps_pids or @pf_pids) {
250 msg("$svc_str is not running");
254 for my $pid (@ps_pids) {
257 my $str = "$svc_str [$pid] ";
258 my $times = `ps -o etime=,cputime= $pid`;
259 $times =~ s/^\s+|\s+$//g;
260 my @times = split(/ /, $times);
261 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
263 if ($svc eq 'router') {
266 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
267 my $dcount = scalar(@drones);
268 my $dmax = $max_children_map{$svc};
269 if (defined($dmax) && $dmax > 0) {
270 $str .= "#drones=$dcount/$dmax ";
271 $str .= sprintf('%3d%%', (int(($dcount / $dmax) * 100)));
273 $str .= "#drones=$dcount";
276 msg("\tERR $svc has no running drones.") unless @drones;
279 msg("\tERR $svc [$pid] NOT configured for this host.")
280 unless grep {$_ eq $svc} @conf_services
283 msg("\tERR $svc [$pid] NOT found in PID file.")
284 unless grep {$_ eq $pid} @pf_pids;
287 for my $pid (@pf_pids) {
289 msg("\tERR $svc Has PID file entry [$pid], ".
290 "which matches no running $svc processes");
# Launches the router by running the external opensrf_router command with
# the core config file, the literal argument "routers", and the path of the
# router's PID file.
297 sub do_start_router {
299 my $pidfile = get_pid_file('router');
# backticks in void context: the command's output is captured and discarded
300 `opensrf_router $opt_config routers $pidfile`;
302 sleep 2; # give the router time to fork (probably no longer needed)
305 # stop a specific service
307 my ($service, @signals) = @_;
308 @signals = qw/TERM INT KILL/ unless @signals;
309 for my $sig (@signals) {
310 last unless do_signal($service, $sig) == 2;
318 OpenSRF::System->bootstrap_client(config_file => $opt_config);
320 if (!OpenSRF::Transport::PeerHandle->retrieve) {
321 return 0 if $fail_ok;
322 die "Unable to bootstrap client for requests\n";
325 load_settings(); # load the settings config if we can
327 my $sclient = OpenSRF::Utils::SettingsClient->new;
328 my $apps = $sclient->config_value("activeapps", "appname");
330 # disconnect the top-level network handle
331 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
334 $apps = [$apps] unless ref $apps;
335 for my $app (@$apps) {
336 if (!$sclient->config_value('apps', $app)) {
337 msg("Service '$app' is listed for this host, ".
338 "but there is no configuration for it in $opt_config");
341 my $lang = $sclient->config_value('apps', $app, 'language') || '';
343 $max_children_map{$app} = $sclient->config_value(
344 'apps', $app, 'unix_config', 'max_children');
346 if ($lang =~ /perl/i) {
347 push(@perl_services, $app);
349 push(@nonperl_services, {service => $app, lang => $lang});
356 # start a specific service
360 my @pf_pids = get_service_pids_from_file($service);
361 my @ps_pids = get_service_pids_from_ps($service);
363 if (@pf_pids) { # had pidfile
366 msg("service $service already running : @ps_pids");
369 } else { # stale pidfile
371 my $pidfile = get_pid_file($service);
372 msg("removing stale pid file $pidfile");
376 } elsif (@ps_pids and not $opt_ignore_orphans) { # orphan process
378 if ($opt_force_clean_process) {
379 msg("service $service pid=@ps_pids is running with no pidfile");
380 do_signal($service, 'KILL');
382 msg("service $service pid=@ps_pids is running with no pidfile! ".
383 "use --force-clean-process to automatically kill orphan processes");
388 return do_start_router() if $service eq 'router';
390 load_settings() if $service eq 'opensrf.settings';
392 if(grep { $_ eq $service } @perl_services) {
393 return unless do_daemon($service);
394 OpenSRF::System->run_service($service, $opt_pid_dir);
397 # note: we don't daemonize non-perl services, but instead
398 # assume the controller for other languages manages that.
399 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
401 if ($svc->{lang} =~ /c/i) {
402 system("$C_COMMAND -a start -s $service");
408 # should not get here
414 msg("starting router and services for $hostname");
416 return do_start_services();
# Starts the services listed in @perl_services and @nonperl_services:
# opensrf.settings first (when it is one of the Perl services), then the
# remaining Perl services, then each non-Perl service.  The router is not
# started here.
419 sub do_start_services {
420 msg("starting services for $hostname");
# opensrf.settings goes first, ahead of every other service
422 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
423 do_start('opensrf.settings');
424 # in batch mode, give opensrf.settings plenty of time to start
425 # before any non-Perl services try to connect
# the pause is skipped entirely when $opt_settings_pause is 0
426 sleep $opt_settings_pause if $opt_settings_pause;
429 # start Perl services
430 for my $service (@perl_services) {
# opensrf.settings was already handled above
431 do_start($service) unless $service eq 'opensrf.settings';
434 # start each non-perl service individually instead of using the native
435 # start-all command. this allows us to test for existing pid files
436 # and/or running processes on each service before starting.
437 # it also means each service has to connect-fetch_setting-disconnect
438 # from jabber, which makes startup slightly slower than native start-all
439 do_start($_->{service}) for @nonperl_services;
444 # signal a single service
448 do_signal_send($service, $signal);
450 # if user passed a known non-shutdown signal, we're done.
451 return 1 if $signal =~ /HUP|USR1|USR2/;
453 return do_signal_wait($service);
456 # returns the list of running services based on presence of PID files.
457 # the 'router' service is not included by default, since it's
458 # usually treated specially.
# Lists service names by stripping the directory and the .pid suffix from
# each PID file found in $opt_pid_dir.
#   $include_router - when true, 'router' is kept in the returned list;
#                     otherwise it is filtered out.
459 sub get_service_list_from_files {
460 my $include_router = shift;
# every *.pid file in the PID directory; error output from ls is discarded
461 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every entry
462 s/^\s*|\n//g for @services;
# reduce "/path/to/NAME.pid" to "NAME"
463 s|.*/(.*)\.pid$|$1| for @services;
464 return @services if $include_router;
465 return grep { $_ ne 'router' } @services;
469 my ($signal, @services) = @_;
470 @services = get_service_list_from_files() unless @services;
472 do_signal_send($_, $signal) for @services;
474 # if user passed a known non-shutdown signal, we're done.
475 return if $signal =~ /HUP|USR1|USR2/;
477 do_signal_wait($_) for @services;
480 # pull all opensrf listener and drone PIDs from 'ps',
481 # kill them all, and remove all pid files
# Works from the PIDs reported by get_running_pids() rather than from the
# PID files, announcing each affected process and each PID file via msg().
482 sub do_kill_with_fire {
483 msg("killing with fire");
485 my @pids = get_running_pids();
# ignore entries that contain no digits at all
487 next unless $_ =~ /\d+/;
# look up the process command line so the message can show what the PID is
488 my $proc = `ps -p $_ -o cmd=`;
490 msg("killing with fire pid=$_ $proc");
494 # remove all of the pid files
# every *.pid file in the PID directory; error output from ls is discarded
495 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every file name
496 s/^\s*|\n//g for @files;
498 msg("removing pid file $_");
# Collects the PIDs of OpenSRF listener, drone and router processes by
# searching the output of 'ps x' for their process titles.
503 sub get_running_pids {
506 # start with the listeners, then drones, then routers
508 "ps x | grep 'OpenSRF Listener' ",
509 "ps x | grep 'OpenSRF Drone' ",
510 "ps x | grep 'OpenSRF Router' "
# extend every pipeline: drop the grep process itself, trim leading
# whitespace, and keep only the first space-delimited field
513 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
515 for my $grep (@greps) {
# strip leading whitespace and newlines from every entry
517 s/^\s*|\n//g for @spids;
# accumulate in pipeline order: listeners, drones, routers
518 push (@pids, @spids);
# For every service that has a PID file, compares the recorded PIDs with the
# PIDs of running OpenSRF processes and announces the PID file as stale when
# a recorded PID is not running.
# NOTE(review): the sub name and message imply the stale file is removed
# right after the message -- confirm.
524 sub clear_stale_pids {
# the argument 1 keeps the router's PID file in the list
525 my @pidfile_services = get_service_list_from_files(1);
526 my @running_pids = get_running_pids();
528 for my $svc (@pidfile_services) {
529 my @pids = get_service_pids_from_file($svc);
530 for my $pid (@pids) {
# this PID belongs to a running process; nothing to clean up
531 next if grep { $_ eq $pid } @running_pids;
532 my $pidfile = get_pid_file($svc);
533 msg("removing stale pid file $pidfile");
# Stops the services that have PID files (the router is excluded) by
# sending each signal in @signals in turn and waiting for the processes
# to exit after each one.
539 sub do_stop_services {
# default escalation when the caller supplied no signals
541 @signals = qw/TERM INT KILL/ unless @signals;
543 msg("stopping services for $hostname");
# called without the include-router flag, so 'router' is filtered out
544 my @services = get_service_list_from_files();
546 for my $signal (@signals) {
549 # send the signal to all PIDs
550 do_signal_send($_, $signal) for @services;
552 # then wait for them to go away
553 for my $service (@services) {
# a result of 2 from do_signal_wait() marks the service for another attempt
554 push(@redo, $service) if do_signal_wait($service) == 2;
# stop escalating once no services are left to signal
# NOTE(review): @services is presumably replaced by @redo before this
# check -- confirm.
558 last unless @services;
566 @signals = qw/TERM INT KILL/ unless @signals;
568 do_stop_services(@signals);
570 # graceful shutdown requires the presence of the router, so stop the
571 # router last. See if it's running first to avoid unnecessary warnings.
572 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
577 # daemonize us. return true if we're the child, false if parent
579 return 1 if $opt_no_daemon;
581 my $pid_file = get_pid_file($service);
582 my $pid = OpenSRF::Utils::safe_fork();
584 msg("starting service pid=$pid $service");
592 open STDIN, '</dev/null';
593 open STDOUT, '>/dev/null';
594 open STDERR, '>/dev/null';
595 `echo $$ > $pid_file`;
599 # parses the local settings file
601 my $conf = OpenSRF::Utils::Config->current;
602 my $cfile = $conf->bootstrap->settings_config;
603 return unless $cfile;
604 my $parser = OpenSRF::Utils::SettingsParser->new();
605 $parser->initialize( $cfile );
606 $OpenSRF::Utils::SettingsClient::host_config =
607 $parser->get_server_config($conf->env->hostname);
612 print "* $m\n" unless $opt_quiet;
618 Usage: $0 --localhost --start-all
620 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
621 OpenSRF configuration file
623 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
624 Directory where process-specific PID files are kept
626 --settings-startup-pause
627 How long to give the opensrf.settings server to start up when running
628 in batch mode (start_all). The purpose is to give plenty of time for
629 the settings server to be up and active before any non-Perl services
633 Force the hostname to be 'localhost', instead of the fully qualified
634 domain name for the machine.
637 Specifies which OpenSRF service to control
640 Do not print informational messages to STDOUT
643 Do not detach and run as a daemon process. Useful for debugging.
644 Only works for Perl services and only when starting a single service.
647 Print this help message
650 Print information about running services
652 ==== starting services =====
655 Start the router and all services
658 Start the service specified by --service
661 Start all services but do not start any routers
664 Restart the router and all services
667 Restart the service specified by --service
670 Restart all services but do not restart any routers
672 --force-clean-process
673 When starting a service, if a service process is already running
674 but no pidfile exists, kill the service process before starting
675 the new one. This applies to routers too.
678 When starting a service, if a service process is already running but
679 no pidfile exists, ignore the existing process and carry on starting
680 the new one (i.e., ignore orphans). This applies to routers too.
682 ==== stopping services =====
685 Stop the router and all services. Services are sent the TERM signal,
686 followed by the INT signal, followed by the KILL signal. With each
687 iteration, the script pauses up to --signal-timeout seconds waiting
688 for each process to die before sending the next signal.
691 Stop the service specified by --service. See also --stop-all.
692 If the requested service does not have a matching PID file, an
693 attempt to locate the PID via 'ps' will be made.
696 Stop all services but do not stop any routers. See also --stop-all.
698 --graceful-shutdown-all
699 Send TERM signal to all services + router
702 Send TERM signal to the service specified by --service
705 Send INT signal to all services + router
708 Send INT signal to the service specified by --service
710 --immediate-shutdown-all
711 Send KILL signal to all services + router
714 Send KILL signal to the service specified by --service
717 Send KILL signal to all running services + routers, regardless of
718 the presence of a PID file, and remove all PID files indiscriminately.
720 ==== signaling services =====
723 Send signal to all services
726 Name of signal to send. If --signal-all is not specified, the
727 signal will be sent to the service specified by --service.
730 Seconds to wait for a process to die after sending a shutdown signal.
731 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
733 ==== special signals ====
736 --router-de-register-all
737 Sends a SIGUSR1 signal to the selected service(s), which causes each
738 service's listener process to send an "unregister" command to all
739 registered routers. The --all variant sends the signal to all
740 running listeners. The non-(--all) variant requires a --service.
743 --router-re-register-all
744 Sends a SIGUSR2 signal to the selected service(s), which causes each
745 service's listener process to send a "register" command to all
746 configured routers. The --all variant sends the signal to all
747 running listeners. The non-(--all) variant requires a --service.
751 Sends a SIGHUP signal to the selected service(s). SIGHUP causes
752 each listener process to reload its opensrf_core.xml config file
753 and gracefully re-launch drone processes. The -all variant sends
754 the signal to all services. The non-(-all) variant requires a
760 # we do not verify services for stop/signal actions, since those may
761 # legitimately be used against services not (or no longer) configured
762 # to run on the selected host.
763 do_init() and verify_services($opt_service) if
766 $opt_start_services or
769 $opt_restart_services) and (
770 not defined $opt_service or $opt_service ne 'router'
773 # starting services. do_init() handled above
774 do_start($opt_service) if $opt_start;
775 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
776 do_start_all() if $opt_start_all;
777 do_start_services() if $opt_start_services;
778 do_stop_all() and do_start_all() if $opt_restart_all;
779 do_stop_services() and do_start_services() if $opt_restart_services;
782 do_stop($opt_service) if $opt_stop;
783 do_stop_all() if $opt_stop_all;
784 do_stop_services() if $opt_stop_services;
785 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
786 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
787 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
788 do_stop_all('TERM') if $opt_shutdown_graceful_all;
789 do_stop_all('INT') if $opt_shutdown_fast_all;
790 do_stop_all('KILL') if $opt_shutdown_immediate_all;
791 do_kill_with_fire() if $opt_kill_with_fire;
794 $opt_signal = 'USR1' if $opt_router_de_register or $opt_router_de_register_all;
795 $opt_signal = 'USR2' if $opt_router_re_register or $opt_router_re_register_all;
796 $opt_signal = 'HUP' if $opt_reload or $opt_reload_all;
798 do_signal($opt_service, $opt_signal) if $opt_signal and $opt_service;
799 do_signal_all($opt_signal) if
802 $opt_router_de_register_all or
803 $opt_router_re_register_all;
806 do_diagnostic() if $opt_diagnostic;
809 # show help if no action was requested
810 do_help() if $opt_help or not (
813 $opt_start_services or
816 $opt_stop_services or
819 $opt_restart_services or
822 $opt_shutdown_graceful or
823 $opt_shutdown_graceful_all or
824 $opt_shutdown_fast or
825 $opt_shutdown_fast_all or
826 $opt_shutdown_immediate or
827 $opt_shutdown_immediate_all or
828 $opt_kill_with_fire or