2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
# Defaults for the command-line options.  Each variable below is bound to
# its switch in the option table that follows these declarations.
29 my $opt_service = undef;
# @CONF_DIR@ / @PID_DIR@ look like build-time placeholders -- TODO confirm
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
# seconds to sleep after starting opensrf.settings in batch mode (0 = none)
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
# shutdown variants: graceful/fast/immediate are dispatched as TERM/INT/KILL
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
# upper bound, in seconds, on waiting for a signalled process to exit
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
# how to treat a running service that has no PID file: kill it ...
55 my $opt_force_clean_process = 0;
# router (de-)registration requests, dispatched as USR1 / USR2 signals
56 my $opt_router_de_register = 0;
57 my $opt_router_de_register_all = 0;
58 my $opt_router_re_register = 0;
59 my $opt_router_re_register_all = 0;
61 my $opt_reload_all = 0;
63 my $opt_diagnostic = 0;
# ... or ignore it and start anyway (--are-there-no-prisons)
64 my $opt_ignore_orphans = 0;
# a true OSRF_HOSTNAME environment value takes precedence over hostfqdn()
68 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Command-line switches and the variables they populate.  Specs ending in
# "=s" / "=i" take a string / integer argument; every other switch is a
# boolean flag that stores 1 when present.
    'service=s'                => \$opt_service,
    'config=s'                 => \$opt_config,
    'pid-dir=s'                => \$opt_pid_dir,
    'no-daemon'                => \$opt_no_daemon,
    'settings-startup-pause=i' => \$opt_settings_pause,
    'localhost'                => \$opt_localhost,
    # $opt_help is read by the dispatch code, so it needs a switch here
    'help'                     => \$opt_help,
    'quiet'                    => \$opt_quiet,
    'graceful-shutdown'        => \$opt_shutdown_graceful,
    'fast-shutdown'            => \$opt_shutdown_fast,
    'immediate-shutdown'       => \$opt_shutdown_immediate,
    'graceful-shutdown-all'    => \$opt_shutdown_graceful_all,
    'fast-shutdown-all'        => \$opt_shutdown_fast_all,
    'immediate-shutdown-all'   => \$opt_shutdown_immediate_all,
    'kill-with-fire'           => \$opt_kill_with_fire,
    'force-clean-process'      => \$opt_force_clean_process,
    # Takes a number of seconds.  Without "=i" the switch is parsed as a
    # flag: the timeout silently becomes 1 and the number the user typed is
    # left behind in @ARGV.
    'signal-timeout=i'         => \$opt_signal_timeout,
    'signal=s'                 => \$opt_signal,
    'signal-all'               => \$opt_signal_all,
    'start'                    => \$opt_start,
    # $opt_stop is read by the dispatch code, so it needs a switch here
    'stop'                     => \$opt_stop,
    'start-all'                => \$opt_start_all,
    'stop-all'                 => \$opt_stop_all,
    'restart'                  => \$opt_restart,
    'restart-all'              => \$opt_restart_all,
    'start-services'           => \$opt_start_services,
    'stop-services'            => \$opt_stop_services,
    'restart-services'         => \$opt_restart_services,
    'router-de-register'       => \$opt_router_de_register,
    'router-de-register-all'   => \$opt_router_de_register_all,
    'router-re-register'       => \$opt_router_re_register,
    'router-re-register-all'   => \$opt_router_re_register_all,
    'reload'                   => \$opt_reload,
    'reload-all'               => \$opt_reload_all,
    'diagnostic'               => \$opt_diagnostic,
    'are-there-no-prisons'     => \$opt_ignore_orphans
# --localhost replaces the detected hostname and records the choice in
# the OSRF_HOSTNAME environment variable as well
109 if ($opt_localhost) {
110 $hostname = 'localhost';
111 $ENV{OSRF_HOSTNAME} = $hostname;
# Command prefixes for the non-Perl service controllers; the caller
# appends the action and service arguments (e.g. "-a start -s <service>").
114 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
115 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
# Check that $service is one of the services configured for this host.
# Returns 1 straight away for 'router', when no service name was given,
# or when the name is found among the configured Perl and non-Perl
# services.  Otherwise an explanatory message is printed.
117 sub verify_services {
# the router is never looked up in the configured-service lists
119 return 1 if $service and $service eq 'router';
# names of all configured services, Perl and non-Perl alike
120 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
122 return 1 unless $service;
123 return 1 if grep { $_ eq $service } @services;
124 msg("$service is not configured to run on $hostname");
126 msg("No services are configured to run on $hostname");
# suggest --localhost only when the user did not already pass it
128 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
136 my @pids = get_service_pids_from_file($service);
139 # no PID files exist. see if the service is running anyway
141 @pids = get_service_pids_from_ps($service);
143 msg("cannot signal $service : no pid file or running process");
148 for my $pid (@pids) {
149 if (kill($signal, $pid) == 0) { # no process was signaled.
150 msg("cannot signal $service: process $pid is not running");
151 my $pidfile = get_pid_file($service);
152 unlink $pidfile if $pidfile;
156 msg("sending $signal signal to pid=$pid $service");
162 # returns 2 if a process should have gone away but did not
163 # in the case of multiple PIDs (e.g. router), return the
164 # status of any failures, but not the successes.
167 my @pids = get_service_pids_from_file($service);
170 for my $pid (@pids) {
172 # to determine whether a process has died, we have to send
173 # a no-op signal to the PID and check the success of that signal
175 for my $i (1..$opt_signal_timeout) {
176 $sig_count = kill(0, $pid);
177 last unless $sig_count;
182 msg("timed out waiting on $service pid=$pid to die");
187 # cleanup successful. remove the PID file
188 my $pidfile = get_pid_file($service);
189 unlink $pidfile if $pidfile;
197 return "$opt_pid_dir/$service.pid";
# Read the PIDs recorded in a service's PID file.
#
# Services usually only have 1 pid, but the router will have at least 2,
# so the file may contain several lines.
#
# Params:  $service - name of the service whose PID file is read
# Returns: list of PID strings with leading whitespace and newlines
#          removed; an empty list when the file is missing or unreadable.
sub get_service_pids_from_file {
    my $service = shift;

    my $pid_file = get_pid_file($service);
    return () unless -e $pid_file;

    # Read the file directly rather than spawning `cat` through a shell,
    # which misbehaves on paths with whitespace or shell metacharacters.
    open(my $fh, '<', $pid_file) or return ();
    my @pids = <$fh>;
    close($fh);

    s/^\s*|\n//g for @pids;
    return @pids;
}
# Look up the PIDs of a service in the process table.
#
# The router is matched by its 'OpenSRF Router' process title; any other
# service by the 'OpenSRF Listener [<service>]' title.
#
# Params:  $service - service name, or 'router'
# Returns: list of PID strings (possibly empty)
sub get_service_pids_from_ps {
    my $service = shift;

    my $pipeline;
    if ($service eq 'router') {
        $pipeline = "ps ax | grep 'OpenSRF Router'";
    } else {
        $pipeline = "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
    }

    # drop the grep process itself and keep only the first (PID) column
    $pipeline .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";

    my @found = `$pipeline`;
    for my $entry (@found) {
        $entry =~ s/^\s*|\n//g;
    }

    return @found;
}
226 my $alive = do_init(1);
228 my @services = get_service_list_from_files(1);
231 @conf_services = (@perl_services,
232 map {$_->{service}} @nonperl_services);
233 push(@services, @conf_services);
238 for my $svc (@services) {
239 $len = length($svc) if length($svc) > $len;
243 for my $svc (sort keys %services) {
244 my @pf_pids = get_service_pids_from_file($svc);
245 my @ps_pids = get_service_pids_from_ps($svc);
246 my $svc_str = sprintf("%-${len}s ", $svc);
249 unless(@ps_pids or @pf_pids) {
250 msg("$svc_str is not running");
254 for my $pid (@ps_pids) {
257 my $str = "$svc_str [$pid] ";
258 my $times = `ps -o etime=,cputime= $pid`;
259 $times =~ s/^\s+|\s+$//g;
260 my @times = split(/ /, $times);
261 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
263 if ($svc eq 'router') {
266 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
267 $str .= "#drones=".scalar(@drones);
269 msg("\tERR $svc has no running drones.") unless @drones;
272 msg("\tERR $svc [$pid] NOT configured for this host.")
273 unless grep {$_ eq $svc} @conf_services
276 msg("\tERR $svc [$pid] NOT found in PID file.")
277 unless grep {$_ eq $pid} @pf_pids;
280 for my $pid (@pf_pids) {
282 msg("\tERR $svc Has PID file entry [$pid], ".
283 "which matches no running $svc processes");
# Launch the router and record its PIDs in the router PID file.
#
# After a short pause that lets the router fork, every opensrf_router
# process reported by ps is written to the PID file, one PID per line.
#
# Returns: 1 on success.  Dies when the PID file cannot be opened or
#          fully written.
sub do_start_router {
    `opensrf_router $opt_config routers`;

    sleep 2; # give the router time to fork
    my @pids = `ps -C opensrf_router -o pid=`;
    s/^\s*|\n//g for @pids;

    my $pidfile = get_pid_file('router');

    # lexical handle: a bareword PF is a package global that any other
    # code using the same name would clobber
    open(my $pf, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
    for my $pid (@pids) {
        print {$pf} "$pid\n";
        msg("starting service pid=$pid router");
    }

    # buffered write errors (e.g. a full disk) only surface at close
    close($pf) or die "Cannot write $pidfile: $!\n";

    return 1;
}
307 # stop a specific service
309 my ($service, @signals) = @_;
310 @signals = qw/TERM INT KILL/ unless @signals;
311 for my $sig (@signals) {
312 last unless do_signal($service, $sig) == 2;
320 OpenSRF::System->bootstrap_client(config_file => $opt_config);
322 if (!OpenSRF::Transport::PeerHandle->retrieve) {
323 return 0 if $fail_ok;
324 die "Unable to bootstrap client for requests\n";
327 load_settings(); # load the settings config if we can
329 my $sclient = OpenSRF::Utils::SettingsClient->new;
330 my $apps = $sclient->config_value("activeapps", "appname");
332 # disconnect the top-level network handle
333 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
336 $apps = [$apps] unless ref $apps;
337 for my $app (@$apps) {
338 if (!$sclient->config_value('apps', $app)) {
339 msg("Service '$app' is listed for this host, ".
340 "but there is no configuration for it in $opt_config");
343 my $lang = $sclient->config_value('apps', $app, 'language') || '';
344 if ($lang =~ /perl/i) {
345 push(@perl_services, $app);
347 push(@nonperl_services, {service => $app, lang => $lang});
354 # start a specific service
358 my @pf_pids = get_service_pids_from_file($service);
359 my @ps_pids = get_service_pids_from_ps($service);
361 if (@pf_pids) { # had pidfile
364 msg("service $service already running : @ps_pids");
367 } else { # stale pidfile
369 my $pidfile = get_pid_file($service);
370 msg("removing stale pid file $pidfile");
374 } elsif (@ps_pids and not $opt_ignore_orphans) { # orphan process
376 if ($opt_force_clean_process) {
377 msg("service $service pid=@ps_pids is running with no pidfile");
378 do_signal($service, 'KILL');
380 msg("service $service pid=@ps_pids is running with no pidfile! ".
381 "use --force-clean-process to automatically kill orphan processes");
386 return do_start_router() if $service eq 'router';
388 load_settings() if $service eq 'opensrf.settings';
390 if(grep { $_ eq $service } @perl_services) {
391 return unless do_daemon($service);
392 OpenSRF::System->run_service($service, $opt_pid_dir);
395 # note: we don't daemonize non-perl services, but instead
396 # assume the controller for other languages manages that.
397 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
399 if ($svc->{lang} =~ /c/i) {
400 system("$C_COMMAND -a start -s $service");
402 } elsif ($svc->{lang} =~ /python/i) {
403 system("$PY_COMMAND -a start -s $service");
409 # should not get here
415 msg("starting router and services for $hostname");
417 return do_start_services();
# Start every service configured for this host; routers are not touched.
#
# opensrf.settings, when it is one of the Perl services, is started first
# and may be followed by a pause so that it is up before anything else
# tries to connect.
#
# Returns: 1
sub do_start_services {
    msg("starting services for $hostname");

    my $have_settings = grep { $_ eq 'opensrf.settings' } @perl_services;
    if ($have_settings) {
        do_start('opensrf.settings');

        # in batch mode, give opensrf.settings plenty of time to start
        # before any non-Perl services try to connect
        sleep $opt_settings_pause if $opt_settings_pause;
    }

    # remaining Perl services
    for my $name (@perl_services) {
        next if $name eq 'opensrf.settings';
        do_start($name);
    }

    # Non-Perl services are started one at a time rather than through the
    # native start-all command, so existing PID files and running
    # processes can be checked for each of them first.  The price is one
    # connect / fetch-settings / disconnect round trip to jabber per
    # service, which makes startup slightly slower than native start-all.
    for my $entry (@nonperl_services) {
        do_start($entry->{service});
    }

    return 1;
}
445 # signal a single service
449 return do_signal_all($signal, $service);
# Returns the list of running services based on the presence of PID files.
# The 'router' service is not included by default, since it is usually
# treated specially.
#
# Params:  $include_router - true to keep 'router' in the result
# Returns: sorted service names (PID file names without directory and
#          .pid suffix); an empty list when the PID directory cannot be
#          read or holds no PID files.
sub get_service_list_from_files {
    my $include_router = shift;

    # Read the directory directly instead of parsing `ls` output from a
    # shell, which misbehaves when the PID directory or a file name
    # contains whitespace or shell metacharacters.
    opendir(my $dh, $opt_pid_dir) or return ();

    my @services;
    for my $entry (readdir($dh)) {
        next if $entry =~ /^\./;    # the shell glob never matched dot files
        push(@services, $1) if $entry =~ /\A(.+)\.pid\z/s;
    }
    closedir($dh);

    @services = sort @services;     # ls listed the files in sorted order

    return @services if $include_router;
    return grep { $_ ne 'router' } @services;
}
465 my ($signal, @services) = @_;
466 @services = get_service_list_from_files() unless @services;
468 do_signal_send($_, $signal) for @services;
470 # if the user passed a known non-shutdown signal, we're done.
471 return if $signal =~ /HUP|USR1|USR2/;
473 do_signal_wait($_) for @services;
476 # pull all opensrf listener and drone PIDs from 'ps',
477 # kill them all, and remove all pid files
478 sub do_kill_with_fire {
479 msg("killing with fire");
# the PIDs come from the process table (get_running_pids), not from PID files
481 my @pids = get_running_pids();
# skip entries that contain no digit at all
# NOTE(review): the pattern is unanchored, so any string containing a digit passes
483 next unless $_ =~ /\d+/;
# look up the process's command line; it is only used in the message below
484 my $proc = `ps -p $_ -o cmd=`;
486 msg("killing with fire pid=$_ $proc");
490 # remove all of the pid files
491 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from each listed path
492 s/^\s*|\n//g for @files;
494 msg("removing pid file $_");
# Collect the PIDs of all OpenSRF processes found in the process table.
#
# Returns: list of PID strings, listeners first, then drones, then routers.
sub get_running_pids {
    # one search per process kind; the order decides the result order
    my @searches = (
        "ps ax | grep 'OpenSRF Listener' ",
        "ps ax | grep 'OpenSRF Drone' ",
        "ps ax | grep 'OpenSRF Router' "
    );

    # drops the grep process itself and keeps only the first (PID) column
    my $filter = "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";

    my @collected;
    for my $search (@searches) {
        my $pipeline = $search . $filter;
        my @batch = `$pipeline`;
        for my $entry (@batch) {
            $entry =~ s/^\s*|\n//g;
        }
        push(@collected, @batch);
    }

    return @collected;
}
# Remove PID files that refer to processes which are no longer running.
#
# Every PID recorded in every PID file (the router's included) is checked
# against the PIDs currently found in the process table; a PID with no
# matching process causes its service's PID file to be removed.
sub clear_stale_pids {
    my @services_with_pidfile = get_service_list_from_files(1);
    my %is_running = map { $_ => 1 } get_running_pids();

    for my $service (@services_with_pidfile) {
        for my $pid (get_service_pids_from_file($service)) {
            next if $is_running{$pid};

            my $stale_file = get_pid_file($service);
            msg("removing stale pid file $stale_file");
            unlink $stale_file;
        }
    }
}
# Stop all services that have a PID file, leaving routers alone.
#
# Each signal is sent to every service still pending, after which the
# services are waited on; those reported as still alive (status 2) are
# carried over to the next signal.
#
# Params:  @signals - signal names to try in order; TERM, INT, KILL when
#                     none are given
# Returns: 1
sub do_stop_services {
    my @signals = @_ ? @_ : qw/TERM INT KILL/;

    msg("stopping services for $hostname");
    my @pending = get_service_list_from_files();

    for my $signal (@signals) {
        # send the signal to all PIDs
        for my $service (@pending) {
            do_signal_send($service, $signal);
        }

        # then wait for them to go away, remembering the survivors
        my @survivors;
        for my $service (@pending) {
            my $status = do_signal_wait($service);
            push(@survivors, $service) if $status == 2;
        }

        @pending = @survivors;
        last unless @pending;
    }

    return 1;
}
562 @signals = qw/TERM INT KILL/ unless @signals;
564 do_stop_services(@signals);
566 # graceful shutdown requires the presence of the router, so stop the
567 # router last. See if it's running first to avoid unnecessary warnings.
568 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
573 # daemonize us. return true if we're the child, false if parent
575 return 1 if $opt_no_daemon;
577 my $pid_file = get_pid_file($service);
578 my $pid = OpenSRF::Utils::safe_fork();
580 msg("starting service pid=$pid $service");
588 open STDIN, '</dev/null';
589 open STDOUT, '>/dev/null';
590 open STDERR, '>/dev/null';
591 `echo $$ > $pid_file`;
595 # parses the local settings file
597 my $conf = OpenSRF::Utils::Config->current;
598 my $cfile = $conf->bootstrap->settings_config;
599 return unless $cfile;
600 my $parser = OpenSRF::Utils::SettingsParser->new();
601 $parser->initialize( $cfile );
602 $OpenSRF::Utils::SettingsClient::host_config =
603 $parser->get_server_config($conf->env->hostname);
608 print "* $m\n" unless $opt_quiet;
614 Usage: $0 --localhost --start-all
616 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
617 OpenSRF configuration file
619 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
620 Directory where process-specific PID files are kept
622 --settings-startup-pause
623 How long to give the opensrf.settings server to start up when running
624 in batch mode (start_all). The purpose is to give plenty of time for
625 the settings server to be up and active before any non-Perl services
629 Force the hostname to be 'localhost', instead of the fully qualified
630 domain name for the machine.
633 Specifies which OpenSRF service to control
636 Do not print informational messages to STDOUT
639 Do not detach and run as a daemon process. Useful for debugging.
640 Only works for Perl services and only when starting a single service.
643 Print this help message
646 Print information about running services
648 ==== starting services =====
651 Start the router and all services
654 Start the service specified by --service
657 Start all services but do not start any routers
660 Restart the router and all services
663 Restart the service specified by --service
666 Restart all services but do not restart any routers
668 --force-clean-process
669 When starting a service, if a service process is already running
670 but no pidfile exists, kill the service process before starting
671 the new one. This applies to routers too.
673 --are-there-no-prisons
674 When starting a service, if a service process is already running but
675 no pidfile exists, ignore the existing process and carry on starting
676 the new one (i.e., ignore orphans). This applies to routers too.
678 ==== stopping services =====
681 Stop the router and all services. Services are sent the TERM signal,
682 followed by the INT signal, followed by the KILL signal. With each
683 iteration, the script pauses up to --signal-timeout seconds waiting
684 for each process to die before sending the next signal.
687 Stop the service specified by --service. See also --stop-all.
688 If the requested service does not have a matching PID file, an
689 attempt to locate the PID via 'ps' will be made.
692 Stop all services but do not stop any routers. See also --stop-all.
694 --graceful-shutdown-all
695 Send TERM signal to all services + router
698 Send TERM signal to the service specified by --service
701 Send INT signal to all services + router
704 Send INT signal to the service specified by --service
706 --immediate-shutdown-all
707 Send KILL signal to all services + router
710 Send KILL signal to the service specified by --service
713 Send KILL signal to all running services + routers, regardless of
714 the presence of a PID file, and remove all PID files indiscriminately.
716 ==== signaling services =====
719 Send signal to all services
722 Name of signal to send. If --signal-all is not specified, the
723 signal will be sent to the service specified by --service.
726 Seconds to wait for a process to die after sending a shutdown signal.
727 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
729 ==== special signals ====
732 --router-de-register-all
733 Sends a SIGUSR1 signal to the selected service(s), which causes each
734 service's listener process to send an "unregister" command to all
735 registered routers. The --all variant sends the signal to all
736 running listeners. The non-(--all) variant requires a --service.
739 --router-re-register-all
740 Sends a SIGUSR2 signal to the selected service(s), which causes each
741 service's listener process to send a "register" command to all
742 configured routers. The --all variant sends the signal to all
743 running listeners. The non-(--all) variant requires a --service.
747 Sends a SIGHUP signal to the selected service(s). SIGHUP causes
748 each listener process to reload its opensrf_core.xml config file
749 and gracefully re-launch drone processes. The -all variant sends
750 the signal to all services. The non-(-all) variant requires a
756 # we do not verify services for stop/signal actions, since those may
757 # legitimately be used against services not (or no longer) configured
758 # to run on the selected host.
759 do_init() and verify_services($opt_service) if
762 $opt_start_services or
765 $opt_restart_services) and $opt_service ne 'router';
767 # starting services. do_init() handled above
768 do_start($opt_service) if $opt_start;
769 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
770 do_start_all() if $opt_start_all;
771 do_start_services() if $opt_start_services;
772 do_stop_all() and do_start_all() if $opt_restart_all;
773 do_stop_services() and do_start_services() if $opt_restart_services;
776 do_stop($opt_service) if $opt_stop;
777 do_stop_all() if $opt_stop_all;
778 do_stop_services() if $opt_stop_services;
779 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
780 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
781 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
782 do_stop_all('TERM') if $opt_shutdown_graceful_all;
783 do_stop_all('INT') if $opt_shutdown_fast_all;
784 do_stop_all('KILL') if $opt_shutdown_immediate_all;
785 do_kill_with_fire() if $opt_kill_with_fire;
788 $opt_signal = 'USR1' if $opt_router_de_register or $opt_router_de_register_all;
789 $opt_signal = 'USR2' if $opt_router_re_register or $opt_router_re_register_all;
790 $opt_signal = 'HUP' if $opt_reload or $opt_reload_all;
792 do_signal($opt_service, $opt_signal) if $opt_signal and $opt_service;
793 do_signal_all($opt_signal) if
796 $opt_router_de_register_all or
797 $opt_router_re_register_all;
800 do_diagnostic() if $opt_diagnostic;
803 # show help if no action was requested
804 do_help() if $opt_help or not (
807 $opt_start_services or
810 $opt_stop_services or
813 $opt_restart_services or
816 $opt_shutdown_graceful or
817 $opt_shutdown_graceful_all or
818 $opt_shutdown_fast or
819 $opt_shutdown_fast_all or
820 $opt_shutdown_immediate or
821 $opt_shutdown_immediate_all or
822 $opt_kill_with_fire or