2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
55 my $opt_force_clean_process = 0;
57 my $opt_diagnostic = 0;
61 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
64 'service=s' => \$opt_service,
65 'config=s' => \$opt_config,
66 'pid-dir=s' => \$opt_pid_dir,
67 'no-daemon' => \$opt_no_daemon,
68 'settings-startup-pause=i' => \$opt_settings_pause,
69 'localhost' => \$opt_localhost,
71 'quiet' => \$opt_quiet,
72 'graceful-shutdown' => \$opt_shutdown_graceful,
73 'fast-shutdown' => \$opt_shutdown_fast,
74 'immediate-shutdown' => \$opt_shutdown_immediate,
75 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
76 'fast-shutdown-all' => \$opt_shutdown_fast_all,
77 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
78 'kill-with-fire' => \$opt_kill_with_fire,
79 'force-clean-process' => \$opt_force_clean_process,
# --signal-timeout takes an integer number of seconds.  Without the "=i"
# argument spec Getopt::Long treats the option as a boolean flag, which
# would set the timeout to 1 and leave the supplied value in @ARGV.
'signal-timeout=i' => \$opt_signal_timeout,
81 'signal=s' => \$opt_signal,
82 'signal-all' => \$opt_signal_all,
83 'start' => \$opt_start,
85 'start-all' => \$opt_start_all,
86 'stop-all' => \$opt_stop_all,
87 'restart' => \$opt_restart,
88 'restart-all' => \$opt_restart_all,
89 'start-services' => \$opt_start_services,
90 'stop-services' => \$opt_stop_services,
91 'restart-services' => \$opt_restart_services,
92 'diagnostic' => \$opt_diagnostic
96 $hostname = 'localhost';
97 $ENV{OSRF_HOSTNAME} = $hostname;
100 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
101 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
103 sub verify_services {
105 return 1 if $service and $service eq 'router';
106 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
108 return 1 unless $service;
109 return 1 if grep { $_ eq $service } @services;
110 msg("$service is not configured to run on $hostname");
112 msg("No services are configured to run on $hostname");
114 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
122 my @pids = get_service_pids_from_file($service);
125 # no PID files exist. see if the service is running anyway
127 @pids = get_service_pids_from_ps($service);
129 msg("cannot signal $service : no pid file or running process");
134 for my $pid (@pids) {
135 if (kill($signal, $pid) == 0) { # no process was signaled.
136 msg("cannot signal $service: process $pid is not running");
137 my $pidfile = get_pid_file($service);
138 unlink $pidfile if $pidfile;
142 msg("sending $signal signal to pid=$pid $service");
148 # returns 2 if a process should have gone away but did not
149 # in the case of multiple PIDs (e.g. router), return the
150 # status of any failures, but not the successes.
153 my @pids = get_service_pids_from_file($service);
156 for my $pid (@pids) {
158 # to determine whether a process has died, we have to send
159 # a no-op signal to the PID and check the success of that signal
161 for my $i (1..$opt_signal_timeout) {
162 $sig_count = kill(0, $pid);
163 last unless $sig_count;
168 msg("timed out waiting on $service pid=$pid to die");
173 # cleanup successful. remove the PID file
174 my $pidfile = get_pid_file($service);
175 unlink $pidfile if $pidfile;
183 return "$opt_pid_dir/$service.pid";
186 # services usually only have 1 pid, but the router will have at least 2
187 sub get_service_pids_from_file {
189 my $pid_file = get_pid_file($service);
190 return () unless -e $pid_file;
191 my @pids = `cat $pid_file`;
192 s/^\s*|\n//g for @pids;
196 sub get_service_pids_from_ps {
199 my $ps = ($service eq 'router') ?
200 "ps ax | grep 'OpenSRF Router'" :
201 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
203 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
205 s/^\s*|\n//g for @pids;
212 my $alive = do_init(1);
214 my @services = get_service_list_from_files(1);
217 @conf_services = (@perl_services,
218 map {$_->{service}} @nonperl_services);
219 push(@services, @conf_services);
224 for my $svc (@services) {
225 $len = length($svc) if length($svc) > $len;
229 for my $svc (sort keys %services) {
230 my @pf_pids = get_service_pids_from_file($svc);
231 my @ps_pids = get_service_pids_from_ps($svc);
232 my $svc_str = sprintf("%-${len}s ", $svc);
235 unless(@ps_pids or @pf_pids) {
236 msg("$svc_str is not running");
240 for my $pid (@ps_pids) {
243 my $str = "$svc_str [$pid] ";
244 my $times = `ps -o etime=,cputime= $pid`;
245 $times =~ s/^\s+|\s+$//g;
246 my @times = split(/ /, $times);
247 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
249 if ($svc eq 'router') {
252 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
253 $str .= "#drones=".scalar(@drones);
255 msg("\tERR $svc has no running drones.") unless @drones;
258 msg("\tERR $svc [$pid] NOT configured for this host.")
259 unless grep {$_ eq $svc} @conf_services
262 msg("\tERR $svc [$pid] NOT found in PID file.")
263 unless grep {$_ eq $pid} @pf_pids;
266 for my $pid (@pf_pids) {
268 msg("\tERR $svc Has PID file entry [$pid], ".
269 "which matches no running $svc processes");
276 sub do_start_router {
277 `opensrf_router $opt_config routers`;
279 sleep 2; # give the router time to fork
280 my @pids = `ps -C opensrf_router -o pid=`;
281 s/^\s*|\n//g for @pids;
283 my $pidfile = get_pid_file('router');
284 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
287 msg("starting service pid=$_ router");
293 # stop a specific service
295 my ($service, @signals) = @_;
296 @signals = qw/TERM INT KILL/ unless @signals;
297 for my $sig (@signals) {
298 last unless do_signal($service, $sig) == 2;
306 OpenSRF::System->bootstrap_client(config_file => $opt_config);
308 if (!OpenSRF::Transport::PeerHandle->retrieve) {
309 return 0 if $fail_ok;
310 die "Unable to bootstrap client for requests\n";
313 load_settings(); # load the settings config if we can
315 my $sclient = OpenSRF::Utils::SettingsClient->new;
316 my $apps = $sclient->config_value("activeapps", "appname");
318 # disconnect the top-level network handle
319 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
322 $apps = [$apps] unless ref $apps;
323 for my $app (@$apps) {
324 if (!$sclient->config_value('apps', $app)) {
325 msg("Service '$app' is listed for this host, ".
326 "but there is no configuration for it in $opt_config");
329 my $lang = $sclient->config_value('apps', $app, 'language') || '';
330 if ($lang =~ /perl/i) {
331 push(@perl_services, $app);
333 push(@nonperl_services, {service => $app, lang => $lang});
340 # start a specific service
344 my @pf_pids = get_service_pids_from_file($service);
345 my @ps_pids = get_service_pids_from_ps($service);
347 if (@pf_pids) { # had pidfile
350 msg("service $service already running : @ps_pids");
353 } else { # stale pidfile
355 my $pidfile = get_pid_file($service);
356 msg("removing stale pid file $pidfile");
360 } elsif (@ps_pids) { # orphan process
362 if ($opt_force_clean_process) {
363 msg("service $service pid=@ps_pids is running with no pidfile");
364 do_signal($service, 'KILL');
366 msg("service $service pid=@ps_pids is running with no pidfile! ".
367 "use --force-clean-process to automatically kill orphan processes");
372 return do_start_router() if $service eq 'router';
374 load_settings() if $service eq 'opensrf.settings';
376 if(grep { $_ eq $service } @perl_services) {
377 return unless do_daemon($service);
378 OpenSRF::System->run_service($service, $opt_pid_dir);
381 # note: we don't daemonize non-perl services, but instead
382 # assume the controller for other languages manages that.
383 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
385 if ($svc->{lang} =~ /c/i) {
386 system("$C_COMMAND -a start -s $service");
388 } elsif ($svc->{lang} =~ /python/i) {
389 system("$PY_COMMAND -a start -s $service");
395 # should not get here
401 msg("starting router and services for $hostname");
403 return do_start_services();
406 sub do_start_services {
407 msg("starting services for $hostname");
409 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
410 do_start('opensrf.settings');
411 # in batch mode, give opensrf.settings plenty of time to start
412 # before any non-Perl services try to connect
413 sleep $opt_settings_pause if $opt_settings_pause;
416 # start Perl services
417 for my $service (@perl_services) {
418 do_start($service) unless $service eq 'opensrf.settings';
421 # start each non-perl service individually instead of using the native
422 # start-all command. this allows us to test for existing pid files
423 # and/or running processes on each service before starting.
424 # it also means each service has to connect-fetch_setting-disconnect
425 # from jabber, which makes startup slightly slower than native start-all
426 do_start($_->{service}) for @nonperl_services;
431 # signal a single service
435 return do_signal_all($signal, $service);
# Returns the list of running services based on the presence of PID files
# in $opt_pid_dir.  A service name is the PID file name minus its ".pid"
# suffix; names are returned in sorted order.
#
# The 'router' service is not included by default, since it's usually
# treated specially.  Pass a true value as the first argument to include it.
#
# Returns an empty list when the PID directory cannot be opened or holds
# no PID files.
sub get_service_list_from_files {
    my $include_router = shift;

    my @services;

    # Read the directory directly rather than shelling out to `ls`, so a
    # PID directory path containing spaces or shell metacharacters can
    # neither break the listing nor be interpreted by a shell.
    if (opendir(my $dh, $opt_pid_dir)) {
        # skip dot-files, which a "*.pid" shell glob would not match either
        my @entries = grep { !/^\./ } readdir($dh);
        closedir($dh);

        @services =
            sort { $a cmp $b }
            map  { /^(.+)\.pid\z/s ? $1 : () } @entries;
    }

    return @services if $include_router;
    return grep { $_ ne 'router' } @services;
}
451 my ($signal, @services) = @_;
452 @services = get_service_list_from_files() unless @services;
454 do_signal_send($_, $signal) for @services;
456 # if user passed a known non-shutdown signal, we're done.
457 return if $signal =~ /HUP|USR1|USR2/;
459 do_signal_wait($_) for @services;
462 # pull all opensrf listener and drone PIDs from 'ps',
463 # kill them all, and remove all pid files
464 sub do_kill_with_fire {
465 msg("killing with fire");
467 my @pids = get_running_pids();
469 next unless $_ =~ /\d+/;
470 my $proc = `ps -p $_ -o cmd=`;
472 msg("killing with fire pid=$_ $proc");
476 # remove all of the pid files
477 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
478 s/^\s*|\n//g for @files;
480 msg("removing pid file $_");
485 sub get_running_pids {
488 # start with the listeners, then drones, then routers
490 "ps ax | grep 'OpenSRF Listener' ",
491 "ps ax | grep 'OpenSRF Drone' ",
492 "ps ax | grep 'OpenSRF Router' "
495 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
497 for my $grep (@greps) {
499 s/^\s*|\n//g for @spids;
500 push (@pids, @spids);
506 sub clear_stale_pids {
507 my @pidfile_services = get_service_list_from_files(1);
508 my @running_pids = get_running_pids();
510 for my $svc (@pidfile_services) {
511 my @pids = get_service_pids_from_file($svc);
512 for my $pid (@pids) {
513 next if grep { $_ eq $pid } @running_pids;
514 my $pidfile = get_pid_file($svc);
515 msg("removing stale pid file $pidfile");
521 sub do_stop_services {
523 @signals = qw/TERM INT KILL/ unless @signals;
525 msg("stopping services for $hostname");
526 my @services = get_service_list_from_files();
528 for my $signal (@signals) {
531 # send the signal to all PIDs
532 do_signal_send($_, $signal) for @services;
534 # then wait for them to go away
535 for my $service (@services) {
536 push(@redo, $service) if do_signal_wait($service) == 2;
540 last unless @services;
548 @signals = qw/TERM INT KILL/ unless @signals;
550 do_stop_services(@signals);
552 # graceful shutdown requires the presence of the router, so stop the
553 # router last. See if it's running first to avoid unnecessary warnings.
554 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
559 # daemonize us. return true if we're the child, false if parent
561 return 1 if $opt_no_daemon;
563 my $pid_file = get_pid_file($service);
564 my $pid = OpenSRF::Utils::safe_fork();
566 msg("starting service pid=$pid $service");
574 open STDIN, '</dev/null';
575 open STDOUT, '>/dev/null';
576 open STDERR, '>/dev/null';
577 `echo $$ > $pid_file`;
581 # parses the local settings file
583 my $conf = OpenSRF::Utils::Config->current;
584 my $cfile = $conf->bootstrap->settings_config;
585 return unless $cfile;
586 my $parser = OpenSRF::Utils::SettingsParser->new();
587 $parser->initialize( $cfile );
588 $OpenSRF::Utils::SettingsClient::host_config =
589 $parser->get_server_config($conf->env->hostname);
594 print "* $m\n" unless $opt_quiet;
600 Usage: $0 --localhost --start-all
602 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
603 OpenSRF configuration file
605 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
606 Directory where process-specific PID files are kept
608 --settings-startup-pause
609 How long to give the opensrf.settings server to start up when running
610 in batch mode (start_all). The purpose is to give plenty of time for
611 the settings server to be up and active before any non-Perl services
615 Force the hostname to be 'localhost', instead of the fully qualified
616 domain name for the machine.
619 Specifies which OpenSRF service to control
622 Do not print informational messages to STDOUT
625 Do not detach and run as a daemon process. Useful for debugging.
626 Only works for Perl services and only when starting a single service.
629 Print this help message
632 Print information about running services
634 ==== starting services =====
637 Start the router and all services
640 Start the service specified by --service
643 Start all services but do not start any routers
646 Restart the router and all services
649 Restart the service specified by --service
652 Restart all services but do not restart any routers
654 --force-clean-process
655 When starting a service, if a service process is already running
656 but no pidfile exists, kill the service process before starting
659 ==== stopping services =====
662 Stop the router and all services. Services are sent the TERM signal,
663 followed by the INT signal, followed by the KILL signal. With each
664 iteration, the script pauses up to --signal-timeout seconds waiting
665 for each process to die before sending the next signal.
668 Stop the service specified by --service. See also --stop-all.
669 If the requested service does not have a matching PID file, an
670 attempt to locate the PID via 'ps' will be made.
673 Stop all services but do not stop any routers. See also --stop-all.
675 --graceful-shutdown-all
676 Send TERM signal to all services + router
679 Send TERM signal to the service specified by --service
682 Send INT signal to all services + router
685 Send INT signal to the service specified by --service
687 --immediate-shutdown-all
688 Send KILL signal to all services + router
691 Send KILL signal to the service specified by --service
694 Send KILL signal to all running services + routers, regardless of
695 the presence of a PID file, and remove all PID files indiscriminately.
697 ==== signaling services =====
700 Send signal to all services
703 Name of signal to send. If --signal-all is not specified, the
704 signal will be sent to the service specified by --service.
707 Seconds to wait for a process to die after sending a shutdown signal.
708 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
714 # we do not verify services for stop/signal actions, since those may
715 # legitimately be used against services not (or no longer) configured
716 # to run on the selected host.
717 do_init() and verify_services($opt_service) if
720 $opt_start_services or
723 $opt_restart_services;
# ---- starting / restarting ------------------------------------------
# do_init() has already been handled above for these actions.
if ($opt_start) {
    do_start($opt_service);
}
if ($opt_restart) {
    # the start only happens when the stop returned a true value
    do_stop($opt_service) && do_start($opt_service);
}
if ($opt_start_all) {
    do_start_all();
}
if ($opt_start_services) {
    do_start_services();
}
if ($opt_restart_all) {
    do_stop_all() && do_start_all();
}
if ($opt_restart_services) {
    do_stop_services() && do_start_services();
}

# ---- stopping --------------------------------------------------------
if ($opt_stop) {
    do_stop($opt_service);
}
if ($opt_stop_all) {
    do_stop_all();
}
if ($opt_stop_services) {
    do_stop_services();
}

# single-service shutdown with one specific signal
if ($opt_shutdown_graceful) {
    do_stop($opt_service, 'TERM');
}
if ($opt_shutdown_fast) {
    do_stop($opt_service, 'INT');
}
if ($opt_shutdown_immediate) {
    do_stop($opt_service, 'KILL');
}

# whole-host shutdown with one specific signal
if ($opt_shutdown_graceful_all) {
    do_stop_all('TERM');
}
if ($opt_shutdown_fast_all) {
    do_stop_all('INT');
}
if ($opt_shutdown_immediate_all) {
    do_stop_all('KILL');
}
if ($opt_kill_with_fire) {
    do_kill_with_fire();
}

# ---- signaling -------------------------------------------------------
if ($opt_signal) {
    do_signal($opt_service, $opt_signal);
}
if ($opt_signal_all) {
    do_signal_all($opt_signal);
}

# ---- diagnostics -----------------------------------------------------
if ($opt_diagnostic) {
    do_diagnostic();
}
753 # show help if no action was requested
754 do_help() if $opt_help or not (
757 $opt_start_services or
760 $opt_stop_services or
763 $opt_restart_services or
766 $opt_shutdown_graceful or
767 $opt_shutdown_graceful_all or
768 $opt_shutdown_fast or
769 $opt_shutdown_fast_all or
770 $opt_shutdown_immediate or
771 $opt_shutdown_immediate_all or
772 $opt_kill_with_fire or