2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
# Defaults for the command-line options. Each variable is bound by
# reference to its switch in the option table that follows.
29 my $opt_service = undef;
# NOTE(review): @CONF_DIR@ and @PID_DIR@ look like placeholders that are
# substituted at build/install time -- confirm.
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
# Shutdown variants: graceful/fast/immediate map to the TERM/INT/KILL
# signals in the dispatch code at the bottom of the script; the *_all
# forms go through do_stop_all() instead of do_stop().
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
# Upper bound of the once-per-PID polling loop that waits for a signaled
# process to go away (1..$opt_signal_timeout iterations).
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
55 my $opt_force_clean_process = 0;
57 my $opt_diagnostic = 0;
# Host name used in messages and in the C controller command line:
# OSRF_HOSTNAME from the environment when set to a true value, otherwise
# the fully qualified name reported by Net::Domain::hostfqdn().
61 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Option table: maps each command-line switch to the variable it sets.
# The specs follow Getopt::Long conventions (presumably this is the
# argument list of GetOptions): a "=s" / "=i" suffix means the switch
# requires a string / integer value, no suffix means a boolean flag.
64 'service=s' => \$opt_service,
65 'config=s' => \$opt_config,
66 'pid-dir=s' => \$opt_pid_dir,
67 'no-daemon' => \$opt_no_daemon,
68 'settings-startup-pause=i' => \$opt_settings_pause,
69 'localhost' => \$opt_localhost,
71 'quiet' => \$opt_quiet,
72 'graceful-shutdown' => \$opt_shutdown_graceful,
73 'fast-shutdown' => \$opt_shutdown_fast,
74 'immediate-shutdown' => \$opt_shutdown_immediate,
75 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
76 'fast-shutdown-all' => \$opt_shutdown_fast_all,
77 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
78 'kill-with-fire' => \$opt_kill_with_fire,
79 'force-clean-process' => \$opt_force_clean_process,
# --signal-timeout carries a number of seconds, so it must take an integer
# value ("=i"). Without the value spec it is parsed as a boolean flag and
# can only ever set the timeout to 1, never to a caller-chosen duration.
80 'signal-timeout=i' => \$opt_signal_timeout,
81 'signal=s' => \$opt_signal,
82 'signal-all' => \$opt_signal_all,
83 'start' => \$opt_start,
85 'start-all' => \$opt_start_all,
86 'stop-all' => \$opt_stop_all,
87 'restart' => \$opt_restart,
88 'restart-all' => \$opt_restart_all,
89 'start-services' => \$opt_start_services,
90 'stop-services' => \$opt_stop_services,
91 'restart-services' => \$opt_restart_services,
92 'diagnostic' => \$opt_diagnostic
96 $hostname = 'localhost';
97 $ENV{OSRF_HOSTNAME} = $hostname;
# Command-line prefixes for the non-Perl service controllers. The start
# code appends "-a start -s <service>" to these and runs them via system().
# Both carry the config file and PID directory; the C controller is given
# the host name with -h, while the Python controller only gets -l when
# --localhost was requested.
100 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
101 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
# Checks a requested service name against the services configured for this
# host: the entries of @perl_services plus the {service} names taken from
# @nonperl_services.
# Returns 1 when no service name was supplied or when the name is found in
# that combined list. Otherwise it reports the problem through msg().
103 sub verify_services {
105 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
# nothing specific was requested, so there is nothing to verify
107 return 1 unless $service;
108 return 1 if grep { $_ eq $service } @services;
109 msg("$service is not configured to run on $hostname");
111 msg("No services are configured to run on $hostname");
# the hint is only useful when --localhost was not already in effect
113 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
121 my @pids = get_service_pids_from_file($service);
124 # no PID files exist. see if the service is running anyway
126 @pids = get_service_pids_from_ps($service);
128 msg("cannot signal $service : no pid file or running process");
133 for my $pid (@pids) {
134 if (kill($signal, $pid) == 0) { # no process was signaled.
135 msg("cannot signal $service: process $pid is not running");
136 my $pidfile = get_pid_file($service);
137 unlink $pidfile if $pidfile;
141 msg("sending $signal signal to pid=$pid $service");
147 # returns 2 if a process should have gone away but did not
148 # in the case of multiple PIDs (e.g. router), return the
149 # status of any failures, but not the successes.
152 my @pids = get_service_pids_from_file($service);
155 for my $pid (@pids) {
157 # to determine whether a process has died, we have to send
158 # a no-op signal to the PID and check the success of that signal
160 for my $i (1..$opt_signal_timeout) {
161 $sig_count = kill(0, $pid);
162 last unless $sig_count;
167 msg("timed out waiting on $service pid=$pid to die");
172 # cleanup successful. remove the PID file
173 my $pidfile = get_pid_file($service);
174 unlink $pidfile if $pidfile;
182 return "$opt_pid_dir/$service.pid";
185 # services usually only have 1 pid, but the router will have at least 2
# Reads the PIDs recorded in a service's PID file, whose path comes from
# get_pid_file($service). Yields an empty list when the file does not exist.
# The file is read one line per PID; leading whitespace and newlines are
# stripped from every entry.
# NOTE(review): the file is read by shelling out to `cat` with the path
# interpolated into the command line -- confirm the path is always trusted.
186 sub get_service_pids_from_file {
188 my $pid_file = get_pid_file($service);
189 return () unless -e $pid_file;
190 my @pids = `cat $pid_file`;
191 s/^\s*|\n//g for @pids;
# Locates PIDs for a service by inspecting the process table rather than
# the PID file. The shell pipeline greps `ps ax` output for the process
# title 'OpenSRF Router' when the service is 'router', and for
# 'OpenSRF Listener [<service>]' otherwise.
195 sub get_service_pids_from_ps {
198 my $ps = ($service eq 'router') ?
199 "ps ax | grep 'OpenSRF Router'" :
200 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
# drop the grep process itself, trim leading blanks, and keep only the
# first space-delimited column of each ps line (the PID)
202 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
# strip leading whitespace and newlines from each collected entry
204 s/^\s*|\n//g for @pids;
211 my $alive = do_init(1);
213 my @services = get_service_list_from_files(1);
216 @conf_services = (@perl_services,
217 map {$_->{service}} @nonperl_services);
218 push(@services, @conf_services);
223 for my $svc (@services) {
224 $len = length($svc) if length($svc) > $len;
228 for my $svc (sort keys %services) {
229 my @pf_pids = get_service_pids_from_file($svc);
230 my @ps_pids = get_service_pids_from_ps($svc);
231 my $svc_str = sprintf("%-${len}s ", $svc);
234 unless(@ps_pids or @pf_pids) {
235 msg("$svc_str is not running");
239 for my $pid (@ps_pids) {
242 my $str = "$svc_str [$pid] ";
243 my $times = `ps -o etime=,cputime= $pid`;
244 $times =~ s/^\s+|\s+$//g;
245 my @times = split(/ /, $times);
246 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
248 if ($svc eq 'router') {
251 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
252 $str .= "#drones=".scalar(@drones);
254 msg("\tERR $svc has no running drones.") unless @drones;
257 msg("\tERR $svc [$pid] NOT configured for this host.")
258 unless grep {$_ eq $svc} @conf_services
261 msg("\tERR $svc [$pid] NOT found in PID file.")
262 unless grep {$_ eq $pid} @pf_pids;
265 for my $pid (@pf_pids) {
267 msg("\tERR $svc Has PID file entry [$pid], ".
268 "which matches no running $svc processes");
# Starts the router by running the external opensrf_router program with the
# core config file and the literal argument 'routers'. It then looks up the
# resulting opensrf_router PIDs with ps and opens the router PID file for
# writing. Dies if the PID file cannot be opened.
# Takes no arguments.
275 sub do_start_router {
276 `opensrf_router $opt_config routers`;
278 sleep 2; # give the router time to fork
# one PID per line, for every process whose command name is opensrf_router
279 my @pids = `ps -C opensrf_router -o pid=`;
280 s/^\s*|\n//g for @pids;
282 my $pidfile = get_pid_file('router');
283 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
# logged once per router PID ($_ is the current PID)
286 msg("starting service pid=$_ router");
292 # stop a specific service
294 my ($service, @signals) = @_;
295 @signals = qw/TERM INT KILL/ unless @signals;
296 for my $sig (@signals) {
297 last unless do_signal($service, $sig) == 2;
305 OpenSRF::System->bootstrap_client(config_file => $opt_config);
307 if (!OpenSRF::Transport::PeerHandle->retrieve) {
308 return 0 if $fail_ok;
309 die "Unable to bootstrap client for requests\n";
312 load_settings(); # load the settings config if we can
314 my $sclient = OpenSRF::Utils::SettingsClient->new;
315 my $apps = $sclient->config_value("activeapps", "appname");
317 # disconnect the top-level network handle
318 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
321 $apps = [$apps] unless ref $apps;
322 for my $app (@$apps) {
323 if (!$sclient->config_value('apps', $app)) {
324 msg("Service '$app' is listed for this host, ".
325 "but there is no configuration for it in $opt_config");
328 my $lang = $sclient->config_value('apps', $app, 'language') || '';
329 if ($lang =~ /perl/i) {
330 push(@perl_services, $app);
332 push(@nonperl_services, {service => $app, lang => $lang});
339 # start a specific service
343 my @pf_pids = get_service_pids_from_file($service);
344 my @ps_pids = get_service_pids_from_ps($service);
346 if (@pf_pids) { # had pidfile
349 msg("service $service already running : @ps_pids");
352 } else { # stale pidfile
354 my $pidfile = get_pid_file($service);
355 msg("removing stale pid file $pidfile");
359 } elsif (@ps_pids) { # orphan process
361 if ($opt_force_clean_process) {
362 msg("service $service pid=@ps_pids is running with no pidfile");
363 do_signal($service, 'KILL');
365 msg("service $service pid=@ps_pids is running with no pidfile! ".
366 "use --force-clean-process to automatically kill orphan processes");
371 return do_start_router() if $service eq 'router';
373 load_settings() if $service eq 'opensrf.settings';
375 if(grep { $_ eq $service } @perl_services) {
376 return unless do_daemon($service);
377 OpenSRF::System->run_service($service, $opt_pid_dir);
380 # note: we don't daemonize non-perl services, but instead
381 # assume the controller for other languages manages that.
382 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
384 if ($svc->{lang} =~ /c/i) {
385 system("$C_COMMAND -a start -s $service");
387 } elsif ($svc->{lang} =~ /python/i) {
388 system("$PY_COMMAND -a start -s $service");
394 # should not get here
400 msg("starting router and services for $hostname");
402 return do_start_services();
# Starts every configured service for this host, without touching the
# router. Order of operations:
#   1. opensrf.settings, if it is among the Perl services, followed by an
#      optional pause of $opt_settings_pause seconds;
#   2. all remaining Perl services;
#   3. each non-Perl service, one do_start() call per service.
405 sub do_start_services {
406 msg("starting services for $hostname");
408 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
409 do_start('opensrf.settings');
410 # in batch mode, give opensrf.settings plenty of time to start
411 # before any non-Perl services try to connect
412 sleep $opt_settings_pause if $opt_settings_pause;
415 # start Perl services
416 for my $service (@perl_services) {
# opensrf.settings was already started above; do not start it twice
417 do_start($service) unless $service eq 'opensrf.settings';
420 # start each non-perl service individually instead of using the native
421 # start-all command. this allows us to test for existing pid files
422 # and/or running processes on each service before starting.
423 # it also means each service has to connect-fetch_setting-disconnect
424 # from jabber, which makes startup slightly slower than native start-all
425 do_start($_->{service}) for @nonperl_services;
430 # signal a single service
434 return do_signal_all($signal, $service);
437 # returns the list of running services based on presence of PID files.
438 # the 'router' service is not included by default, since it's
439 # usually treated specially.
# Derives service names from the *.pid files found in $opt_pid_dir.
#   $include_router - when true, 'router' is kept in the result;
#                     when false or omitted, it is filtered out.
# Returns the list of service names (PID file basenames without ".pid").
# If no PID files match, ls's error output is discarded and the list is empty.
440 sub get_service_list_from_files {
441 my $include_router = shift;
442 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from each ls output line
443 s/^\s*|\n//g for @services;
# reduce "/path/to/name.pid" to "name"
444 s|.*/(.*)\.pid$|$1| for @services;
445 return @services if $include_router;
446 return grep { $_ ne 'router' } @services;
450 my ($signal, @services) = @_;
451 @services = get_service_list_from_files() unless @services;
453 do_signal_send($_, $signal) for @services;
455 # if user passed a known non-shutdown signal, we're done.
456 return if $signal =~ /HUP|USR1|USR2/;
458 do_signal_wait($_) for @services;
461 # pull all opensrf listener and drone PIDs from 'ps',
462 # kill them all, and remove all pid files
# Last-resort shutdown that works from the process table, not from PID files.
# Collects PIDs via get_running_pids(), looks up each process's command line
# with ps for the log message, then enumerates every *.pid file in
# $opt_pid_dir and logs its removal.
# Takes no arguments.
463 sub do_kill_with_fire {
464 msg("killing with fire");
466 my @pids = get_running_pids();
# ignore entries that contain no digits at all (e.g. empty strings)
468 next unless $_ =~ /\d+/;
# command line of the process, used only for the log message
469 my $proc = `ps -p $_ -o cmd=`;
471 msg("killing with fire pid=$_ $proc");
475 # remove all of the pid files
476 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
477 s/^\s*|\n//g for @files;
479 msg("removing pid file $_");
# Gathers the PIDs of running OpenSRF processes from `ps ax`, matching on
# the process titles 'OpenSRF Listener', 'OpenSRF Drone' and
# 'OpenSRF Router', in that order. Results of each search are appended to
# @pids.
484 sub get_running_pids {
487 # start with the listeners, then drones, then routers
489 "ps ax | grep 'OpenSRF Listener' ",
490 "ps ax | grep 'OpenSRF Drone' ",
491 "ps ax | grep 'OpenSRF Router' "
# complete each pipeline: drop the grep process itself, trim leading
# blanks, keep only the first space-delimited column (the PID)
494 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
496 for my $grep (@greps) {
# strip leading whitespace and newlines from this search's PIDs
498 s/^\s*|\n//g for @spids;
499 push (@pids, @spids);
# Finds PID files that refer to processes that are no longer running.
# For every service that has a PID file (router included), each recorded
# PID is compared with the PIDs returned by get_running_pids(). A PID with
# no match is treated as stale and the removal of that service's PID file
# is logged.
# Takes no arguments.
505 sub clear_stale_pids {
# the true argument includes 'router' in the list
506 my @pidfile_services = get_service_list_from_files(1);
507 my @running_pids = get_running_pids();
509 for my $svc (@pidfile_services) {
510 my @pids = get_service_pids_from_file($svc);
511 for my $pid (@pids) {
# PID still belongs to a live OpenSRF process: not stale
512 next if grep { $_ eq $pid } @running_pids;
513 my $pidfile = get_pid_file($svc);
514 msg("removing stale pid file $pidfile");
# Stops every service that has a PID file, excluding the router
# (get_service_list_from_files() is called without its include-router flag).
# The signal list defaults to TERM, INT, KILL and is tried in that order.
# Each round sends the signal to every listed service, then waits on each;
# services whose wait returns 2 (still alive) are collected in @redo.
# The loop ends early once no services remain.
# NOTE(review): presumably @services is narrowed to @redo between rounds --
# confirm against the full loop body.
520 sub do_stop_services {
522 @signals = qw/TERM INT KILL/ unless @signals;
524 msg("stopping services for $hostname");
525 my @services = get_service_list_from_files();
527 for my $signal (@signals) {
530 # send the signal to all PIDs
531 do_signal_send($_, $signal) for @services;
533 # then wait for them to go away
534 for my $service (@services) {
535 push(@redo, $service) if do_signal_wait($service) == 2;
# nothing left to stop: skip the remaining, harsher signals
539 last unless @services;
547 @signals = qw/TERM INT KILL/ unless @signals;
549 do_stop_services(@signals);
551 # graceful shutdown requires the presence of the router, so stop the
552 # router last. See if it's running first to avoid unnecessary warnings.
553 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
558 # daemonize us. return true if we're the child, false if parent
560 return 1 if $opt_no_daemon;
562 my $pid_file = get_pid_file($service);
563 my $pid = OpenSRF::Utils::safe_fork();
565 msg("starting service pid=$pid $service");
573 open STDIN, '</dev/null';
574 open STDOUT, '>/dev/null';
575 open STDERR, '>/dev/null';
576 `echo $$ > $pid_file`;
580 # parses the local settings file
582 my $conf = OpenSRF::Utils::Config->current;
583 my $cfile = $conf->bootstrap->settings_config;
584 return unless $cfile;
585 my $parser = OpenSRF::Utils::SettingsParser->new();
586 $parser->initialize( $cfile );
587 $OpenSRF::Utils::SettingsClient::host_config =
588 $parser->get_server_config($conf->env->hostname);
593 print "* $m\n" unless $opt_quiet;
599 Usage: $0 --localhost --start-all
601 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
602 OpenSRF configuration file
604 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
605 Directory where process-specific PID files are kept
607 --settings-startup-pause
608 How long to give the opensrf.settings server to start up when running
609 in batch mode (start_all). The purpose is to give plenty of time for
610 the settings server to be up and active before any non-Perl services
614 Force the hostname to be 'localhost', instead of the fully qualified
615 domain name for the machine.
618 Specifies which OpenSRF service to control
621 Do not print informational messages to STDOUT
624 Do not detach and run as a daemon process. Useful for debugging.
625 Only works for Perl services and only when starting a single service.
628 Print this help message
631 Print information about running services
633 ==== starting services =====
636 Start the router and all services
639 Start the service specified by --service
642 Start all services but do not start any routers
645 Restart the router and all services
648 Restart the service specified by --service
651 Restart all services but do not restart any routers
653 --force-clean-process
654 When starting a service, if a service process is already running
655 but no pidfile exists, kill the service process before starting
658 ==== stopping services =====
661 Stop the router and all services. Services are sent the TERM signal,
662 followed by the INT signal, followed by the KILL signal. With each
663 iteration, the script pauses up to --signal-timeout seconds waiting
664 for each process to die before sending the next signal.
667 Stop the service specified by --service. See also --stop-all.
668 If the requested service does not have a matching PID file, an
669 attempt to locate the PID via 'ps' will be made.
672 Stop all services but do not stop any routers. See also --stop-all.
674 --graceful-shutdown-all
675 Send TERM signal to all services + router
678 Send TERM signal to the service specified by --service
681 Send INT signal to all services + router
684 Send INT signal to the service specified by --service
686 --immediate-shutdown-all
687 Send KILL signal to all services + router
690 Send KILL signal to the service specified by --service
693 Send KILL signal to all running services + routers, regardless of
694 the presence of a PID file, and remove all PID files indiscriminately.
696 ==== signaling services =====
699 Send signal to all services
702 Name of signal to send. If --signal-all is not specified, the
703 signal will be sent to the service specified by --service.
706 Seconds to wait for a process to die after sending a shutdown signal.
707 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
713 # we do not verify services for stop/signal actions, since those may
714 # legitimately be used against services not (or no longer) configured
715 # to run on the selected host.
716 do_init() and verify_services($opt_service) if
719 $opt_start_services or
722 $opt_restart_services;
724 # starting services. do_init() handled above
725 do_start($opt_service) if $opt_start;
726 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
727 do_start_all() if $opt_start_all;
728 do_start_services() if $opt_start_services;
729 do_stop_all() and do_start_all() if $opt_restart_all;
730 do_stop_services() and do_start_services() if $opt_restart_services;
733 do_stop($opt_service) if $opt_stop;
734 do_stop_all() if $opt_stop_all;
735 do_stop_services() if $opt_stop_services;
736 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
737 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
738 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
739 do_stop_all('TERM') if $opt_shutdown_graceful_all;
740 do_stop_all('INT') if $opt_shutdown_fast_all;
741 do_stop_all('KILL') if $opt_shutdown_immediate_all;
742 do_kill_with_fire() if $opt_kill_with_fire;
745 do_signal($opt_service, $opt_signal) if $opt_signal;
746 do_signal_all($opt_signal) if $opt_signal_all;
749 do_diagnostic() if $opt_diagnostic;
752 # show help if no action was requested
753 do_help() if $opt_help or not (
756 $opt_start_services or
759 $opt_stop_services or
762 $opt_restart_services or
765 $opt_shutdown_graceful or
766 $opt_shutdown_graceful_all or
767 $opt_shutdown_fast or
768 $opt_shutdown_fast_all or
769 $opt_shutdown_immediate or
770 $opt_shutdown_immediate_all or
771 $opt_kill_with_fire or