From f2b812138a4328972f5ae0dec6bd6e4f0938f2e3 Mon Sep 17 00:00:00 2001 From: scottmk Date: Tue, 4 May 2010 13:41:16 +0000 Subject: [PATCH] Fix a bug that occasionally caused OSRF not to shut down cleanly. The osrf_ctl.sh script had been using ps + grep to capture the process ID (PID) of the opensrf-c daemon so that it could send a SIGINT signal to it later to shut it down. However the script was also capturing the PIDs of the daemon's child processes (i.e. the listener processes), which hadn't yet changed to application-specific names. As a result, when shutting down, the listener processes would receive signals from two different sources: from the opensrf-c daemon and from the surrounding shell script. If the signal from opensrf-c got there first, the kill from the script would fail, and the script would abort, even though the process had been successfully killed. The solution is for opensrf.c to write the daemon's PID directly to a file, instead of relying on ps + grep to capture it. The file name is specified by an additional command line parameter, which (for upward compatibility) is currently optional. Because this change involves a change to the osrf_ctl.sh script, it will be necessary to run configure before the usual make and make install. If you are using the usual configuration, run the following from within the OSRF trunk directory: ./configure --prefix=/openils --sysconfdir=/openils/conf If you don't run configure, the old osrf_ctl.sh script will continue to work as it has in the past, and you won't get the benefit of the change. M include/opensrf/utils.h M include/opensrf/osrf_system.h M src/libopensrf/utils.c M src/libopensrf/opensrf.c M src/libopensrf/osrf_system.c M bin/osrf_ctl.sh.in git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1953 9efc2488-bf62-4759-914b-345cdb29e865 --- bin/osrf_ctl.sh.in | 5 +--- include/opensrf/osrf_system.h | 2 ++ include/opensrf/utils.h | 2 +- src/libopensrf/opensrf.c | 28 ++++++++++++++++------ src/libopensrf/osrf_system.c | 44 ++++++++++++++++++++++++++++++++--- src/libopensrf/utils.c | 21 ++++++++++++++--- 6 files changed, 84 insertions(+), 18 deletions(-) diff --git a/bin/osrf_ctl.sh.in b/bin/osrf_ctl.sh.in index b1a10df..8110442 100755 --- a/bin/osrf_ctl.sh.in +++ b/bin/osrf_ctl.sh.in @@ -174,10 +174,7 @@ start_c() { fi; do_action "start" $PID_OSRF_C "OpenSRF C (host=$host)"; - opensrf-c $host $OPT_CONFIG opensrf; - sleep 1; # give the main C proc time to appear in ps - pid=$(ps ax | grep "OpenSRF System-C" | grep -v grep | awk '{print $1}') - echo $pid > "$PID_OSRF_C"; + opensrf-c $host $OPT_CONFIG opensrf "$PID_OSRF_C"; return 0; } diff --git a/include/opensrf/osrf_system.h b/include/opensrf/osrf_system.h index e9a8ef1..b39645d 100644 --- a/include/opensrf/osrf_system.h +++ b/include/opensrf/osrf_system.h @@ -17,6 +17,8 @@ extern "C" { #endif +void osrfSystemSetPidFile( const char* name ); + int osrf_system_bootstrap_client( char* config_file, char* contextnode ); int osrfSystemBootstrapClientResc( const char* config_file, diff --git a/include/opensrf/utils.h b/include/opensrf/utils.h index 2368199..112c7ee 100644 --- a/include/opensrf/utils.h +++ b/include/opensrf/utils.h @@ -280,8 +280,8 @@ extern "C" { int init_proc_title( int argc, char* argv[] ); int set_proc_title( const char* format, ... ); - int daemonize( void ); +int daemonize_write_pid( FILE* pidfile ); void* safe_malloc(int size); void* safe_calloc(int size); diff --git a/src/libopensrf/opensrf.c b/src/libopensrf/opensrf.c index e7215d2..5afa27e 100644 --- a/src/libopensrf/opensrf.c +++ b/src/libopensrf/opensrf.c @@ -1,5 +1,17 @@ -#include - +#include "opensrf/osrf_system.h" + +/** + @brief Run an OSRF server as defined by the command line and a config file. + @param argc Number of command line arguments, plus one. + @param argv Ragged array of command name plus command line arguments. + @return 0 if successful, or 1 if failure. + + Command line parameters: + - Full network name of the host where the process is running; or 'localhost' will do. + - Name of the configuration file; normally '/openils/conf/opensrf_core.xml'. + - Name of an aggregate within the configuration file, containing the relevant subset + of configuration stuff. +*/ int main( int argc, char* argv[] ) { if( argc < 4 ) { @@ -7,11 +19,14 @@ int main( int argc, char* argv[] ) { return 1; } - /* these must be strdup'ed because init_proc_title / set_proc_title + /* these must be strdup'ed because init_proc_title / set_proc_title are evil and overwrite the argv memory */ - char* host = strdup( argv[1] ); - char* config = strdup( argv[2] ); - char* context = strdup( argv[3] ); + char* host = strdup( argv[1] ); + char* config = strdup( argv[2] ); + char* context = strdup( argv[3] ); + + if( argv[4] ) + osrfSystemSetPidFile( argv[4] ); init_proc_title( argc, argv ); set_proc_title( "OpenSRF System-C" ); @@ -26,7 +41,6 @@ int main( int argc, char* argv[] ) { ); } - free(host); free(config); free(context); diff --git a/src/libopensrf/osrf_system.c b/src/libopensrf/osrf_system.c index 1cff927..c9eb7d3 100644 --- a/src/libopensrf/osrf_system.c +++ b/src/libopensrf/osrf_system.c @@ -51,6 +51,9 @@ static volatile sig_atomic_t sig_caught; /** Boolean: set to true when we finish shutting down. */ static int shutdownComplete = 0; +/** Name of file to which to write the process ID of the child process */ +char* pidfile_name = NULL; + static void add_child( pid_t pid, const char* app, const char* libfile ); static void delete_child( ChildNode* node ); static void delete_all_children( void ); @@ -121,6 +124,27 @@ transport_client* osrfSystemGetTransportClient( void ) { return osrfGlobalTransportClient; } +/** + @brief Save a copy of a file name to be used for writing a process ID. + @param name Designated file name, or NULL. + + Save a file name for later use in saving a process ID. If @a name is NULL, leave + the file name NULL. + + When the parent process spawns a child, the child becomes a daemon. The parent writes the + child's process ID to the PID file, if one has been designated, so that some other process + can retrieve the PID later and kill the daemon. +*/ +void osrfSystemSetPidFile( const char* name ) { + if( pidfile_name ) + free( pidfile_name ); + + if( name ) + pidfile_name = strdup( name ); + else + pidfile_name = NULL; +} + /** @brief Discard the global transport_client, but without disconnecting from Jabber. @@ -222,7 +246,23 @@ int osrfSystemBootstrap( const char* hostname, const char* configfile, // Turn into a daemon. The parent forks and exits. Only the // child returns, with the standard streams (stdin, stdout, and // stderr) redirected to /dev/null. - daemonize(); + FILE* pidfile = NULL; + if( pidfile_name ) { + pidfile = fopen( pidfile_name, "w" ); + if( !pidfile ) { + osrfLogError( OSRF_LOG_MARK, "Unable to open PID file \"%s\": %s", + pidfile_name, strerror( errno ) ); + free( pidfile_name ); + pidfile_name = NULL; + return -1; + } + } + daemonize_write_pid( pidfile ); + if( pidfile ) { + fclose( pidfile ); + free( pidfile_name ); + pidfile_name = NULL; + } jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname"); osrfStringArray* arr = osrfNewStringArray(8); @@ -588,8 +628,6 @@ int osrfSystemBootstrapClientResc( const char* config_file, snprintf(buf, len - 1, "%s_%s_%s_%ld", resource, host, tbuf, (long) getpid() ); if(client_connect( client, username, password, buf, 10, AUTH_DIGEST )) { - /* child nodes will leak the parents client... but we can't free - it without disconnecting the parents client :( */ osrfGlobalTransportClient = client; } diff --git a/src/libopensrf/utils.c b/src/libopensrf/utils.c index a3ca2ab..bd48953 100644 --- a/src/libopensrf/utils.c +++ b/src/libopensrf/utils.c @@ -647,11 +647,22 @@ char* uescape( const char* string, int size, int full_escape ) { @brief Become a proper daemon. @return 0 if successful, or -1 if not. - Call fork(). The parent exits. The child moves to the root - directory, detaches from the terminal, and redirects the - standard streams (stdin, stdout, stderr) to /dev/null. + Call fork(). The parent exits. The child moves to the root directory, detaches from + the terminal, and redirects the standard streams (stdin, stdout, stderr) to /dev/null. */ int daemonize( void ) { + return daemonize_write_pid( NULL ); +} +/** + @brief Become a proper daemon, and report the childs process ID. + @return 0 if successful, or -1 if not. + + Call fork(). If pidfile is not NULL, the parent writes the process ID of the child + process to the specified file. Then it exits. The child moves to the root + directory, detaches from the terminal, and redirects the standard streams (stdin, + stdout, stderr) to /dev/null. + */ +int daemonize_write_pid( FILE* pidfile ) { pid_t f = fork(); if (f == -1) { @@ -677,6 +688,10 @@ int daemonize( void ) { return 0; } else { // We're in the parent... + if( pidfile ) { + fprintf( pidfile, "%ld\n", (long) f ); + fclose( pidfile ); + } _exit(0); } } -- 2.43.2