From e3414a2937c369a160abf178e91f91e4866c5140 Mon Sep 17 00:00:00 2001 From: scottmk Date: Sun, 20 Dec 2009 06:37:02 +0000 Subject: [PATCH 1/1] 1. In the parent router process: wait for all of the immediate child processes to terminate before exiting. This change eliminates the need for the shell script invoking the router to sleep before running a ps to identify the effective router processes (which are grandchildren of the parent). By the time the parent exits, the children will have launched the grandchildren and exited. 2. If any of the immediate child processes terminates abnormally (either a non-zero return code or termination by a signal), issue a warning message to that effect. This message goes to standard error, since the parent process never opens a log file. 3. Apply the volatile qualifier to a couple of variables that are updated asynchronously by a signal handler. M src/router/osrf_router.c M src/router/osrf_router_main.c git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1878 9efc2488-bf62-4759-914b-345cdb29e865 --- src/router/osrf_router.c | 2 +- src/router/osrf_router_main.c | 58 +++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/router/osrf_router.c b/src/router/osrf_router.c index b847807..ede2ee3 100644 --- a/src/router/osrf_router.c +++ b/src/router/osrf_router.c @@ -40,7 +40,7 @@ struct osrfRouterStruct { char* resource; /**< Router's resource name for the Jabber logon. */ char* password; /**< Router's password for the Jabber logon. */ int port; /**< Jabber's port number. */ - sig_atomic_t stop; /**< To be set by signal handler to interrupt main loop. */ + volatile sig_atomic_t stop; /**< To be set by signal handler to interrupt main loop. */ /** Array of client domains that we allow to send requests through us. 
*/ osrfStringArray* trustedClients; diff --git a/src/router/osrf_router_main.c b/src/router/osrf_router_main.c index bf3b387..0983991 100644 --- a/src/router/osrf_router_main.c +++ b/src/router/osrf_router_main.c @@ -16,6 +16,9 @@ */ #include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <errno.h> #include "opensrf/utils.h" #include "opensrf/log.h" #include "opensrf/osrf_list.h" @@ -25,7 +28,7 @@ static osrfRouter* router = NULL; -static sig_atomic_t stop_signal = 0; +static volatile sig_atomic_t stop_signal = 0; static void setupRouter(jsonObject* configChunk); @@ -86,6 +89,8 @@ int main( int argc, char* argv[] ) { /* Spawn child process(es) */ + int rc = EXIT_SUCCESS; + int parent = 1; // boolean int i; for(i = 0; i < configInfo->size; i++) { jsonObject* configChunk = jsonObjectGetIndex(configInfo, i); @@ -104,18 +109,59 @@ int main( int argc, char* argv[] ) { } if(fork() == 0) { /* create a new child to run this router instance */ setupRouter(configChunk); + parent = 0; break; /* We're a child; don't spawn any more children here */ } } + if( parent ) { + // Wait for all child processes to terminate. + // If any ended abnormally, report it. + while( 1 ) { // Loop until all children terminate + int status; + errno = 0; + pid_t child_pid = wait( &status ); + if( -1 == child_pid ) { + // ECHILD means no children are left. Anything else we ignore. + if( ECHILD == errno ) + break; + } else if( WIFEXITED( status ) ) { + // Relatively normal exit, i.e.
via calling exit() + // or _exit(), or by returning from main() + int child_rc = WEXITSTATUS( status ); + if( child_rc ) { + osrfLogWarning( OSRF_LOG_MARK, + "Child router process %ld exited with return status %d", + (long) child_pid, child_rc ); + rc = EXIT_FAILURE; + } else { + ; // Terminated successfully; silently ignore + } + } else if( WIFSIGNALED( status ) ) { + // Killed by a signal + int signo = WTERMSIG( status ); + const char* extra = ""; +#ifdef WCOREDUMP + if( WCOREDUMP( status ) ) + extra = "with core dump "; +#endif + osrfLogWarning( OSRF_LOG_MARK, "Child router process %ld killed %sby signal %d", + (long) child_pid, extra, signo ); + + rc = EXIT_FAILURE; + } + } + } + if( stop_signal ) { - // Interrupted by a signal? Re raise so the parent can see it. + // Interrupted by a signal? Re-raise so the parent can see it. osrfLogWarning( OSRF_LOG_MARK, "Interrupted by signal %d; re-raising", (int) stop_signal ); + signal( stop_signal, SIG_DFL ); raise( stop_signal ); } - return EXIT_SUCCESS; + return rc; } /** @@ -144,7 +190,7 @@ static void setupRouter(jsonObject* configChunk) { if(!log_file) { - fprintf(stderr, "Log file name not specified for router\n"); + osrfLogError( OSRF_LOG_MARK, "Log file name not specified for router" ); return; } @@ -196,7 +242,6 @@ static void setupRouter(jsonObject* configChunk) { osrfStringArrayAdd(tclients, clientDomain); } - if( tclients->size == 0 || tservers->size == 0 ) { osrfLogError( OSRF_LOG_MARK, "We need trusted servers and trusted client to run the router..."); @@ -213,7 +258,8 @@ static void setupRouter(jsonObject* configChunk) { signal(SIGTERM,routerSignalHandler); if( (osrfRouterConnect(router)) != 0 ) { - fprintf(stderr, "Unable to connect router to jabber server %s... exiting\n", server ); + osrfLogError( OSRF_LOG_MARK, "Unable to connect router to jabber server %s... exiting", + server ); osrfRouterFree(router); return; } -- 2.43.2