Patch from Scott McKellar:
[OpenSRF.git] / src / libopensrf / osrf_system.c
index e5596dd..136558f 100644 (file)
@@ -3,10 +3,41 @@
 #include <opensrf/osrf_prefork.h>
 #include <signal.h>
 
-static int _osrfSystemInitCache( void );
+static void report_child_status( pid_t pid, int status );
+struct child_node;
+typedef struct child_node ChildNode;
+
+/* Signal handler for the top-level process: SIGTERM our whole process group,
+ * reap the children, exit.  NOTE(review): logging here may not be signal-safe. */
+static void handleKillSignal(int signo) {
+    signal(SIGTERM, SIG_IGN); /* kill(0,...) signals this process as well */
+    kill(0, SIGTERM);
+    sleep(1); /* give the children a chance to die before we reap them */
+    pid_t child_pid;
+    while( (child_pid=waitpid(-1,NULL,WNOHANG)) > 0)
+        osrfLogInfo(OSRF_LOG_MARK, "Killed child %ld", (long) child_pid);
+    _exit(0);
+}
+
+
+/* One entry in the doubly-linked list of launched application children. */
+struct child_node {
+       ChildNode *pNext;   /* next node, or NULL at the tail */
+       ChildNode *pPrev;   /* previous node, or NULL at the head */
+       pid_t pid;          /* process id returned by fork() */
+       char *app;          /* strdup'd application name, or NULL */
+       char *libfile;      /* strdup'd library file name, or NULL */
+};
+
+static ChildNode* child_list;
 
 static transport_client* osrfGlobalTransportClient = NULL;
 
+static void add_child( pid_t pid, const char* app, const char* libfile );
+static void delete_child( ChildNode* node );
+static void delete_all_children( void );
+static ChildNode* seek_child( pid_t pid );
+
 transport_client* osrfSystemGetTransportClient( void ) {
        return osrfGlobalTransportClient;
 }
@@ -15,20 +46,11 @@ void osrfSystemIgnoreTransportClient() {
        osrfGlobalTransportClient = NULL;
 }
 
-transport_client* osrf_system_get_transport_client( void ) {
-       return osrfGlobalTransportClient;
-}
-
 int osrf_system_bootstrap_client( char* config_file, char* contextnode ) {
-       return osrf_system_bootstrap_client_resc(config_file, contextnode, NULL);
-}
-
-int osrfSystemBootstrapClientResc( char* config_file, char* contextnode, char* resource ) {
-       return osrf_system_bootstrap_client_resc( config_file, contextnode, resource );
+       return osrfSystemBootstrapClientResc(config_file, contextnode, NULL);
 }
 
-
-static int _osrfSystemInitCache( void ) {
+int osrfSystemInitCache( void ) {
 
        jsonObject* cacheServers = osrf_settings_host_value_object("/cache/global/servers/server");
        char* maxCache = osrf_settings_host_value("/cache/global/max_cache_time");
@@ -37,7 +59,7 @@ static int _osrfSystemInitCache( void ) {
 
                if( cacheServers->type == JSON_ARRAY ) {
                        int i;
-                       char* servers[cacheServers->size];
+                       const char* servers[cacheServers->size];
                        for( i = 0; i != cacheServers->size; i++ ) {
                                servers[i] = jsonObjectGetString( jsonObjectGetIndex(cacheServers, i) );
                                osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[i]);
@@ -45,7 +67,7 @@ static int _osrfSystemInitCache( void ) {
                        osrfCacheInit( servers, cacheServers->size, atoi(maxCache) );
 
                } else {
-                       char* servers[] = { jsonObjectGetString(cacheServers) };                
+                       const char* servers[] = { jsonObjectGetString(cacheServers) };          
                        osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[0]);
                        osrfCacheInit( servers, 1, atoi(maxCache) );
                }
@@ -88,8 +110,6 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
        jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
        osrfStringArray* arr = osrfNewStringArray(8);
        
-       _osrfSystemInitCache();
-
        if(apps) {
                int i = 0;
 
@@ -97,7 +117,7 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                        osrfStringArrayAdd(arr, jsonObjectGetString(apps));
 
                } else {
-                       jsonObject* app;
+                       const jsonObject* app;
                        while( (app = jsonObjectGetIndex(apps, i++)) ) 
                                osrfStringArrayAdd(arr, jsonObjectGetString(app));
                }
@@ -122,12 +142,14 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                                pid_t pid;
                
                                if( (pid = fork()) ) { 
-                                       // storage pid in local table for re-launching dead children...
-                                       osrfLogInfo( OSRF_LOG_MARK, "Launched application child %ld", (long) pid);
+                                       // store pid in local list for re-launching dead children...
+                                       add_child( pid, appname, libfile );
+                                       osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
+                                                                appname, (long) pid );
        
                                } else {
                
-                                       osrfLogError( OSRF_LOG_MARK, " * Running application %s\n", appname);
+                                       osrfLogInfo( OSRF_LOG_MARK, " * Running application %s\n", appname);
                                        if( osrfAppRegisterApplication( appname, libfile ) == 0 ) 
                                                osrf_prefork_run(appname);
        
@@ -138,9 +160,15 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                } 
        } // should we do something if there are no apps? does the wait(NULL) below do that for us?
 
+       osrfStringArrayFree(arr);
+
+    signal(SIGTERM, handleKillSignal);
+    signal(SIGINT, handleKillSignal);
+       
        while(1) {
                errno = 0;
-               pid_t pid = wait(NULL);
+               int status;
+               pid_t pid = wait( &status );
                if(-1 == pid) {
                        if(errno == ECHILD)
                                osrfLogError(OSRF_LOG_MARK, "We have no more live services... exiting");
@@ -148,14 +176,132 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                                osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s", strerror(errno));
                        break;
                } else {
-                       osrfLogError(OSRF_LOG_MARK, "We lost a top-level service process with PID %ld", pid);
+                       report_child_status( pid, status );
                }
        }
 
+       delete_all_children();
        return 0;
 }
 
-int osrf_system_bootstrap_client_resc( char* config_file, char* contextnode, char* resource ) {
+
+/*
+ * Log how a child process reported by wait() changed state, and drop it from
+ * the child list once it is dead.
+ * pid:    process id returned by wait()
+ * status: status word filled in by wait(), decoded with the W* macros
+ * A nonzero exit code, a fatal signal, or a stop is logged as an error; a zero
+ * exit code as info.  A pid missing from the list is shown as app "[unknown]".
+ */
+static void report_child_status( pid_t pid, int status )
+{
+       ChildNode* node = seek_child( pid );
+       const char* app = ( node && node->app ) ? node->app : "[unknown]";
+
+       if( WIFEXITED( status ) )
+       {
+               int rc = WEXITSTATUS( status );  // return code of child process
+               if( rc )
+                       osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
+                                                 (long) pid, app, rc );
+               else
+                       osrfLogInfo( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
+                                                 (long) pid, app );
+       }
+       else if( WIFSIGNALED( status ) )
+               osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
+                                         (long) pid, app, WTERMSIG( status) );
+       else if( WIFSTOPPED( status ) )
+       {
+               osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
+                                         (long) pid, app, (int) WSTOPSIG( status ) );
+               return;   /* a stopped child is still alive: keep its list entry */
+       }
+
+       delete_child( node );
+}
+
+/*----------- Routines to manage list of children --*/
+
+/*
+ * Record a newly forked child at the head of child_list.
+ *
+ * pid:     process id of the child
+ * app:     application name; copied with strdup(), may be NULL
+ * libfile: library file name; copied with strdup(), may be NULL
+ *
+ * No check is made for an existing node with the same pid.
+ * The node owns its copies of the strings; delete_child() frees them.
+ */
+static void add_child( pid_t pid, const char* app, const char* libfile )
+{
+       ChildNode* fresh = safe_malloc( sizeof( ChildNode ) );
+
+       /* Fill in the payload; a NULL input string stays NULL */
+       fresh->pid     = pid;
+       fresh->app     = app     ? strdup( app )     : NULL;
+       fresh->libfile = libfile ? strdup( libfile ) : NULL;
+
+       /* Link the node in front of the current head (if any) */
+       ChildNode* old_head = child_list;
+       fresh->pPrev = NULL;
+       fresh->pNext = old_head;
+       if( old_head )
+               old_head->pPrev = fresh;
+
+       child_list = fresh;
+}
+
+/*
+ * Unlink a node from child_list and free it together with its strings.
+ * node: the entry to remove; NULL is accepted and ignored.
+ */
+static void delete_child( ChildNode* node ) {
+       if( NULL == node )
+               return;
+
+       ChildNode* before = node->pPrev;
+       ChildNode* after  = node->pNext;
+
+       if( before )
+               before->pNext = after;
+       else
+               child_list = after;     /* node was the head of the list */
+
+       if( after )
+               after->pPrev = before;
+
+       free( node->app );
+       free( node->libfile );
+       free( node );
+}
+
+/* Empty child_list: delete_child() unlinks the head, so re-read it each pass */
+static void delete_all_children( void ) {
+       for( ChildNode* head = child_list; head; head = child_list )
+               delete_child( head );
+}
+
+/*
+ * Look up a child by process id.
+ *
+ * pid: process id to search for
+ *
+ * Returns the first node in child_list whose pid matches, or NULL when the
+ * list holds no such node (including when the list is empty).
+ */
+static ChildNode* seek_child( pid_t pid ) {
+       ChildNode* cursor;
+       for( cursor = child_list; cursor; cursor = cursor->pNext )
+               if( cursor->pid == pid )
+                       return cursor;
+       return NULL;
+}
+
+/*----------- End of routines to manage list of children --*/
+
+
+int osrfSystemBootstrapClientResc( char* config_file, char* contextnode, char* resource ) {
 
        int failure = 0;
 
@@ -179,9 +325,15 @@ int osrf_system_bootstrap_client_resc( char* config_file, char* contextnode, cha
 
 
        char* log_file          = osrfConfigGetValue( NULL, "/logfile");
+       if(!log_file) {
+               fprintf(stderr, "No log file specified in configuration file %s\n",
+                               config_file);
+               return -1;
+       }
+
        char* log_level         = osrfConfigGetValue( NULL, "/loglevel" );
        osrfStringArray* arr    = osrfNewStringArray(8);
-       osrfConfigGetValueList(NULL, arr, "/domains/domain");
+       osrfConfigGetValueList(NULL, arr, "/domain");
 
        char* username          = osrfConfigGetValue( NULL, "/username" );
        char* password          = osrfConfigGetValue( NULL, "/passwd" );
@@ -190,19 +342,6 @@ int osrf_system_bootstrap_client_resc( char* config_file, char* contextnode, cha
        char* facility          = osrfConfigGetValue( NULL, "/syslog" );
        char* actlog            = osrfConfigGetValue( NULL, "/actlog" );
 
-       if(!log_file) {
-               fprintf(stderr, "No log file specified in configuration file %s\n",
-                          config_file);
-               free(log_level);
-               free(username);
-               free(password);
-               free(port);
-               free(unixpath);
-               free(facility);
-               free(actlog);
-               return -1;
-       }
-
        /* if we're a source-client, tell the logger */
        char* isclient = osrfConfigGetValue(NULL, "/client");
        if( isclient && !strcasecmp(isclient,"true") )
@@ -254,6 +393,7 @@ int osrf_system_bootstrap_client_resc( char* config_file, char* contextnode, cha
 
        if (failure) {
                osrfStringArrayFree(arr);
+               free(log_file);
                free(log_level);
                free(username);
                free(password);
@@ -314,6 +454,7 @@ int osrf_system_disconnect_client( void ) {
 
 int osrf_system_shutdown( void ) {
        osrfConfigCleanup();
+    osrfCacheCleanup();
        osrf_system_disconnect_client();
        osrf_settings_free_host_config(NULL);
        osrfAppSessionCleanup();