added cache cleanup code
[OpenSRF.git] / src / libopensrf / osrf_system.c
index c378b7b..92cfaca 100644 (file)
@@ -4,9 +4,28 @@
 #include <signal.h>
 
 static int _osrfSystemInitCache( void );
+static void report_child_status( pid_t pid, int status );
+struct child_node;
+typedef struct child_node ChildNode;
+
+struct child_node
+{
+       ChildNode* pNext;
+       ChildNode* pPrev;
+       pid_t pid;
+       char* app;
+       char* libfile;
+};
+
+static ChildNode* child_list;
 
 static transport_client* osrfGlobalTransportClient = NULL;
 
+static void add_child( pid_t pid, const char* app, const char* libfile );
+static void delete_child( ChildNode* node );
+static void delete_all_children( void );
+static ChildNode* seek_child( pid_t pid );
+
 transport_client* osrfSystemGetTransportClient( void ) {
        return osrfGlobalTransportClient;
 }
@@ -79,7 +98,12 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                        hostname, configfile );
                return -1;
        }
-       
+
+       /** daemonize me **/
+       /* background and let our children do their thing */
+       /* NOTE: This has been moved from below the 'if (apps)' block below ... move it back if things go crazy */
+       daemonize();
+
        jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
        osrfStringArray* arr = osrfNewStringArray(8);
        
@@ -117,12 +141,14 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                                pid_t pid;
                
                                if( (pid = fork()) ) { 
-                                       // storage pid in local table for re-launching dead children...
-                                       osrfLogInfo( OSRF_LOG_MARK, "Launched application child %ld", (long) pid);
+                                       // store pid in local list for re-launching dead children...
+                                       add_child( pid, appname, libfile );
+                                       osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
+                                                                appname, (long) pid );
        
                                } else {
                
-                                       fprintf(stderr, " * Running application %s\n", appname);
+                                       osrfLogError( OSRF_LOG_MARK, " * Running application %s\n", appname);
                                        if( osrfAppRegisterApplication( appname, libfile ) == 0 ) 
                                                osrf_prefork_run(appname);
        
@@ -131,30 +157,144 @@ int osrfSystemBootstrap( char* hostname, char* configfile, char* contextNode ) {
                                }
                        } // language == c
                } 
+       } // should we do something if there are no apps? does the wait(NULL) below do that for us?
+
+       while(1) {
+               errno = 0;
+               int status;
+               pid_t pid = wait( &status );
+               if(-1 == pid) {
+                       if(errno == ECHILD)
+                               osrfLogError(OSRF_LOG_MARK, "We have no more live services... exiting");
+                       else
+                               osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s", strerror(errno));
+                       break;
+               } else {
+                       report_child_status( pid, status );
+               }
        }
 
-       /** daemonize me **/
+       delete_all_children();
+       return 0;
+}
 
-       /* background and let our children do their thing */
-       daemonize();
-    while(1) {
-        errno = 0;
-        pid_t pid = wait(NULL);
-        if(-1 == pid) {
-            if(errno == ECHILD)
-                osrfLogError(OSRF_LOG_MARK, "We have no more live services... exiting");
-            else
-                osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s", strerror(errno));
-            break;
-        } else {
-            osrfLogError(OSRF_LOG_MARK, "We lost a top-level service process with PID %ld", pid);
-        }
-    }
 
+static void report_child_status( pid_t pid, int status )
+{
+       const char* app;
+       const char* libfile;
+       ChildNode* node = seek_child( pid );
 
-       return 0;
+       if( node ) {
+               app     = node->app     ? node->app     : "[unknown]";
+               libfile = node->libfile ? node->libfile : "[none]";
+       } else
+               app = libfile = NULL;
+       
+       if( WIFEXITED( status ) )
+       {
+               int rc = WEXITSTATUS( status );  // return code of child process
+               if( rc )
+                       osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
+                                                 (long) pid, app, rc );
+               else
+                       osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
+                                                 (long) pid, app );
+       }
+       else if( WIFSIGNALED( status ) )
+       {
+               osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
+                                         (long) pid, app, WTERMSIG( status) );
+       }
+       else if( WIFSTOPPED( status ) )
+       {
+               osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
+                                         (long) pid, app, (int) WSTOPSIG( status ) );
+       }
+
+       delete_child( node );
 }
 
+/*----------- Routines to manage list of children --*/
+
+static void add_child( pid_t pid, const char* app, const char* libfile )
+{
+       /* Construct new child node */
+       
+       ChildNode* node = safe_malloc( sizeof( ChildNode ) );
+
+       node->pid = pid;
+
+       if( app )
+               node->app = strdup( app );
+       else
+               node->app = NULL;
+
+       if( libfile )
+               node->libfile = strdup( libfile );
+       else
+               node->libfile = NULL;
+       
+       /* Add new child node to the head of the list */
+
+       node->pNext = child_list;
+       node->pPrev = NULL;
+
+       if( child_list )
+               child_list->pPrev = node;
+
+       child_list = node;
+}
+
+static void delete_child( ChildNode* node ) {
+
+       /* Sanity check */
+
+       if( ! node )
+               return;
+       
+       /* Detach the node from the list */
+
+       if( node->pPrev )
+               node->pPrev->pNext = node->pNext;
+       else
+               child_list = node->pNext;
+
+       if( node->pNext )
+               node->pNext->pPrev = node->pPrev;
+
+       /* Deallocate the node and its payload */
+
+       free( node->app );
+       free( node->libfile );
+       free( node );
+}
+
+static void delete_all_children( void ) {
+
+       while( child_list )
+               delete_child( child_list );
+}
+
+static ChildNode* seek_child( pid_t pid ) {
+
+       /* Return a pointer to the child node for the */
+       /* specified process ID, or NULL if not found */
+       
+       ChildNode* node = child_list;
+       while( node ) {
+               if( node->pid == pid )
+                       break;
+               else
+                       node = node->pNext;
+       }
+
+       return node;
+}
+
+/*----------- End of routines to manage list of children --*/
+
+
 int osrf_system_bootstrap_client_resc( char* config_file, char* contextnode, char* resource ) {
 
        int failure = 0;
@@ -314,6 +454,7 @@ int osrf_system_disconnect_client( void ) {
 
 int osrf_system_shutdown( void ) {
        osrfConfigCleanup();
+    osrfCacheCleanup();
        osrf_system_disconnect_client();
        osrf_settings_free_host_config(NULL);
        osrfAppSessionCleanup();