3 @brief Launch a collection of servers.
12 #include <sys/select.h>
16 #include "opensrf/utils.h"
17 #include "opensrf/log.h"
18 #include "opensrf/osrf_system.h"
19 #include "opensrf/osrf_application.h"
20 #include "opensrf/osrf_prefork.h"
/* Fallback for platforms whose headers do not provide HOST_NAME_MAX.  Guarded so
   that a definition supplied by the system is not redefined with another value. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 256
#endif
static void report_child_status( pid_t pid, int status );

typedef struct child_node ChildNode;

/**
	@brief Represents a child process.

	Nodes are chained into the doubly linked list headed by child_list.  The app and
	libfile strings are copied with strdup() by add_child() and released by
	delete_child(); either may be NULL.
*/
struct child_node
{
	ChildNode* pNext;     /**< Linkage pointer for doubly linked list. */
	ChildNode* pPrev;     /**< Linkage pointer for doubly linked list. */
	pid_t pid;            /**< Process ID of the child process. */
	char* app;            /**< Name of the child application, or NULL. */
	char* libfile;        /**< Name of the shared library, or NULL. */
};
42 /** List of child processes. */
43 static ChildNode* child_list;
45 /** Pointer to the global transport_client; i.e. our connection to Jabber. */
46 static transport_client* osrfGlobalTransportClient = NULL;
48 /** Switch to be set by signal handler */
49 static volatile sig_atomic_t sig_caught;
51 /** Boolean: set to true when we finish shutting down. */
52 static int shutdownComplete = 0;
54 static void add_child( pid_t pid, const char* app, const char* libfile );
55 static void delete_child( ChildNode* node );
56 static void delete_all_children( void );
57 static ChildNode* seek_child( pid_t pid );
60 @brief Wait on all dead child processes so that they won't be zombies.
62 static void reap_children( void ) {
64 if( SIGTERM == sig_caught || SIGINT == sig_caught ) {
65 osrfLogInfo( OSRF_LOG_MARK, "Killed by %s; terminating",
66 SIGTERM == sig_caught ? "SIGTERM" : "SIGINT" );
68 osrfLogInfo( OSRF_LOG_MARK, "Killed by signal %d; terminating", (int) sig_caught );
71 // If we caught a signal, then the signal handler already did a kill().
72 // If we didn't, then do the kill() now.
76 sleep(1); /* Give the children a chance to die before we reap them. */
78 // Wait for each dead child. The WNOHANG option means to return immediately if
79 // there are no dead children, instead of waiting for them to die. It is therefore
80 // possible for a child still to be alive when we exit this function, either because
81 // it intercepted the SIGTERM and ignored it, or because it took longer to die than
82 // the time we gave it.
84 while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0 )
85 osrfLogInfo(OSRF_LOG_MARK, "Killed child %d", child_pid);
87 // Remove all nodes from the list of child processes.
88 delete_all_children();
92 @brief Signal handler for SIGTERM and SIGINT.
94 Kill all child processes, and set a switch so that we'll know that the signal arrived.
96 static void handleKillSignal( int signo ) {
97 // First ignore SIGTERM. Otherwise we would send SIGTERM to ourself, intercept it,
98 // and kill() again in an endless loop.
99 signal( SIGTERM, SIG_IGN );
101 //Kill all child processes. This is safe to do in a signal handler, because POSIX
102 // specifies that kill() is reentrant. It is necessary because, if we did the kill()
103 // only in reap_children() (above), then there would be a narrow window of vulnerability
104 // in the main loop: if the signal arrives between checking sig_caught and calling wait(),
105 // we would wait indefinitely for a child to die on its own.
111 @brief Return a pointer to the global transport_client.
112 @return Pointer to the global transport_client, or NULL.
114 A given process needs only one connection to Jabber, so we keep it a pointer to it at
115 file scope. This function returns that pointer.
117 If the connection has been opened by a previous call to osrfSystemBootstrapClientResc(),
118 return the pointer. Otherwise return NULL.
120 transport_client* osrfSystemGetTransportClient( void ) {
121 return osrfGlobalTransportClient;
125 @brief Discard the global transport_client, but without disconnecting from Jabber.
127 To be called by a child process in order to disregard the parent's connection without
128 disconnecting it, since disconnecting would disconnect the parent as well.
130 void osrfSystemIgnoreTransportClient() {
131 client_discard( osrfGlobalTransportClient );
132 osrfGlobalTransportClient = NULL;
/**
	@brief Bootstrap a generic application from info in the configuration file.
	@param config_file Name of the configuration file.
	@param contextnode Name of an aggregate within the configuration file, containing the
	relevant subset of configuration stuff.
	@return 1 if successful; zero or -1 if error.

	A thin wrapper for osrfSystemBootstrapClientResc(), which does the real work (loading
	the configuration file, opening a connection to Jabber); no resource name is supplied.
*/
int osrf_system_bootstrap_client( char* config_file, char* contextnode ) {
	const char* const no_resource = NULL;
	return osrfSystemBootstrapClientResc( config_file, contextnode, no_resource );
}
153 @brief Connect to one or more cache servers.
154 @return Zero in all cases.
156 int osrfSystemInitCache( void ) {
158 jsonObject* cacheServers = osrf_settings_host_value_object("/cache/global/servers/server");
159 char* maxCache = osrf_settings_host_value("/cache/global/max_cache_time");
161 if( cacheServers && maxCache) {
163 if( cacheServers->type == JSON_ARRAY ) {
165 const char* servers[cacheServers->size];
166 for( i = 0; i != cacheServers->size; i++ ) {
167 servers[i] = jsonObjectGetString( jsonObjectGetIndex(cacheServers, i) );
168 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[i]);
170 osrfCacheInit( servers, cacheServers->size, atoi(maxCache) );
173 const char* servers[] = { jsonObjectGetString(cacheServers) };
174 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[0]);
175 osrfCacheInit( servers, 1, atoi(maxCache) );
179 osrfLogError( OSRF_LOG_MARK, "Missing config value for /cache/global/servers/server _or_ "
180 "/cache/global/max_cache_time");
183 jsonObjectFree( cacheServers );
188 @brief Launch a collection of servers, as defined by the settings server.
189 @param hostname Full network name of the host where the process is running; or
191 @param configfile Name of the configuration file; normally '/openils/conf/opensrf_core.xml'.
192 @param contextNode Name of an aggregate within the configuration file, containing the
193 relevant subset of configuration stuff.
194 @return - Zero if successful, or -1 if not.
/*
	NOTE(review): this listing omits a number of short lines from the function (returns,
	braces, some local declarations), so these notes describe only what is visible.

	Visible flow: bootstrap with the resource name "settings_grabber"; retrieve the
	settings for hostname and disconnect; collect the names under /activeapps/appname;
	fork one child for each application whose language setting is "c"; install
	handleKillSignal for SIGTERM and SIGINT; then loop on wait() while sig_caught is
	clear, passing each dead child to report_child_status().
*/
196 int osrfSystemBootstrap( const char* hostname, const char* configfile,
197 const char* contextNode ) {
198 if( !(hostname && configfile && contextNode) )
201 // Load the configuration, open the log, open a connection to Jabber
202 if(!osrfSystemBootstrapClientResc(configfile, contextNode, "settings_grabber" )) {
203 osrfLogError( OSRF_LOG_MARK,
204 "Unable to bootstrap for host %s from configuration file %s",
205 hostname, configfile );
209 shutdownComplete = 0;
211 // Get a list of applications to launch from the settings server
212 int retcode = osrf_settings_retrieve(hostname);
// The connection is dropped as soon as the settings have been requested,
// before retcode is examined.
213 osrf_system_disconnect_client();
216 osrfLogError( OSRF_LOG_MARK,
217 "Unable to retrieve settings for host %s from configuration file %s",
218 hostname, configfile );
222 // Turn into a daemon. The parent forks and exits. Only the
223 // child returns, with the standard streams (stdin, stdout, and
224 // stderr) redirected to /dev/null.
227 jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
228 osrfStringArray* arr = osrfNewStringArray(8);
// The setting may be a single string or an indexable collection of strings;
// either way the names are gathered into arr.
233 if(apps->type == JSON_STRING) {
234 osrfStringArrayAdd(arr, jsonObjectGetString(apps));
237 const jsonObject* app;
238 while( (app = jsonObjectGetIndex(apps, i++)) )
239 osrfStringArrayAdd(arr, jsonObjectGetString(app));
241 jsonObjectFree(apps);
243 const char* appname = NULL;
245 while( (appname = osrfStringArrayGetString(arr, i++)) ) {
// NOTE(review): no free() of lang or libfile is visible in this listing; confirm who
// owns the strings returned by osrf_settings_host_value().
247 char* lang = osrf_settings_host_value("/apps/%s/language", appname);
249 if(lang && !strcasecmp(lang,"c")) {
251 char* libfile = osrf_settings_host_value("/apps/%s/implementation", appname);
// appname is already known to be non-NULL from the loop condition, so in effect
// this tests libfile.
253 if(! (appname && libfile) ) {
254 osrfLogWarning( OSRF_LOG_MARK, "Missing appname / libfile in settings config");
258 osrfLogInfo( OSRF_LOG_MARK, "Launching application %s with implementation %s",
// NOTE(review): fork() returns -1 on failure, which is non-zero, so a failed fork
// appears to take the parent branch and hand pid -1 to add_child() -- confirm and
// consider handling the error explicitly.
263 if( (pid = fork()) ) { // if parent
264 // store pid in local list for re-launching dead children...
265 add_child( pid, appname, libfile );
266 osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
267 appname, (long) pid );
269 } else { // if child, run the application
271 osrfLogInfo( OSRF_LOG_MARK, " * Running application %s\n", appname);
// The application is run only if registration returned 0.
272 if( osrfAppRegisterApplication( appname, libfile ) == 0 )
273 osrf_prefork_run(appname);
275 osrfLogDebug( OSRF_LOG_MARK, "Server exiting for app %s and library %s\n",
283 osrfStringArrayFree(arr);
// The handlers are installed only after all of the children have been forked.
285 signal(SIGTERM, handleKillSignal);
286 signal(SIGINT, handleKillSignal);
288 // Wait indefinitely for all the child processes to terminate, or for a signal to
289 // tell us to stop. When there are no more child processes, wait() returns an
290 // ECHILD error and we break out of the loop.
293 while( ! sig_caught ) {
294 pid = wait( &status );
// ECHILD is logged as "no more live services"; any error other than EINTR is logged
// as the reason for leaving the loop.
296 if( errno == ECHILD )
297 osrfLogError( OSRF_LOG_MARK, "We have no more live services... exiting" );
298 else if( errno != EINTR )
299 osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s",
304 report_child_status( pid, status );
// Final cleanup: disconnect again, then release the host settings.
310 osrf_system_disconnect_client();
311 osrf_settings_free_host_config(NULL);
316 @brief Report the exit status of a dead child process, then remove it from the list.
317 @param pid Process ID of the child.
318 @param status Exit status as captured by wait().
320 static void report_child_status( pid_t pid, int status )
324 ChildNode* node = seek_child( pid );
327 app = node->app ? node->app : "[unknown]";
328 libfile = node->libfile ? node->libfile : "[none]";
332 if( WIFEXITED( status ) )
334 int rc = WEXITSTATUS( status ); // return code of child process
336 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
337 (long) pid, app, rc );
339 osrfLogInfo( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
342 else if( WIFSIGNALED( status ) )
344 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
345 (long) pid, app, WTERMSIG( status) );
347 else if( WIFSTOPPED( status ) )
349 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
350 (long) pid, app, (int) WSTOPSIG( status ) );
353 delete_child( node );
356 /*----------- Routines to manage list of children --*/
359 @brief Add a node to the list of child processes.
360 @param pid Process ID of the child process.
361 @param app Name of the child application.
362 @param libfile Name of the shared library where the child process resides.
364 static void add_child( pid_t pid, const char* app, const char* libfile )
366 /* Construct new child node */
368 ChildNode* node = safe_malloc( sizeof( ChildNode ) );
373 node->app = strdup( app );
378 node->libfile = strdup( libfile );
380 node->libfile = NULL;
382 /* Add new child node to the head of the list */
384 node->pNext = child_list;
388 child_list->pPrev = node;
394 @brief Remove a node from the list of child processes.
395 @param node Pointer to the node to be removed.
397 static void delete_child( ChildNode* node ) {
404 /* Detach the node from the list */
407 node->pPrev->pNext = node->pNext;
409 child_list = node->pNext;
412 node->pNext->pPrev = node->pPrev;
414 /* Deallocate the node and its payload */
417 free( node->libfile );
422 @brief Remove all nodes from the list of child processes, rendering it empty.
424 static void delete_all_children( void ) {
427 delete_child( child_list );
431 @brief Find the node for a child process of a given process ID.
432 @param pid The process ID of the child process.
433 @return A pointer to the corresponding node if found; otherwise NULL.
435 static ChildNode* seek_child( pid_t pid ) {
437 /* Return a pointer to the child node for the */
438 /* specified process ID, or NULL if not found */
440 ChildNode* node = child_list;
442 if( node->pid == pid )
451 /*----------- End of routines to manage list of children --*/
454 @brief Bootstrap a generic application from info in the configuration file.
455 @param config_file Name of the configuration file.
456 @param contextnode Name of an aggregate within the configuration file, containing the
457 relevant subset of configuration stuff.
458 @param resource Used to construct a Jabber resource name; may be NULL.
459 @return 1 if successful; zero or -1 if error.
461 - Load the configuration file.
463 - Open a connection to Jabber.
/*
	NOTE(review): this listing omits a number of short lines from the function (returns,
	braces, some local declarations), so these notes describe only what is visible.

	Visible flow: return 1 at once if a transport client already exists; load the
	configuration; read the logging and connection settings; initialize logging to
	syslog or to a file; complain about a missing domain, username, password, or
	port/unixpath; build a resource name from resource, host name, a timestamp, and
	the process ID; connect, and on success store the client in
	osrfGlobalTransportClient.
*/
465 int osrfSystemBootstrapClientResc( const char* config_file,
466 const char* contextnode, const char* resource ) {
470 if(osrfSystemGetTransportClient()) {
471 osrfLogInfo(OSRF_LOG_MARK, "Client is already bootstrapped");
472 return 1; /* we already have a client connection */
// config_file and contextnode may be omitted only when a default configuration
// has already been installed.
475 if( !( config_file && contextnode ) && ! osrfConfigHasDefaultConfig() ) {
476 osrfLogError( OSRF_LOG_MARK, "No Config File Specified\n" );
481 osrfConfig* cfg = osrfConfigInit( config_file, contextnode );
483 osrfConfigSetDefaultConfig(cfg);
485 return 0; /* Can't load configuration? Bail out */
// NOTE(review): the check above lets config_file be NULL when a default configuration
// exists, yet the messages below pass config_file to a %s conversion -- confirm that
// this cannot happen, or substitute a placeholder.
488 char* log_file = osrfConfigGetValue( NULL, "/logfile");
490 fprintf(stderr, "No log file specified in configuration file %s\n",
495 char* log_level = osrfConfigGetValue( NULL, "/loglevel" );
// Only the first "/domain" entry collected here is used further down.
496 osrfStringArray* arr = osrfNewStringArray(8);
497 osrfConfigGetValueList(NULL, arr, "/domain");
499 char* username = osrfConfigGetValue( NULL, "/username" );
500 char* password = osrfConfigGetValue( NULL, "/passwd" );
501 char* port = osrfConfigGetValue( NULL, "/port" );
502 char* unixpath = osrfConfigGetValue( NULL, "/unixpath" );
503 char* facility = osrfConfigGetValue( NULL, "/syslog" );
504 char* actlog = osrfConfigGetValue( NULL, "/actlog" );
506 /* if we're a source-client, tell the logger */
507 char* isclient = osrfConfigGetValue(NULL, "/client");
508 if( isclient && !strcasecmp(isclient,"true") )
509 osrfLogSetIsClient(1);
// atoi() reports no errors: a non-numeric port yields 0, which the port check
// further down rejects unless a unixpath is configured.
514 if(port) iport = atoi(port);
515 if(log_level) llevel = atoi(log_level);
// A log file setting of "syslog" selects syslog logging; anything else is treated
// as the name of a log file.
517 if(!strcmp(log_file, "syslog")) {
518 osrfLogInit( OSRF_LOG_TYPE_SYSLOG, contextnode, llevel );
519 osrfLogSetSyslogFacility(osrfLogFacilityToInt(facility));
520 if(actlog) osrfLogSetSyslogActFacility(osrfLogFacilityToInt(actlog));
523 osrfLogInit( OSRF_LOG_TYPE_FILE, contextnode, llevel );
524 osrfLogSetFile( log_file );
528 /* Get a domain, if one is specified */
529 const char* domain = osrfStringArrayGetString( arr, 0 ); /* just the first for now */
// Each missing setting is reported both on stderr and in the log.
531 fprintf(stderr, "No domain specified in configuration file %s\n", config_file);
532 osrfLogError( OSRF_LOG_MARK, "No domain specified in configuration file %s\n",
538 fprintf(stderr, "No username specified in configuration file %s\n", config_file);
539 osrfLogError( OSRF_LOG_MARK, "No username specified in configuration file %s\n",
545 fprintf(stderr, "No password specified in configuration file %s\n", config_file);
546 osrfLogError( OSRF_LOG_MARK, "No password specified in configuration file %s\n",
// Either a positive port number or a unixpath is required.
551 if((iport <= 0) && !unixpath) {
552 fprintf(stderr, "No unixpath or valid port in configuration file %s\n", config_file);
553 osrfLogError( OSRF_LOG_MARK, "No unixpath or valid port in configuration file %s\n",
// NOTE(review): arr is freed here and again further down; presumably the two calls
// are on different paths (failure and normal completion) -- confirm against the
// complete source.
559 osrfStringArrayFree(arr);
571 osrfLogInfo( OSRF_LOG_MARK, "Bootstrapping system with domain %s, port %d, and unixpath %s",
572 domain, iport, unixpath ? unixpath : "(none)" );
573 transport_client* client = client_init( domain, iport, unixpath, 0 );
// The result of gethostname() is not checked; host starts out empty and is
// terminated explicitly in case the name was truncated.
575 char host[HOST_NAME_MAX + 1] = "";
576 gethostname(host, sizeof(host) );
577 host[HOST_NAME_MAX] = '\0';
581 snprintf(tbuf, 32, "%f", get_timestamp_millis());
583 if(!resource) resource = "";
// NOTE(review): host alone may occupy HOST_NAME_MAX characters, so the formatted name
// can exceed strlen(resource) + 256, in which case snprintf() truncates it silently.
// snprintf() already reserves room for the terminator, so passing len - 1 merely
// leaves the last byte of buf unused.
585 int len = strlen(resource) + 256;
588 snprintf(buf, len - 1, "%s_%s_%s_%ld", resource, host, tbuf, (long) getpid() );
// NOTE(review): no cleanup of client is visible for the case where client_connect()
// fails -- confirm whether it is freed elsewhere.
590 if(client_connect( client, username, password, buf, 10, AUTH_DIGEST )) {
591 /* child nodes will leak the parents client... but we can't free
592 it without disconnecting the parents client :( */
593 osrfGlobalTransportClient = client;
596 osrfStringArrayFree(arr);
// The result presumably reports whether the global client was set above.
606 if(osrfGlobalTransportClient)
613 @brief Disconnect from Jabber.
614 @return Zero in all cases.
616 int osrf_system_disconnect_client( void ) {
617 client_disconnect( osrfGlobalTransportClient );
618 client_free( osrfGlobalTransportClient );
619 osrfGlobalTransportClient = NULL;
624 @brief Shut down a laundry list of facilities typically used by servers.
627 - Settings from configuration file
629 - Connection to Jabber
630 - Settings from settings server
631 - Application sessions
634 int osrf_system_shutdown( void ) {
640 osrf_system_disconnect_client();
641 osrf_settings_free_host_config(NULL);
642 osrfAppSessionCleanup();
644 shutdownComplete = 1;