3 @brief Launch a collection of servers.
12 #include <sys/select.h>
16 #include "opensrf/utils.h"
17 #include "opensrf/log.h"
18 #include "opensrf/osrf_system.h"
19 #include "opensrf/osrf_application.h"
20 #include "opensrf/osrf_prefork.h"
// Size of the hostname buffer handed to gethostname() in
// osrfSystemBootstrapClientResc(); the buffer itself is declared one byte larger.
23 #define HOST_NAME_MAX 256
26 static void report_child_status( pid_t pid, int status );
28 typedef struct child_node ChildNode;
31 @brief Represents a child process.
35 ChildNode* pNext; /**< Next node in the doubly linked list of children. */
36 ChildNode* pPrev; /**< Previous node in the doubly linked list of children. */
37 pid_t pid; /**< Process ID of the child process. */
42 /** Head of the list of child processes; add_child() inserts new nodes here. */
43 static ChildNode* child_list;
45 /** Pointer to the global transport_client; i.e. our connection to Jabber. */
// Stays NULL until osrfSystemBootstrapClientResc() connects successfully, and is reset
// to NULL by osrf_system_disconnect_client() and osrfSystemIgnoreTransportClient().
46 static transport_client* osrfGlobalTransportClient = NULL;
48 /** Switch to be set by signal handler */
// Zero (static default) while no signal has arrived. The wait loop in
// osrfSystemBootstrap() polls it, and reap_children() reads it as a signal number.
49 static volatile sig_atomic_t sig_caught;
51 /** Boolean: set to true when we finish shutting down. */
// Cleared by osrfSystemBootstrap() and set to 1 by osrf_system_shutdown().
52 static int shutdownComplete = 0;
54 /** Name of file to which to write the process ID of the child process */
// Heap copy owned by this file: stored by osrfSystemSetPidFile() and freed in
// osrfSystemBootstrap(). NOTE(review): not declared static, so the symbol has
// external linkage -- confirm whether anything outside this file relies on that.
55 char* pidfile_name = NULL;
// Prototypes for the child-list helpers defined further down in this file.
57 static void add_child( pid_t pid, const char* app, const char* libfile );
58 static void delete_child( ChildNode* node );
59 static void delete_all_children( void );
60 static ChildNode* seek_child( pid_t pid );
63 @brief Wait on all dead child processes so that they won't be zombies.
// Logs why we are terminating (when a signal was caught), gives the children a moment
// to exit, reaps whatever has died by then, and empties the child list.
65 static void reap_children( void ) {
// sig_caught is read as a signal number: SIGTERM and SIGINT are reported by name,
// any other value by number.
67 if( SIGTERM == sig_caught || SIGINT == sig_caught ) {
68 osrfLogInfo( OSRF_LOG_MARK, "Killed by %s; terminating",
69 SIGTERM == sig_caught ? "SIGTERM" : "SIGINT" );
71 osrfLogInfo( OSRF_LOG_MARK, "Killed by signal %d; terminating", (int) sig_caught );
74 // If we caught a signal, then the signal handler already did a kill().
75 // If we didn't, then do the kill() now.
// Fixed one-second grace period; a child that needs longer is not reaped below.
79 sleep(1); /* Give the children a chance to die before we reap them. */
81 // Wait for each dead child. The WNOHANG option means to return immediately if
82 // there are no dead children, instead of waiting for them to die. It is therefore
83 // possible for a child still to be alive when we exit this function, either because
84 // it intercepted the SIGTERM and ignored it, or because it took longer to die than
85 // the time we gave it.
// The exit status is discarded (NULL), so unlike report_child_status() this path
// logs only the PID of each reaped child.
// NOTE(review): child_pid is passed to %d without a cast, while other log calls in
// this file cast a pid_t to long and use %ld -- confirm the declared type matches.
87 while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0 )
88 osrfLogInfo(OSRF_LOG_MARK, "Killed child %d", child_pid);
90 // Remove all nodes from the list of child processes.
91 delete_all_children();
95 @brief Signal handler for SIGTERM and SIGINT.
97 Kill all child processes, and set a switch so that we'll know that the signal arrived.
// Installed for both SIGTERM and SIGINT by osrfSystemBootstrap(); signo is the number
// of the signal being delivered.
99 static void handleKillSignal( int signo ) {
100 // First ignore SIGTERM. Otherwise we would send SIGTERM to ourself, intercept it,
101 // and kill() again in an endless loop.
102 signal( SIGTERM, SIG_IGN );
104 // Kill all child processes. This is safe to do in a signal handler, because POSIX
105 // lists kill() among the async-signal-safe functions. It is necessary because, if we did
106 // the kill() only in reap_children() (above), then there would be a narrow window of
107 // vulnerability in the main loop: if the signal arrives between checking sig_caught and
108 // calling wait(), we would wait indefinitely for a child to die on its own.
114 @brief Return a pointer to the global transport_client.
115 @return Pointer to the global transport_client, or NULL.
117 A given process needs only one connection to Jabber, so we keep a pointer to it at
118 file scope. This function returns that pointer.
120 If the connection has been opened by a previous call to osrfSystemBootstrapClientResc(),
121 return the pointer. Otherwise return NULL.
// Plain accessor: it never opens a connection. The pointer is NULL until
// osrfSystemBootstrapClientResc() stores a connected client, and NULL again after
// osrf_system_disconnect_client() or osrfSystemIgnoreTransportClient() resets it.
123 transport_client* osrfSystemGetTransportClient( void ) {
124 return osrfGlobalTransportClient;
128 @brief Save a copy of a file name to be used for writing a process ID.
129 @param name Designated file name, or NULL.
131 Save a file name for later use in saving a process ID. If @a name is NULL, leave
134 When the parent process spawns a child, the child becomes a daemon. The parent writes the
135 child's process ID to the PID file, if one has been designated, so that some other process
136 can retrieve the PID later and kill the daemon.
// Replaces the saved PID file name with a private heap copy of name; the caller keeps
// ownership of the string it passed in.
138 void osrfSystemSetPidFile( const char* name ) {
// Release the previously saved name, if any, before storing the new one.
140 free( pidfile_name );
// NOTE(review): name is documented as possibly NULL, and strdup(NULL) is undefined;
// confirm that this assignment is only reached for a non-NULL name. The result of
// strdup() is stored as returned, so an allocation failure leaves NULL here.
143 pidfile_name = strdup( name );
149 @brief Discard the global transport_client, but without disconnecting from Jabber.
151 To be called by a child process in order to disregard the parent's connection without
152 disconnecting it, since disconnecting would disconnect the parent as well.
// Drops this process's handle on the shared connection without calling
// client_disconnect(), in contrast to osrf_system_disconnect_client().
154 void osrfSystemIgnoreTransportClient() {
// NOTE(review): presumably client_discard() releases the local client object while
// leaving the connection itself open -- confirm against the transport client code.
155 client_discard( osrfGlobalTransportClient );
// Forget the pointer so that osrfSystemGetTransportClient() returns NULL from now on.
156 osrfGlobalTransportClient = NULL;
160 @brief Bootstrap a generic application from info in the configuration file.
161 @param config_file Name of the configuration file.
162 @param contextnode Name of an aggregate within the configuration file, containing the
163 relevant subset of configuration stuff.
164 @return 1 if successful; zero or -1 if error.
166 - Load the configuration file.
168 - Open a connection to Jabber.
170 A thin wrapper for osrfSystemBootstrapClientResc, passing it NULL for a resource.
// Convenience entry point for callers that do not need a Jabber resource name. The
// return value is passed through unchanged from osrfSystemBootstrapClientResc().
172 int osrf_system_bootstrap_client( char* config_file, char* contextnode ) {
// A NULL resource is replaced by an empty string inside the callee.
173 return osrfSystemBootstrapClientResc(config_file, contextnode, NULL);
177 @brief Connect to one or more cache servers.
178 @return Zero in all cases.
// Reads the cache server list and the maximum cache time from the host settings and
// hands them to osrfCacheInit(). A missing setting is only logged as an error.
180 int osrfSystemInitCache( void ) {
182 jsonObject* cacheServers = osrf_settings_host_value_object("/cache/global/servers/server");
183 char* maxCache = osrf_settings_host_value("/cache/global/max_cache_time");
// Both settings are required; otherwise the error message further down is logged.
185 if( cacheServers && maxCache) {
// Several servers configured: collect one server name per array element.
187 if( cacheServers->type == JSON_ARRAY ) {
// Variable-length array sized by the number of configured servers.
189 const char* servers[cacheServers->size];
190 for( i = 0; i != cacheServers->size; i++ ) {
191 servers[i] = jsonObjectGetString( jsonObjectGetIndex(cacheServers, i) );
192 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[i]);
// atoi() yields 0 for non-numeric text; the configured value is not validated here.
194 osrfCacheInit( servers, cacheServers->size, atoi(maxCache) );
// Otherwise the setting is treated as a single server name.
197 const char* servers[] = { jsonObjectGetString(cacheServers) };
198 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[0]);
199 osrfCacheInit( servers, 1, atoi(maxCache) );
203 osrfLogError( OSRF_LOG_MARK, "Missing config value for /cache/global/servers/server _or_ "
204 "/cache/global/max_cache_time");
// NOTE(review): only cacheServers is released on this line; confirm whether the string
// returned by osrf_settings_host_value() (maxCache) must also be freed by the caller.
207 jsonObjectFree( cacheServers );
212 @brief Launch a collection of servers, as defined by the settings server.
213 @param hostname Full network name of the host where the process is running; or
215 @param configfile Name of the configuration file; normally '/openils/conf/opensrf_core.xml'.
216 @param contextNode Name of an aggregate within the configuration file, containing the
217 relevant subset of configuration stuff.
218 @return - Zero if successful, or -1 if not.
// Top-level launcher. In order: bootstrap a temporary connection, fetch this host's
// settings, daemonize (optionally recording the PID), fork one child per active
// application implemented in C, then wait for the children or for a signal.
220 int osrfSystemBootstrap( const char* hostname, const char* configfile,
221 const char* contextNode ) {
// All three arguments are mandatory.
222 if( !(hostname && configfile && contextNode) )
225 // Load the configuration, open the log, open a connection to Jabber
// NOTE(review): the callee is documented as returning zero or -1 on error; a -1 result
// is not caught by this negation test -- confirm which error values can reach here.
226 if(!osrfSystemBootstrapClientResc(configfile, contextNode, "settings_grabber" )) {
227 osrfLogError( OSRF_LOG_MARK,
228 "Unable to bootstrap for host %s from configuration file %s",
229 hostname, configfile );
233 shutdownComplete = 0;
235 // Get a list of applications to launch from the settings server
236 int retcode = osrf_settings_retrieve(hostname);
// The connection was needed only to fetch the settings; it is closed before any fork.
237 osrf_system_disconnect_client();
240 osrfLogError( OSRF_LOG_MARK,
241 "Unable to retrieve settings for host %s from configuration file %s",
242 hostname, configfile );
246 // Turn into a daemon. The parent forks and exits. Only the
247 // child returns, with the standard streams (stdin, stdout, and
248 // stderr) redirected to /dev/null.
249 FILE* pidfile = NULL;
// The PID file is opened for writing, replacing any previous content.
251 pidfile = fopen( pidfile_name, "w" );
253 osrfLogError( OSRF_LOG_MARK, "Unable to open PID file \"%s\": %s",
254 pidfile_name, strerror( errno ) );
255 free( pidfile_name );
// pidfile is NULL when no PID file was opened; the stream is handed to the
// daemonizing helper, which presumably writes the daemon PID to it -- TODO confirm.
260 daemonize_write_pid( pidfile );
// The saved file name is released here, after daemonizing.
263 free( pidfile_name );
// The active application list may be a single string or an array of strings; either
// way the names are copied into arr so that apps can be freed right away.
267 jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
268 osrfStringArray* arr = osrfNewStringArray(8);
273 if(apps->type == JSON_STRING) {
274 osrfStringArrayAdd(arr, jsonObjectGetString(apps));
277 const jsonObject* app;
// Iteration stops at the first index for which jsonObjectGetIndex() returns NULL.
278 while( (app = jsonObjectGetIndex(apps, i++)) )
279 osrfStringArrayAdd(arr, jsonObjectGetString(app));
281 jsonObjectFree(apps);
283 const char* appname = NULL;
// One pass per application name; the loop ends at the first NULL entry.
285 while( (appname = osrfStringArrayGetString(arr, i++)) ) {
287 char* lang = osrf_settings_host_value("/apps/%s/language", appname);
// Only applications whose configured language is C (case-insensitive) are launched.
289 if(lang && !strcasecmp(lang,"c")) {
291 char* libfile = osrf_settings_host_value("/apps/%s/implementation", appname);
// appname is already known to be non-NULL from the loop condition, so in practice
// this test guards against a missing implementation setting.
293 if(! (appname && libfile) ) {
294 osrfLogWarning( OSRF_LOG_MARK, "Missing appname / libfile in settings config");
298 osrfLogInfo( OSRF_LOG_MARK, "Launching application %s with implementation %s",
// NOTE(review): fork() returns -1 on failure, which is nonzero, so a failed fork takes
// the parent branch below and records a child with PID -1.
303 if( (pid = fork()) ) { // if parent
304 // store pid in local list for re-launching dead children...
305 add_child( pid, appname, libfile );
306 osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
307 appname, (long) pid );
309 } else { // if child, run the application
311 osrfLogInfo( OSRF_LOG_MARK, " * Running application %s\n", appname);
// The prefork server runs only if the application registered successfully.
312 if( osrfAppRegisterApplication( appname, libfile ) == 0 )
313 osrf_prefork_run(appname);
315 osrfLogDebug( OSRF_LOG_MARK, "Server exiting for app %s and library %s\n",
323 osrfStringArrayFree(arr);
// The handlers are installed only after all children have been forked.
325 signal(SIGTERM, handleKillSignal);
326 signal(SIGINT, handleKillSignal);
328 // Wait indefinitely for all the child processes to terminate, or for a signal to
329 // tell us to stop. When there are no more child processes, wait() returns an
330 // ECHILD error and we break out of the loop.
333 while( ! sig_caught ) {
334 pid = wait( &status );
// EINTR (wait() interrupted by a signal) is the one failure that is not logged as an
// error; the loop condition then sees sig_caught.
336 if( errno == ECHILD )
337 osrfLogError( OSRF_LOG_MARK, "We have no more live services... exiting" );
338 else if( errno != EINTR )
339 osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s",
// A child ended: log how it ended and drop it from the child list.
344 report_child_status( pid, status );
// Final cleanup of the connection and of the cached host settings.
350 osrf_system_disconnect_client();
351 osrf_settings_free_host_config(NULL);
356 @brief Report the exit status of a dead child process, then remove it from the list.
357 @param pid Process ID of the child.
358 @param status Exit status as captured by wait().
// Translates a wait() status into a log message naming the application, then removes
// the child from the list.
360 static void report_child_status( pid_t pid, int status )
// seek_child() returns NULL for a PID that is not in the list.
// NOTE(review): confirm that node is checked before the dereferences below and before
// it is handed to delete_child().
364 ChildNode* node = seek_child( pid );
// Placeholder strings keep the log messages printable when a name was never recorded.
367 app = node->app ? node->app : "[unknown]";
368 libfile = node->libfile ? node->libfile : "[none]";
// Case 1: the child called exit() or returned from main().
372 if( WIFEXITED( status ) )
374 int rc = WEXITSTATUS( status ); // return code of child process
// One message reports the return code as an error, the other reports a normal exit at
// INFO level; presumably a nonzero rc selects the former -- TODO confirm.
376 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
377 (long) pid, app, rc );
379 osrfLogInfo( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
// Case 2: the child was terminated by a signal.
382 else if( WIFSIGNALED( status ) )
384 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
385 (long) pid, app, WTERMSIG( status) );
// Case 3: the child was stopped. The caller obtains status from plain wait(), which
// does not report stopped children, so this branch is defensive.
387 else if( WIFSTOPPED( status ) )
389 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
390 (long) pid, app, (int) WSTOPSIG( status ) );
// The node is removed whichever branch produced the message.
393 delete_child( node );
396 /*----------- Routines to manage list of children --*/
399 @brief Add a node to the list of child processes.
400 @param pid Process ID of the child process.
401 @param app Name of the child application.
402 @param libfile Name of the shared library where the child process resides.
// Records a newly forked child at the head of child_list. The node keeps its own
// copies of the application and library names, so the caller's strings are not retained.
404 static void add_child( pid_t pid, const char* app, const char* libfile )
406 /* Construct new child node */
408 ChildNode* node = safe_malloc( sizeof( ChildNode ) );
// Private copy of the application name.
413 node->app = strdup( app );
// Private copy of the library name, or NULL when there is none to copy.
418 node->libfile = strdup( libfile );
420 node->libfile = NULL;
422 /* Add new child node to the head of the list */
// The new node points forward at the old head...
424 node->pNext = child_list;
// ...and the old head points back at the new node.
// NOTE(review): confirm this line is skipped when the list was empty.
428 child_list->pPrev = node;
434 @brief Remove a node from the list of child processes.
435 @param node Pointer to the node to be removed.
// Unlinks one node from child_list and releases the node and the strings it owns.
437 static void delete_child( ChildNode* node ) {
444 /* Detach the node from the list */
// Interior or tail node: the predecessor skips over it.
447 node->pPrev->pNext = node->pNext;
// Head node: the list now starts at the successor.
449 child_list = node->pNext;
// Any successor is pointed back at the predecessor of the removed node.
452 node->pNext->pPrev = node->pPrev;
454 /* Deallocate the node and its payload */
457 free( node->libfile );
462 @brief Remove all nodes from the list of child processes, rendering it empty.
// Empties child_list by deleting its head node.
464 static void delete_all_children( void ) {
// delete_child() moves child_list on to the next node when it removes the head, so
// deleting the head repeatedly walks the whole list.
467 delete_child( child_list );
471 @brief Find the node for a child process of a given process ID.
472 @param pid The process ID of the child process.
473 @return A pointer to the corresponding node if found; otherwise NULL.
// Linear search of child_list by process ID.
475 static ChildNode* seek_child( pid_t pid ) {
477 /* Return a pointer to the child node for the */
478 /* specified process ID, or NULL if not found */
// Start at the head of the list.
480 ChildNode* node = child_list;
// Compare the PID stored in the current node with the one requested.
482 if( node->pid == pid )
491 /*----------- End of routines to manage list of children --*/
494 @brief Bootstrap a generic application from info in the configuration file.
495 @param config_file Name of the configuration file.
496 @param contextnode Name of an aggregate within the configuration file, containing the
497 relevant subset of configuration stuff.
498 @param resource Used to construct a Jabber resource name; may be NULL.
499 @return 1 if successful; zero or -1 if error.
501 - Load the configuration file.
503 - Open a connection to Jabber.
// Loads the configuration, opens the log, and connects to Jabber under a resource
// name unique to this process. On success the connected client is stored in
// osrfGlobalTransportClient.
505 int osrfSystemBootstrapClientResc( const char* config_file,
506 const char* contextnode, const char* resource ) {
// Calling this again while a connection exists counts as success and changes nothing.
510 if(osrfSystemGetTransportClient()) {
511 osrfLogInfo(OSRF_LOG_MARK, "Client is already bootstrapped");
512 return 1; /* we already have a client connection */
// A missing file name or context is acceptable only if a default configuration
// has already been installed.
515 if( !( config_file && contextnode ) && ! osrfConfigHasDefaultConfig() ) {
516 osrfLogError( OSRF_LOG_MARK, "No Config File Specified\n" );
521 osrfConfig* cfg = osrfConfigInit( config_file, contextnode );
523 osrfConfigSetDefaultConfig(cfg);
525 return 0; /* Can't load configuration? Bail out */
// The log destination is read first; this complaint goes to stderr.
528 char* log_file = osrfConfigGetValue( NULL, "/logfile");
530 fprintf(stderr, "No log file specified in configuration file %s\n",
535 char* log_level = osrfConfigGetValue( NULL, "/loglevel" );
// arr receives every configured domain; only the first one is used further down.
536 osrfStringArray* arr = osrfNewStringArray(8);
537 osrfConfigGetValueList(NULL, arr, "/domain");
539 char* username = osrfConfigGetValue( NULL, "/username" );
540 char* password = osrfConfigGetValue( NULL, "/passwd" );
541 char* port = osrfConfigGetValue( NULL, "/port" );
542 char* unixpath = osrfConfigGetValue( NULL, "/unixpath" );
543 char* facility = osrfConfigGetValue( NULL, "/syslog" );
544 char* actlog = osrfConfigGetValue( NULL, "/actlog" );
546 /* if we're a source-client, tell the logger */
547 char* isclient = osrfConfigGetValue(NULL, "/client");
548 if( isclient && !strcasecmp(isclient,"true") )
549 osrfLogSetIsClient(1);
// atoi() yields 0 for non-numeric text; a port of 0 is rejected below unless a
// unixpath is configured.
554 if(port) iport = atoi(port);
555 if(log_level) llevel = atoi(log_level);
// The literal file name syslog selects syslog logging; any other value is a file path.
557 if(!strcmp(log_file, "syslog")) {
558 osrfLogInit( OSRF_LOG_TYPE_SYSLOG, contextnode, llevel );
559 osrfLogSetSyslogFacility(osrfLogFacilityToInt(facility));
560 if(actlog) osrfLogSetSyslogActFacility(osrfLogFacilityToInt(actlog));
563 osrfLogInit( OSRF_LOG_TYPE_FILE, contextnode, llevel );
564 osrfLogSetFile( log_file );
568 /* Get a domain, if one is specified */
569 const char* domain = osrfStringArrayGetString( arr, 0 ); /* just the first for now */
// From here on the log is open, so each missing setting is reported both on stderr
// and in the log.
571 fprintf(stderr, "No domain specified in configuration file %s\n", config_file);
572 osrfLogError( OSRF_LOG_MARK, "No domain specified in configuration file %s\n",
578 fprintf(stderr, "No username specified in configuration file %s\n", config_file);
579 osrfLogError( OSRF_LOG_MARK, "No username specified in configuration file %s\n",
585 fprintf(stderr, "No password specified in configuration file %s\n", config_file);
586 osrfLogError( OSRF_LOG_MARK, "No password specified in configuration file %s\n",
// Either a positive TCP port or a Unix socket path is required.
591 if((iport <= 0) && !unixpath) {
592 fprintf(stderr, "No unixpath or valid port in configuration file %s\n", config_file);
593 osrfLogError( OSRF_LOG_MARK, "No unixpath or valid port in configuration file %s\n",
// NOTE(review): arr is freed at two places in this function; presumably this one
// belongs to the validation-failure path and the later one to the normal path.
599 osrfStringArrayFree(arr);
611 osrfLogInfo( OSRF_LOG_MARK, "Bootstrapping system with domain %s, port %d, and unixpath %s",
612 domain, iport, unixpath ? unixpath : "(none)" );
613 transport_client* client = client_init( domain, iport, unixpath, 0 );
// host starts out as an empty string and the return value of gethostname() is not
// checked, so a failed call leaves an empty host component. The explicit terminator
// covers a name truncated to the buffer size, which POSIX does not promise to
// null-terminate.
615 char host[HOST_NAME_MAX + 1] = "";
616 gethostname(host, sizeof(host) );
617 host[HOST_NAME_MAX] = '\0';
// Timestamp text that helps make the resource name unique.
621 snprintf(tbuf, 32, "%f", get_timestamp_millis());
623 if(!resource) resource = "";
// Room for the caller's resource prefix plus 256 bytes for host, timestamp, and PID.
625 int len = strlen(resource) + 256;
// Resource name layout: resource, host, timestamp, and PID joined by underscores.
// snprintf() truncates the text if it does not fit in the given bound.
628 snprintf(buf, len - 1, "%s_%s_%s_%ld", resource, host, tbuf, (long) getpid() );
// The global pointer is set only when the connection attempt succeeds.
630 if(client_connect( client, username, password, buf, 10, AUTH_DIGEST )) {
631 osrfGlobalTransportClient = client;
// NOTE(review): domain was obtained from arr; presumably it points into the array, so
// arr has to stay alive until the connection attempt above is over -- TODO confirm.
634 osrfStringArrayFree(arr);
// The outcome is decided by whether the global client was set above.
644 if(osrfGlobalTransportClient)
651 @brief Disconnect from Jabber.
652 @return Zero in all cases.
// Closes the Jabber connection, frees the client object, and clears the global pointer.
// NOTE(review): this can be reached while the pointer is already NULL (for example
// osrfSystemBootstrap() calls it twice), so it relies on client_disconnect() and
// client_free() accepting NULL -- TODO confirm.
654 int osrf_system_disconnect_client( void ) {
655 client_disconnect( osrfGlobalTransportClient );
656 client_free( osrfGlobalTransportClient );
// Clearing the pointer makes osrfSystemGetTransportClient() return NULL and lets a
// later osrfSystemBootstrapClientResc() call open a fresh connection.
657 osrfGlobalTransportClient = NULL;
662 @brief Shut down a laundry list of facilities typically used by servers.
665 - Settings from configuration file
667 - Connection to Jabber
668 - Settings from settings server
669 - Application sessions
// Releases the server-side facilities in a fixed order and records that shutdown ran.
// NOTE(review): presumably repeated calls are short-circuited by testing
// shutdownComplete first -- TODO confirm.
672 int osrf_system_shutdown( void ) {
// Drop the Jabber connection first, then the cached host settings, then any
// application sessions.
678 osrf_system_disconnect_client();
679 osrf_settings_free_host_config(NULL);
680 osrfAppSessionCleanup();
// Marks the shutdown as done; osrfSystemBootstrap() clears this flag again.
682 shutdownComplete = 1;