/**
	@file
	@brief Launch a collection of servers.
*/
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "opensrf/utils.h"
#include "opensrf/log.h"
#include "opensrf/osrf_system.h"
#include "opensrf/osrf_application.h"
#include "opensrf/osrf_prefork.h"
23 #define HOST_NAME_MAX 256
26 static void report_child_status( pid_t pid, int status );
28 typedef struct child_node ChildNode;
29 osrfStringArray* log_protect_arr = NULL;
32 @brief Represents a child process.
36 ChildNode* pNext; /**< Linkage pointer for doubly linked list. */
37 ChildNode* pPrev; /**< Linkage pointer for doubly linked list. */
38 pid_t pid; /**< Process ID of the child process. */
43 /** List of child processes. */
44 static ChildNode* child_list;
46 /** Pointer to the global transport_client; i.e. our connection to Jabber. */
47 static transport_client* osrfGlobalTransportClient = NULL;
49 /** Switch to be set by signal handler */
50 static volatile sig_atomic_t sig_caught;
52 /** Boolean: set to true when we finish shutting down. */
53 static int shutdownComplete = 0;
55 /** Name of file to which to write the process ID of the child process */
56 char* pidfile_name = NULL;
58 static void add_child( pid_t pid, const char* app, const char* libfile );
59 static void delete_child( ChildNode* node );
60 static void delete_all_children( void );
61 static ChildNode* seek_child( pid_t pid );
64 @brief Wait on all dead child processes so that they won't be zombies.
66 static void reap_children( void ) {
68 if( SIGTERM == sig_caught || SIGINT == sig_caught ) {
69 osrfLogInfo( OSRF_LOG_MARK, "Killed by %s; terminating",
70 SIGTERM == sig_caught ? "SIGTERM" : "SIGINT" );
72 osrfLogInfo( OSRF_LOG_MARK, "Killed by signal %d; terminating", (int) sig_caught );
75 // If we caught a signal, then the signal handler already did a kill().
76 // If we didn't, then do the kill() now.
80 sleep(1); /* Give the children a chance to die before we reap them. */
82 // Wait for each dead child. The WNOHANG option means to return immediately if
83 // there are no dead children, instead of waiting for them to die. It is therefore
84 // possible for a child still to be alive when we exit this function, either because
85 // it intercepted the SIGTERM and ignored it, or because it took longer to die than
86 // the time we gave it.
88 while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0 )
89 osrfLogInfo(OSRF_LOG_MARK, "Killed child %d", child_pid);
91 // Remove all nodes from the list of child processes.
92 delete_all_children();
96 @brief Signal handler for SIGTERM and SIGINT.
98 Kill all child processes, and set a switch so that we'll know that the signal arrived.
100 static void handleKillSignal( int signo ) {
101 // First ignore SIGTERM. Otherwise we would send SIGTERM to ourself, intercept it,
102 // and kill() again in an endless loop.
103 signal( SIGTERM, SIG_IGN );
105 //Kill all child processes. This is safe to do in a signal handler, because POSIX
106 // specifies that kill() is reentrant. It is necessary because, if we did the kill()
107 // only in reap_children() (above), then there would be a narrow window of vulnerability
108 // in the main loop: if the signal arrives between checking sig_caught and calling wait(),
109 // we would wait indefinitely for a child to die on its own.
115 @brief Return a pointer to the global transport_client.
116 @return Pointer to the global transport_client, or NULL.
118 A given process needs only one connection to Jabber, so we keep it a pointer to it at
119 file scope. This function returns that pointer.
121 If the connection has been opened by a previous call to osrfSystemBootstrapClientResc(),
122 return the pointer. Otherwise return NULL.
124 transport_client* osrfSystemGetTransportClient( void ) {
125 return osrfGlobalTransportClient;
129 @brief Save a copy of a file name to be used for writing a process ID.
130 @param name Designated file name, or NULL.
132 Save a file name for later use in saving a process ID. If @a name is NULL, leave
135 When the parent process spawns a child, the child becomes a daemon. The parent writes the
136 child's process ID to the PID file, if one has been designated, so that some other process
137 can retrieve the PID later and kill the daemon.
139 void osrfSystemSetPidFile( const char* name ) {
141 free( pidfile_name );
144 pidfile_name = strdup( name );
150 @brief Discard the global transport_client, but without disconnecting from Jabber.
152 To be called by a child process in order to disregard the parent's connection without
153 disconnecting it, since disconnecting would disconnect the parent as well.
155 void osrfSystemIgnoreTransportClient() {
156 client_discard( osrfGlobalTransportClient );
157 osrfGlobalTransportClient = NULL;
/**
	@brief Bootstrap a generic application from info in the configuration file.
	@param config_file Name of the configuration file.
	@param contextnode Name of an aggregate within the configuration file, containing the
	relevant subset of configuration stuff.
	@return 1 if successful; zero or -1 if error.

	Convenience form of osrfSystemBootstrapClientResc() for callers that have no
	resource name to supply: the call is forwarded with a NULL resource.
*/
int osrf_system_bootstrap_client( char* config_file, char* contextnode ) {
	const char* no_resource = NULL;

	return osrfSystemBootstrapClientResc( config_file, contextnode, no_resource );
}
178 @brief Connect to one or more cache servers.
179 @return Zero in all cases.
181 int osrfSystemInitCache( void ) {
183 jsonObject* cacheServers = osrf_settings_host_value_object("/cache/global/servers/server");
184 char* maxCache = osrf_settings_host_value("/cache/global/max_cache_time");
186 if( cacheServers && maxCache) {
188 if( cacheServers->type == JSON_ARRAY ) {
190 const char* servers[cacheServers->size];
191 for( i = 0; i != cacheServers->size; i++ ) {
192 servers[i] = jsonObjectGetString( jsonObjectGetIndex(cacheServers, i) );
193 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[i]);
195 osrfCacheInit( servers, cacheServers->size, atoi(maxCache) );
198 const char* servers[] = { jsonObjectGetString(cacheServers) };
199 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[0]);
200 osrfCacheInit( servers, 1, atoi(maxCache) );
204 osrfLogError( OSRF_LOG_MARK, "Missing config value for /cache/global/servers/server _or_ "
205 "/cache/global/max_cache_time");
208 jsonObjectFree( cacheServers );
213 @brief Launch a collection of servers, as defined by the settings server.
214 @param hostname Full network name of the host where the process is running; or
216 @param configfile Name of the configuration file; normally '/openils/conf/opensrf_core.xml'.
217 @param contextNode Name of an aggregate within the configuration file, containing the
218 relevant subset of configuration stuff.
219 @return - Zero if successful, or -1 if not.
221 int osrfSystemBootstrap( const char* hostname, const char* configfile,
222 const char* contextNode ) {
223 if( !(hostname && configfile && contextNode) )
226 // Load the conguration, open the log, open a connection to Jabber
227 if(!osrfSystemBootstrapClientResc(configfile, contextNode, "settings_grabber" )) {
228 osrfLogError( OSRF_LOG_MARK,
229 "Unable to bootstrap for host %s from configuration file %s",
230 hostname, configfile );
234 shutdownComplete = 0;
236 // Get a list of applications to launch from the settings server
237 int retcode = osrf_settings_retrieve(hostname);
238 osrf_system_disconnect_client();
241 osrfLogError( OSRF_LOG_MARK,
242 "Unable to retrieve settings for host %s from configuration file %s",
243 hostname, configfile );
247 // Turn into a daemon. The parent forks and exits. Only the
248 // child returns, with the standard streams (stdin, stdout, and
249 // stderr) redirected to /dev/null.
252 jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
253 osrfStringArray* arr = osrfNewStringArray(8);
258 if(apps->type == JSON_STRING) {
259 osrfStringArrayAdd(arr, jsonObjectGetString(apps));
262 const jsonObject* app;
263 while( (app = jsonObjectGetIndex(apps, i++)) )
264 osrfStringArrayAdd(arr, jsonObjectGetString(app));
266 jsonObjectFree(apps);
268 const char* appname = NULL;
269 int first_launch = 1; // Boolean
271 while( (appname = osrfStringArrayGetString(arr, i++)) ) {
273 char* lang = osrf_settings_host_value("/apps/%s/language", appname);
275 if(lang && !strcasecmp(lang,"c")) {
277 char* libfile = osrf_settings_host_value("/apps/%s/implementation", appname);
279 if(! (appname && libfile) ) {
280 osrfLogWarning( OSRF_LOG_MARK, "Missing appname / libfile in settings config");
284 osrfLogInfo( OSRF_LOG_MARK, "Launching application %s with implementation %s",
289 if( (pid = fork()) ) { // if parent
290 // store pid in local list for re-launching dead children...
291 add_child( pid, appname, libfile );
292 osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
293 appname, (long) pid );
297 // Write our own PID to a PID file so that somebody can use it to
298 // send us a signal later. If we don't find any C apps to launch,
299 // then we will quietly exit without writing a PID file, and without
300 // waiting to be killed by a signal.
302 FILE* pidfile = fopen( pidfile_name, "w" );
304 osrfLogError( OSRF_LOG_MARK, "Unable to open PID file \"%s\": %s",
305 pidfile_name, strerror( errno ) );
306 free( pidfile_name );
310 fprintf( pidfile, "%ld\n", (long) getpid() );
317 } else { // if child, run the application
319 osrfLogInfo( OSRF_LOG_MARK, " * Running application %s\n", appname);
321 free( pidfile_name ); // tidy up some debris from the parent
324 if( osrfAppRegisterApplication( appname, libfile ) == 0 )
325 osrf_prefork_run(appname);
327 osrfLogDebug( OSRF_LOG_MARK, "Server exiting for app %s and library %s\n",
335 osrfStringArrayFree(arr);
337 signal(SIGTERM, handleKillSignal);
338 signal(SIGINT, handleKillSignal);
340 // Wait indefinitely for all the child processes to terminate, or for a signal to
341 // tell us to stop. When there are no more child processes, wait() returns an
342 // ECHILD error and we break out of the loop.
345 while( ! sig_caught ) {
346 pid = wait( &status );
348 if( errno == ECHILD )
349 osrfLogError( OSRF_LOG_MARK, "We have no more live services... exiting" );
350 else if( errno != EINTR )
351 osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s",
354 // Since we're not being killed by a signal as usual, delete the PID file
355 // so that no one will try to kill us when we're already dead.
357 remove( pidfile_name );
360 report_child_status( pid, status );
366 osrf_system_disconnect_client();
367 osrf_settings_free_host_config(NULL);
368 free( pidfile_name );
374 @brief Report the exit status of a dead child process, then remove it from the list.
375 @param pid Process ID of the child.
376 @param status Exit status as captured by wait().
378 static void report_child_status( pid_t pid, int status )
382 ChildNode* node = seek_child( pid );
385 app = node->app ? node->app : "[unknown]";
386 libfile = node->libfile ? node->libfile : "[none]";
390 if( WIFEXITED( status ) )
392 int rc = WEXITSTATUS( status ); // return code of child process
394 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
395 (long) pid, app, rc );
397 osrfLogInfo( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
400 else if( WIFSIGNALED( status ) )
402 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
403 (long) pid, app, WTERMSIG( status) );
405 else if( WIFSTOPPED( status ) )
407 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
408 (long) pid, app, (int) WSTOPSIG( status ) );
411 delete_child( node );
414 /*----------- Routines to manage list of children --*/
417 @brief Add a node to the list of child processes.
418 @param pid Process ID of the child process.
419 @param app Name of the child application.
420 @param libfile Name of the shared library where the child process resides.
422 static void add_child( pid_t pid, const char* app, const char* libfile )
424 /* Construct new child node */
426 ChildNode* node = safe_malloc( sizeof( ChildNode ) );
431 node->app = strdup( app );
436 node->libfile = strdup( libfile );
438 node->libfile = NULL;
440 /* Add new child node to the head of the list */
442 node->pNext = child_list;
446 child_list->pPrev = node;
452 @brief Remove a node from the list of child processes.
453 @param node Pointer to the node to be removed.
455 static void delete_child( ChildNode* node ) {
462 /* Detach the node from the list */
465 node->pPrev->pNext = node->pNext;
467 child_list = node->pNext;
470 node->pNext->pPrev = node->pPrev;
472 /* Deallocate the node and its payload */
475 free( node->libfile );
480 @brief Remove all nodes from the list of child processes, rendering it empty.
482 static void delete_all_children( void ) {
485 delete_child( child_list );
489 @brief Find the node for a child process of a given process ID.
490 @param pid The process ID of the child process.
491 @return A pointer to the corresponding node if found; otherwise NULL.
493 static ChildNode* seek_child( pid_t pid ) {
495 /* Return a pointer to the child node for the */
496 /* specified process ID, or NULL if not found */
498 ChildNode* node = child_list;
500 if( node->pid == pid )
509 /*----------- End of routines to manage list of children --*/
512 @brief Bootstrap a generic application from info in the configuration file.
513 @param config_file Name of the configuration file.
514 @param contextnode Name of an aggregate within the configuration file, containing the
515 relevant subset of configuration stuff.
516 @param resource Used to construct a Jabber resource name; may be NULL.
517 @return 1 if successful; zero or -1 if error.
519 - Load the configuration file.
521 - Open a connection to Jabber.
523 int osrfSystemBootstrapClientResc( const char* config_file,
524 const char* contextnode, const char* resource ) {
528 if(osrfSystemGetTransportClient()) {
529 osrfLogInfo(OSRF_LOG_MARK, "Client is already bootstrapped");
530 return 1; /* we already have a client connection */
533 if( !( config_file && contextnode ) && ! osrfConfigHasDefaultConfig() ) {
534 osrfLogError( OSRF_LOG_MARK, "No Config File Specified\n" );
539 osrfConfig* cfg = osrfConfigInit( config_file, contextnode );
541 osrfConfigSetDefaultConfig(cfg);
543 return 0; /* Can't load configuration? Bail out */
545 // fetch list of configured log redaction marker strings
546 log_protect_arr = osrfNewStringArray(8);
547 osrfConfig* cfg_shared = osrfConfigInit(config_file, "shared");
548 osrfConfigGetValueList( cfg_shared, log_protect_arr, "/log_protect/match_string" );
551 char* log_file = osrfConfigGetValue( NULL, "/logfile");
553 fprintf(stderr, "No log file specified in configuration file %s\n",
558 char* log_level = osrfConfigGetValue( NULL, "/loglevel" );
559 osrfStringArray* arr = osrfNewStringArray(8);
560 osrfConfigGetValueList(NULL, arr, "/domain");
562 char* username = osrfConfigGetValue( NULL, "/username" );
563 char* password = osrfConfigGetValue( NULL, "/passwd" );
564 char* port = osrfConfigGetValue( NULL, "/port" );
565 char* unixpath = osrfConfigGetValue( NULL, "/unixpath" );
566 char* facility = osrfConfigGetValue( NULL, "/syslog" );
567 char* actlog = osrfConfigGetValue( NULL, "/actlog" );
569 /* if we're a source-client, tell the logger */
570 char* isclient = osrfConfigGetValue(NULL, "/client");
571 if( isclient && !strcasecmp(isclient,"true") )
572 osrfLogSetIsClient(1);
577 if(port) iport = atoi(port);
578 if(log_level) llevel = atoi(log_level);
580 if(!strcmp(log_file, "syslog")) {
581 osrfLogInit( OSRF_LOG_TYPE_SYSLOG, contextnode, llevel );
582 osrfLogSetSyslogFacility(osrfLogFacilityToInt(facility));
583 if(actlog) osrfLogSetSyslogActFacility(osrfLogFacilityToInt(actlog));
586 osrfLogInit( OSRF_LOG_TYPE_FILE, contextnode, llevel );
587 osrfLogSetFile( log_file );
591 /* Get a domain, if one is specified */
592 const char* domain = osrfStringArrayGetString( arr, 0 ); /* just the first for now */
594 fprintf(stderr, "No domain specified in configuration file %s\n", config_file);
595 osrfLogError( OSRF_LOG_MARK, "No domain specified in configuration file %s\n",
601 fprintf(stderr, "No username specified in configuration file %s\n", config_file);
602 osrfLogError( OSRF_LOG_MARK, "No username specified in configuration file %s\n",
608 fprintf(stderr, "No password specified in configuration file %s\n", config_file);
609 osrfLogError( OSRF_LOG_MARK, "No password specified in configuration file %s\n",
614 if((iport <= 0) && !unixpath) {
615 fprintf(stderr, "No unixpath or valid port in configuration file %s\n", config_file);
616 osrfLogError( OSRF_LOG_MARK, "No unixpath or valid port in configuration file %s\n",
622 osrfStringArrayFree(arr);
634 osrfLogInfo( OSRF_LOG_MARK, "Bootstrapping system with domain %s, port %d, and unixpath %s",
635 domain, iport, unixpath ? unixpath : "(none)" );
636 transport_client* client = client_init( domain, iport, unixpath, 0 );
638 char host[HOST_NAME_MAX + 1] = "";
639 gethostname(host, sizeof(host) );
640 host[HOST_NAME_MAX] = '\0';
644 snprintf(tbuf, 32, "%f", get_timestamp_millis());
646 if(!resource) resource = "";
648 int len = strlen(resource) + 256;
651 snprintf(buf, len - 1, "%s_%s_%s_%ld", resource, host, tbuf, (long) getpid() );
653 if(client_connect( client, username, password, buf, 10, AUTH_DIGEST )) {
654 osrfGlobalTransportClient = client;
657 osrfStringArrayFree(arr);
667 if(osrfGlobalTransportClient)
674 @brief Disconnect from Jabber.
675 @return Zero in all cases.
677 int osrf_system_disconnect_client( void ) {
678 client_disconnect( osrfGlobalTransportClient );
679 client_free( osrfGlobalTransportClient );
680 osrfGlobalTransportClient = NULL;
685 @brief Shut down a laundry list of facilities typically used by servers.
688 - Settings from configuration file
690 - Connection to Jabber
691 - Settings from settings server
692 - Application sessions
695 int osrf_system_shutdown( void ) {
701 osrf_system_disconnect_client();
702 osrf_settings_free_host_config(NULL);
703 osrfAppSessionCleanup();
705 shutdownComplete = 1;