3 @brief Launch a collection of servers.
12 #include <sys/select.h>
16 #include "opensrf/utils.h"
17 #include "opensrf/log.h"
18 #include "opensrf/osrf_system.h"
19 #include "opensrf/osrf_application.h"
20 #include "opensrf/osrf_prefork.h"
23 #define HOST_NAME_MAX 256
26 static void report_child_status( pid_t pid, int status );
28 typedef struct child_node ChildNode;
31 @brief Represents a child process.
35 ChildNode* pNext; /**< Linkage pointer for doubly linked list. */
36 ChildNode* pPrev; /**< Linkage pointer for doubly linked list. */
37 pid_t pid; /**< Process ID of the child process. */
42 /** List of child processes. */
43 static ChildNode* child_list;
45 /** Pointer to the global transport_client; i.e. our connection to Jabber. */
46 static transport_client* osrfGlobalTransportClient = NULL;
48 /** Switch to be set by signal handler */
49 static volatile sig_atomic_t sig_caught;
51 /** Boolean: set to true when we finish shutting down. */
52 static int shutdownComplete = 0;
54 /** Name of file to which to write the process ID of the child process */
55 char* pidfile_name = NULL;
57 static void add_child( pid_t pid, const char* app, const char* libfile );
58 static void delete_child( ChildNode* node );
59 static void delete_all_children( void );
60 static ChildNode* seek_child( pid_t pid );
63 @brief Wait on all dead child processes so that they won't be zombies.
65 static void reap_children( void ) {
67 if( SIGTERM == sig_caught || SIGINT == sig_caught ) {
68 osrfLogInfo( OSRF_LOG_MARK, "Killed by %s; terminating",
69 SIGTERM == sig_caught ? "SIGTERM" : "SIGINT" );
71 osrfLogInfo( OSRF_LOG_MARK, "Killed by signal %d; terminating", (int) sig_caught );
74 // If we caught a signal, then the signal handler already did a kill().
75 // If we didn't, then do the kill() now.
79 sleep(1); /* Give the children a chance to die before we reap them. */
81 // Wait for each dead child. The WNOHANG option means to return immediately if
82 // there are no dead children, instead of waiting for them to die. It is therefore
83 // possible for a child still to be alive when we exit this function, either because
84 // it intercepted the SIGTERM and ignored it, or because it took longer to die than
85 // the time we gave it.
87 while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0 )
88 osrfLogInfo(OSRF_LOG_MARK, "Killed child %d", child_pid);
90 // Remove all nodes from the list of child processes.
91 delete_all_children();
95 @brief Signal handler for SIGTERM and SIGINT.
97 Kill all child processes, and set a switch so that we'll know that the signal arrived.
99 static void handleKillSignal( int signo ) {
100 // First ignore SIGTERM. Otherwise we would send SIGTERM to ourself, intercept it,
101 // and kill() again in an endless loop.
102 signal( SIGTERM, SIG_IGN );
104 //Kill all child processes. This is safe to do in a signal handler, because POSIX
105 // specifies that kill() is reentrant. It is necessary because, if we did the kill()
106 // only in reap_children() (above), then there would be a narrow window of vulnerability
107 // in the main loop: if the signal arrives between checking sig_caught and calling wait(),
108 // we would wait indefinitely for a child to die on its own.
114 @brief Return a pointer to the global transport_client.
115 @return Pointer to the global transport_client, or NULL.
117 A given process needs only one connection to Jabber, so we keep a pointer to it at
118 file scope. This function returns that pointer.
120 If the connection has been opened by a previous call to osrfSystemBootstrapClientResc(),
121 return the pointer. Otherwise return NULL.
123 transport_client* osrfSystemGetTransportClient( void ) {
124 return osrfGlobalTransportClient;
128 @brief Save a copy of a file name to be used for writing a process ID.
129 @param name Designated file name, or NULL.
131 Save a file name for later use in saving a process ID. If @a name is NULL, leave
134 When the parent process spawns a child, the child becomes a daemon. The parent writes the
135 child's process ID to the PID file, if one has been designated, so that some other process
136 can retrieve the PID later and kill the daemon.
138 void osrfSystemSetPidFile( const char* name ) {
140 free( pidfile_name );
143 pidfile_name = strdup( name );
149 @brief Discard the global transport_client, but without disconnecting from Jabber.
151 To be called by a child process in order to disregard the parent's connection without
152 disconnecting it, since disconnecting would disconnect the parent as well.
154 void osrfSystemIgnoreTransportClient() {
155 client_discard( osrfGlobalTransportClient );
156 osrfGlobalTransportClient = NULL;
160 @brief Bootstrap a generic application from info in the configuration file.
161 @param config_file Name of the configuration file.
162 @param contextnode Name of an aggregate within the configuration file, containing the
163 relevant subset of configuration stuff.
164 @return 1 if successful; zero or -1 if error.
166 - Load the configuration file.
168 - Open a connection to Jabber.
170 A thin wrapper for osrfSystemBootstrapClientResc, passing it NULL for a resource.
172 int osrf_system_bootstrap_client( char* config_file, char* contextnode ) {
173 return osrfSystemBootstrapClientResc(config_file, contextnode, NULL);
177 @brief Connect to one or more cache servers.
178 @return Zero in all cases.
180 int osrfSystemInitCache( void ) {
182 jsonObject* cacheServers = osrf_settings_host_value_object("/cache/global/servers/server");
183 char* maxCache = osrf_settings_host_value("/cache/global/max_cache_time");
185 if( cacheServers && maxCache) {
187 if( cacheServers->type == JSON_ARRAY ) {
189 const char* servers[cacheServers->size];
190 for( i = 0; i != cacheServers->size; i++ ) {
191 servers[i] = jsonObjectGetString( jsonObjectGetIndex(cacheServers, i) );
192 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[i]);
194 osrfCacheInit( servers, cacheServers->size, atoi(maxCache) );
197 const char* servers[] = { jsonObjectGetString(cacheServers) };
198 osrfLogInfo( OSRF_LOG_MARK, "Adding cache server %s", servers[0]);
199 osrfCacheInit( servers, 1, atoi(maxCache) );
203 osrfLogError( OSRF_LOG_MARK, "Missing config value for /cache/global/servers/server _or_ "
204 "/cache/global/max_cache_time");
207 jsonObjectFree( cacheServers );
212 @brief Launch a collection of servers, as defined by the settings server.
213 @param hostname Full network name of the host where the process is running; or
215 @param configfile Name of the configuration file; normally '/openils/conf/opensrf_core.xml'.
216 @param contextNode Name of an aggregate within the configuration file, containing the
217 relevant subset of configuration stuff.
218 @return - Zero if successful, or -1 if not.
220 int osrfSystemBootstrap( const char* hostname, const char* configfile,
221 const char* contextNode ) {
222 if( !(hostname && configfile && contextNode) )
225 // Load the configuration, open the log, open a connection to Jabber
226 if(!osrfSystemBootstrapClientResc(configfile, contextNode, "settings_grabber" )) {
227 osrfLogError( OSRF_LOG_MARK,
228 "Unable to bootstrap for host %s from configuration file %s",
229 hostname, configfile );
233 shutdownComplete = 0;
235 // Get a list of applications to launch from the settings server
236 int retcode = osrf_settings_retrieve(hostname);
237 osrf_system_disconnect_client();
240 osrfLogError( OSRF_LOG_MARK,
241 "Unable to retrieve settings for host %s from configuration file %s",
242 hostname, configfile );
246 // Turn into a daemon. The parent forks and exits. Only the
247 // child returns, with the standard streams (stdin, stdout, and
248 // stderr) redirected to /dev/null.
251 jsonObject* apps = osrf_settings_host_value_object("/activeapps/appname");
252 osrfStringArray* arr = osrfNewStringArray(8);
257 if(apps->type == JSON_STRING) {
258 osrfStringArrayAdd(arr, jsonObjectGetString(apps));
261 const jsonObject* app;
262 while( (app = jsonObjectGetIndex(apps, i++)) )
263 osrfStringArrayAdd(arr, jsonObjectGetString(app));
265 jsonObjectFree(apps);
267 const char* appname = NULL;
268 int first_launch = 1; // Boolean
270 while( (appname = osrfStringArrayGetString(arr, i++)) ) {
272 char* lang = osrf_settings_host_value("/apps/%s/language", appname);
274 if(lang && !strcasecmp(lang,"c")) {
276 char* libfile = osrf_settings_host_value("/apps/%s/implementation", appname);
278 if(! (appname && libfile) ) {
279 osrfLogWarning( OSRF_LOG_MARK, "Missing appname / libfile in settings config");
283 osrfLogInfo( OSRF_LOG_MARK, "Launching application %s with implementation %s",
288 if( (pid = fork()) ) { // if parent
289 // store pid in local list for re-launching dead children...
290 add_child( pid, appname, libfile );
291 osrfLogInfo( OSRF_LOG_MARK, "Running application child %s: process id %ld",
292 appname, (long) pid );
296 // Write our own PID to a PID file so that somebody can use it to
297 // send us a signal later. If we don't find any C apps to launch,
298 // then we will quietly exit without writing a PID file, and without
299 // waiting to be killed by a signal.
301 FILE* pidfile = fopen( pidfile_name, "w" );
303 osrfLogError( OSRF_LOG_MARK, "Unable to open PID file \"%s\": %s",
304 pidfile_name, strerror( errno ) );
305 free( pidfile_name );
309 fprintf( pidfile, "%ld\n", (long) getpid() );
316 } else { // if child, run the application
318 osrfLogInfo( OSRF_LOG_MARK, " * Running application %s\n", appname);
320 free( pidfile_name ); // tidy up some debris from the parent
323 if( osrfAppRegisterApplication( appname, libfile ) == 0 )
324 osrf_prefork_run(appname);
326 osrfLogDebug( OSRF_LOG_MARK, "Server exiting for app %s and library %s\n",
334 osrfStringArrayFree(arr);
336 signal(SIGTERM, handleKillSignal);
337 signal(SIGINT, handleKillSignal);
339 // Wait indefinitely for all the child processes to terminate, or for a signal to
340 // tell us to stop. When there are no more child processes, wait() returns an
341 // ECHILD error and we break out of the loop.
344 while( ! sig_caught ) {
345 pid = wait( &status );
347 if( errno == ECHILD )
348 osrfLogError( OSRF_LOG_MARK, "We have no more live services... exiting" );
349 else if( errno != EINTR )
350 osrfLogError(OSRF_LOG_MARK, "Exiting top-level system loop with error: %s",
353 // Since we're not being killed by a signal as usual, delete the PID file
354 // so that no one will try to kill us when we're already dead.
356 remove( pidfile_name );
359 report_child_status( pid, status );
365 osrf_system_disconnect_client();
366 osrf_settings_free_host_config(NULL);
367 free( pidfile_name );
373 @brief Report the exit status of a dead child process, then remove it from the list.
374 @param pid Process ID of the child.
375 @param status Exit status as captured by wait().
377 static void report_child_status( pid_t pid, int status )
381 ChildNode* node = seek_child( pid );
384 app = node->app ? node->app : "[unknown]";
385 libfile = node->libfile ? node->libfile : "[none]";
389 if( WIFEXITED( status ) )
391 int rc = WEXITSTATUS( status ); // return code of child process
393 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) exited with return code %d",
394 (long) pid, app, rc );
396 osrfLogInfo( OSRF_LOG_MARK, "Child process %ld (app %s) exited normally",
399 else if( WIFSIGNALED( status ) )
401 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) killed by signal %d",
402 (long) pid, app, WTERMSIG( status) );
404 else if( WIFSTOPPED( status ) )
406 osrfLogError( OSRF_LOG_MARK, "Child process %ld (app %s) stopped by signal %d",
407 (long) pid, app, (int) WSTOPSIG( status ) );
410 delete_child( node );
413 /*----------- Routines to manage list of children --*/
416 @brief Add a node to the list of child processes.
417 @param pid Process ID of the child process.
418 @param app Name of the child application.
419 @param libfile Name of the shared library where the child process resides.
421 static void add_child( pid_t pid, const char* app, const char* libfile )
423 /* Construct new child node */
425 ChildNode* node = safe_malloc( sizeof( ChildNode ) );
430 node->app = strdup( app );
435 node->libfile = strdup( libfile );
437 node->libfile = NULL;
439 /* Add new child node to the head of the list */
441 node->pNext = child_list;
445 child_list->pPrev = node;
451 @brief Remove a node from the list of child processes.
452 @param node Pointer to the node to be removed.
454 static void delete_child( ChildNode* node ) {
461 /* Detach the node from the list */
464 node->pPrev->pNext = node->pNext;
466 child_list = node->pNext;
469 node->pNext->pPrev = node->pPrev;
471 /* Deallocate the node and its payload */
474 free( node->libfile );
479 @brief Remove all nodes from the list of child processes, rendering it empty.
481 static void delete_all_children( void ) {
484 delete_child( child_list );
488 @brief Find the node for a child process of a given process ID.
489 @param pid The process ID of the child process.
490 @return A pointer to the corresponding node if found; otherwise NULL.
492 static ChildNode* seek_child( pid_t pid ) {
494 /* Return a pointer to the child node for the */
495 /* specified process ID, or NULL if not found */
497 ChildNode* node = child_list;
499 if( node->pid == pid )
508 /*----------- End of routines to manage list of children --*/
511 @brief Bootstrap a generic application from info in the configuration file.
512 @param config_file Name of the configuration file.
513 @param contextnode Name of an aggregate within the configuration file, containing the
514 relevant subset of configuration stuff.
515 @param resource Used to construct a Jabber resource name; may be NULL.
516 @return 1 if successful; zero or -1 if error.
518 - Load the configuration file.
520 - Open a connection to Jabber.
522 int osrfSystemBootstrapClientResc( const char* config_file,
523 const char* contextnode, const char* resource ) {
527 if(osrfSystemGetTransportClient()) {
528 osrfLogInfo(OSRF_LOG_MARK, "Client is already bootstrapped");
529 return 1; /* we already have a client connection */
532 if( !( config_file && contextnode ) && ! osrfConfigHasDefaultConfig() ) {
533 osrfLogError( OSRF_LOG_MARK, "No Config File Specified\n" );
538 osrfConfig* cfg = osrfConfigInit( config_file, contextnode );
540 osrfConfigSetDefaultConfig(cfg);
542 return 0; /* Can't load configuration? Bail out */
545 char* log_file = osrfConfigGetValue( NULL, "/logfile");
547 fprintf(stderr, "No log file specified in configuration file %s\n",
552 char* log_level = osrfConfigGetValue( NULL, "/loglevel" );
553 osrfStringArray* arr = osrfNewStringArray(8);
554 osrfConfigGetValueList(NULL, arr, "/domain");
556 char* username = osrfConfigGetValue( NULL, "/username" );
557 char* password = osrfConfigGetValue( NULL, "/passwd" );
558 char* port = osrfConfigGetValue( NULL, "/port" );
559 char* unixpath = osrfConfigGetValue( NULL, "/unixpath" );
560 char* facility = osrfConfigGetValue( NULL, "/syslog" );
561 char* actlog = osrfConfigGetValue( NULL, "/actlog" );
563 /* if we're a source-client, tell the logger */
564 char* isclient = osrfConfigGetValue(NULL, "/client");
565 if( isclient && !strcasecmp(isclient,"true") )
566 osrfLogSetIsClient(1);
571 if(port) iport = atoi(port);
572 if(log_level) llevel = atoi(log_level);
574 if(!strcmp(log_file, "syslog")) {
575 osrfLogInit( OSRF_LOG_TYPE_SYSLOG, contextnode, llevel );
576 osrfLogSetSyslogFacility(osrfLogFacilityToInt(facility));
577 if(actlog) osrfLogSetSyslogActFacility(osrfLogFacilityToInt(actlog));
580 osrfLogInit( OSRF_LOG_TYPE_FILE, contextnode, llevel );
581 osrfLogSetFile( log_file );
585 /* Get a domain, if one is specified */
586 const char* domain = osrfStringArrayGetString( arr, 0 ); /* just the first for now */
588 fprintf(stderr, "No domain specified in configuration file %s\n", config_file);
589 osrfLogError( OSRF_LOG_MARK, "No domain specified in configuration file %s\n",
595 fprintf(stderr, "No username specified in configuration file %s\n", config_file);
596 osrfLogError( OSRF_LOG_MARK, "No username specified in configuration file %s\n",
602 fprintf(stderr, "No password specified in configuration file %s\n", config_file);
603 osrfLogError( OSRF_LOG_MARK, "No password specified in configuration file %s\n",
608 if((iport <= 0) && !unixpath) {
609 fprintf(stderr, "No unixpath or valid port in configuration file %s\n", config_file);
610 osrfLogError( OSRF_LOG_MARK, "No unixpath or valid port in configuration file %s\n",
616 osrfStringArrayFree(arr);
628 osrfLogInfo( OSRF_LOG_MARK, "Bootstrapping system with domain %s, port %d, and unixpath %s",
629 domain, iport, unixpath ? unixpath : "(none)" );
630 transport_client* client = client_init( domain, iport, unixpath, 0 );
632 char host[HOST_NAME_MAX + 1] = "";
633 gethostname(host, sizeof(host) );
634 host[HOST_NAME_MAX] = '\0';
638 snprintf(tbuf, 32, "%f", get_timestamp_millis());
640 if(!resource) resource = "";
642 int len = strlen(resource) + 256;
645 snprintf(buf, len - 1, "%s_%s_%s_%ld", resource, host, tbuf, (long) getpid() );
647 if(client_connect( client, username, password, buf, 10, AUTH_DIGEST )) {
648 osrfGlobalTransportClient = client;
651 osrfStringArrayFree(arr);
661 if(osrfGlobalTransportClient)
668 @brief Disconnect from Jabber.
669 @return Zero in all cases.
671 int osrf_system_disconnect_client( void ) {
672 client_disconnect( osrfGlobalTransportClient );
673 client_free( osrfGlobalTransportClient );
674 osrfGlobalTransportClient = NULL;
679 @brief Shut down a laundry list of facilities typically used by servers.
682 - Settings from configuration file
684 - Connection to Jabber
685 - Settings from settings server
686 - Application sessions
689 int osrf_system_shutdown( void ) {
695 osrf_system_disconnect_client();
696 osrf_settings_free_host_config(NULL);
697 osrfAppSessionCleanup();
699 shutdownComplete = 1;