3 @brief Spawn and manage a collection of child processes to service requests.
5 Spawn a collection of child processes, replacing them as needed. Forward requests to them
6 and let the children do the work.
8 Each child processes some maximum number of requests before it terminates itself. When a
9 child dies, either deliberately or otherwise, we can spawn another one to replace it,
10 keeping the number of children within a predefined range.
12 Use a doubly-linked circular list to keep track of the children to whom we have forwarded
13 a request, and who are still working on them. Use a separate linear linked list to keep
14 track of children that are currently idle. Move them back and forth as needed.
16 For each child, set up two pipes:
17 - One for the parent to send requests to the child.
18 - One for the child to notify the parent that it is available for another request.
20 The message sent to the child represents an XML stanza as received from Jabber.
22 When the child finishes processing the request, it writes the string "available" back
23 to the parent. Then the parent knows that it can send that child another request.
28 #include <sys/types.h>
34 #include <sys/select.h>
37 #include "opensrf/utils.h"
38 #include "opensrf/log.h"
39 #include "opensrf/transport_client.h"
40 #include "opensrf/osrf_stack.h"
41 #include "opensrf/osrf_settings.h"
42 #include "opensrf/osrf_application.h"
44 #define READ_BUFSIZE 1024
45 #define ABS_MAX_CHILDREN 256
48 int max_requests; /**< How many requests a child processes before terminating. */
49 int min_children; /**< Minimum number of children to maintain. */
50 int max_children; /**< Maximum number of children to maintain. */
51 int fd; /**< Unused. */
52 int data_to_child; /**< Unused. */
53 int data_to_parent; /**< Unused. */
54 int current_num_children; /**< How many children are currently on the list. */
55 int keepalive; /**< Keepalive time for stateful sessions. */
56 char* appname; /**< Name of the application. */
57 /** Points to a circular linked list of children. */
58 struct prefork_child_struct* first_child;
59 /** List of child processes that aren't doing anything at the moment and are
60 therefore available to service a new request. */
61 struct prefork_child_struct* idle_list;
62 /** List of allocated but unused prefork_children, available for reuse. Each one is just
63 raw memory, apart from the "next" pointer used to stitch them together. In particular,
64 there is no child process for them, and the file descriptors are not open. */
65 struct prefork_child_struct* free_list;
/** Non-circular, singly linked list built by sighup_handler(): one clone per child that
was on the active list when SIGHUP arrived. Only the pid of each clone is meaningful. */
66 struct prefork_child_struct* sighup_pending_list;
67 transport_client* connection; /**< Connection to Jabber. */
/** Bookkeeping for one child process: its pid, the ends of the two pipes that connect
it to the parent (one pipe carries requests, the other carries status), the settings
it works under, and the links used to keep it on one of the forker's lists. */
70 struct prefork_child_struct {
71 pid_t pid; /**< Process ID of the child. */
72 int read_data_fd; /**< Child uses to read request. */
73 int write_data_fd; /**< Parent uses to write request. */
74 int read_status_fd; /**< Parent reads to see if child is available. */
75 int write_status_fd; /**< Child uses to notify parent when it's available again. */
76 int max_requests; /**< How many requests a child can process before terminating. */
77 const char* appname; /**< Name of the application. */
78 int keepalive; /**< Keepalive time for stateful sessions. */
79 struct prefork_child_struct* next; /**< Linkage pointer for linked list. */
80 struct prefork_child_struct* prev; /**< Linkage pointer for linked list. */
/** Short name for struct prefork_child_struct, used throughout this file. */
83 typedef struct prefork_child_struct prefork_child;
85 /** Boolean. Set to true by a signal handler when it traps SIGCHLD. */
86 static volatile sig_atomic_t child_dead;
88 static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
89 int max_requests, int min_children, int max_children );
90 static prefork_child* launch_child( prefork_simple* forker );
91 static void prefork_launch_children( prefork_simple* forker );
92 static void prefork_run( prefork_simple* forker );
93 static void add_prefork_child( prefork_simple* forker, prefork_child* child );
95 static void del_prefork_child( prefork_simple* forker, pid_t pid );
96 static int check_children( prefork_simple* forker, int forever );
97 static int prefork_child_process_request( prefork_child*, char* data );
98 static int prefork_child_init_hook( prefork_child* );
99 static prefork_child* prefork_child_init( prefork_simple* forker,
100 int read_data_fd, int write_data_fd,
101 int read_status_fd, int write_status_fd );
103 /* listens on the 'data_to_child' fd and waits for incoming data */
104 static void prefork_child_wait( prefork_child* child );
105 static void prefork_clear( prefork_simple*, bool graceful);
106 static void prefork_child_free( prefork_simple* forker, prefork_child* );
107 static void osrf_prefork_register_routers( const char* appname, bool unregister );
108 static void osrf_prefork_child_exit( prefork_child* );
110 static void sigchld_handler( int sig );
111 static void sigusr1_handler( int sig );
112 static void sigusr2_handler( int sig );
113 static void sigterm_handler( int sig );
114 static void sigint_handler( int sig );
115 static void sighup_handler( int sig );
117 /** Maintain a global pointer to the prefork_simple object
118 * for the current process so we can refer to it later
119 * for signal handling. There will only ever be one
120 * forker per process.
122 static prefork_simple *global_forker = NULL;
125 @brief Spawn and manage a collection of drone processes for servicing requests.
126 @param appname Name of the application.
127 @return 0 if successful, or -1 if error.
129 int osrf_prefork_run( const char* appname ) {
// A missing appname is reported as an error.
132 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run requires an appname to run!");
// Change the command line as reported by ps, identifying this process as the listener.
136 set_proc_title( "OpenSRF Listener [%s]", appname );
143 // Get configuration settings
144 osrfLogInfo( OSRF_LOG_MARK, "Loading config in osrf_forker for app %s", appname );
// Every setting is looked up under this application's own section of the settings.
146 char* max_req = osrf_settings_host_value( "/apps/%s/unix_config/max_requests", appname );
147 char* min_children = osrf_settings_host_value( "/apps/%s/unix_config/min_children", appname );
148 char* max_children = osrf_settings_host_value( "/apps/%s/unix_config/max_children", appname );
149 char* keepalive = osrf_settings_host_value( "/apps/%s/keepalive", appname );
// For each setting: warn and name the assumed value when it is not defined,
// otherwise convert the configured string with atoi().
152 osrfLogWarning( OSRF_LOG_MARK, "Keepalive is not defined, assuming %d", kalive );
154 kalive = atoi( keepalive );
157 osrfLogWarning( OSRF_LOG_MARK, "Max requests not defined, assuming %d", maxr );
159 maxr = atoi( max_req );
162 osrfLogWarning( OSRF_LOG_MARK, "Min children not defined, assuming %d", minc );
164 minc = atoi( min_children );
167 osrfLogWarning( OSRF_LOG_MARK, "Max children not defined, assuming %d", maxc );
169 maxc = atoi( max_children );
// The setting strings are no longer needed once converted.
173 free( min_children );
174 free( max_children );
175 /* --------------------------------------------------- */
// The listener bootstraps its transport connection under the resource "<appname>_listener".
177 char* resc = va_list_to_string( "%s_listener", appname );
179 // Make sure that we haven't already booted
180 if( !osrfSystemBootstrapClientResc( NULL, NULL, resc )) {
181 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()" );
// The forker lives on this function's stack for as long as the listener runs.
188 prefork_simple forker;
// prefork_simple_init() validates the child limits; a nonzero return means they were rejected.
190 if( prefork_simple_init( &forker, osrfSystemGetTransportClient(), maxr, minc, maxc )) {
191 osrfLogError( OSRF_LOG_MARK,
192 "osrf_prefork_run() failed to create prefork_simple object" );
196 // Finish initializing the prefork_simple.
197 forker.appname = strdup( appname );
198 forker.keepalive = kalive;
// Publish the forker so that the signal handlers can reach it.
199 global_forker = &forker;
201 // Spawn the children; put them in the idle list.
202 prefork_launch_children( &forker );
204 // Tell the router that you're open for business.
205 osrf_prefork_register_routers( appname, false );
// Install the parent's signal handlers. SIGINT and SIGQUIT share one handler.
// Each child resets all of these to SIG_DFL in launch_child().
207 signal( SIGUSR1, sigusr1_handler);
208 signal( SIGUSR2, sigusr2_handler);
209 signal( SIGTERM, sigterm_handler);
210 signal( SIGINT, sigint_handler );
211 signal( SIGQUIT, sigint_handler );
212 signal( SIGHUP, sighup_handler );
214 // Sit back and let the requests roll in
215 osrfLogInfo( OSRF_LOG_MARK, "Launching osrf_forker for app %s", appname );
216 prefork_run( &forker );
// Only reached if the main loop returns; clean up with the non-graceful variant.
218 osrfLogWarning( OSRF_LOG_MARK, "prefork_run() returned - how??" );
219 prefork_clear( &forker, false );
224 @brief Register the application with a specified router.
225 @param appname Name of the application.
226 @param routerName Name of the router.
227 @param routerDomain Domain of the router.
229 Tell the router that you're open for business so that it can route requests to you.
231 Called only by the parent process.
// Builds one message addressed to the given router and sends it over the process-wide
// transport client. Two variants of the message appear below: one carrying the router
// command "unregister" and one carrying "register".
233 static void osrf_prefork_send_router_registration(
234 const char* appname, const char* routerName,
235 const char* routerDomain, bool unregister ) {
237 // Get a pointer to the global transport_client
238 transport_client* client = osrfSystemGetTransportClient();
240 // Construct the Jabber address of the router
// The address has the form <routerName>@<routerDomain>/router.
241 char* jid = va_list_to_string( "%s@%s/router", routerName, routerDomain );
243 // Create the registration message, and send it
244 transport_message* msg;
// Un-registration variant: body "unregistering", router command "unregister".
247 osrfLogInfo( OSRF_LOG_MARK, "%s un-registering with router %s", appname, jid );
248 msg = message_init( "unregistering", NULL, NULL, jid, NULL );
249 message_set_router_info( msg, NULL, NULL, appname, "unregister", 0 );
// Registration variant: body "registering", router command "register".
253 osrfLogInfo( OSRF_LOG_MARK, "%s registering with router %s", appname, jid );
254 msg = message_init( "registering", NULL, NULL, jid, NULL );
255 message_set_router_info( msg, NULL, NULL, appname, "register", 0 );
258 client_send_message( client, msg );
266 @brief Register with a router, or not, according to some config settings.
267 @param appname Name of the application
268 @param RouterChunk A representation of part of the config file.
270 Parse a "complex" router configuration chunk.
272 Examine the services listed for a given router (normally in opensrf_core.xml). If
273 there is an entry for this service, or if there are @em no services listed, then
274 register with this router. Otherwise don't.
276 Called only by the parent process.
// Decides from one router's config chunk whether this application should send that
// router a registration (or un-registration, per `unregister`) message, and sends it
// if so. The chunk supplies the router's "name", "domain" and optional "services".
278 static void osrf_prefork_parse_router_chunk(
279 const char* appname, const jsonObject* routerChunk, bool unregister ) {
281 const char* routerName = jsonObjectGetString( jsonObjectGetKeyConst( routerChunk, "name" ));
282 const char* domain = jsonObjectGetString( jsonObjectGetKeyConst( routerChunk, "domain" ));
283 const jsonObject* services = jsonObjectGetKeyConst( routerChunk, "services" );
// The arguments follow the order of the format string: domain first, then name.
284 osrfLogDebug( OSRF_LOG_MARK, "found router config with domain %s and name %s",
285 domain, routerName );
// A "services" hash restricts which applications may register with this router.
287 if( services && services->type == JSON_HASH ) {
288 osrfLogDebug( OSRF_LOG_MARK, "investigating router information..." );
289 const jsonObject* service_obj = jsonObjectGetKeyConst( services, "service" );
291 ; // do nothing (shouldn't happen)
292 else if( JSON_ARRAY == service_obj->type ) {
293 // There are multiple services listed. Register with this router
294 // if and only if this service is on the list.
296 for( j = 0; j < service_obj->size; j++ ) {
297 const char* service = jsonObjectGetString( jsonObjectGetIndex( service_obj, j ));
298 if( service && !strcmp( appname, service ))
299 osrf_prefork_send_router_registration( appname, routerName, domain, unregister );
302 else if( JSON_STRING == service_obj->type ) {
303 // There's only one service listed. Register with this router
304 // if and only if this service is the one listed.
305 if( !strcmp( appname, jsonObjectGetString( service_obj )) )
306 osrf_prefork_send_router_registration( appname, routerName, domain, unregister );
309 // This router is not restricted to any set of services,
310 // so go ahead and register with it.
311 osrf_prefork_send_router_registration( appname, routerName, domain, unregister );
316 @brief Register the application with one or more routers, according to the configuration.
317 @param appname Name of the application.
319 Called only by the parent process.
// Walks every entry found under /routers/router in the configuration and sends each
// applicable router a registration, or an un-registration when `unregister` is true.
321 static void osrf_prefork_register_routers( const char* appname, bool unregister ) {
323 jsonObject* routerInfo = osrfConfigGetValueObject( NULL, "/routers/router" );
326 for( i = 0; i < routerInfo->size; i++ ) {
327 const jsonObject* routerChunk = jsonObjectGetIndex( routerInfo, i );
// A plain string entry is a simple config; anything else goes to the chunk parser below.
329 if( routerChunk->type == JSON_STRING ) {
330 /* this accommodates simple router configs */
// In a simple config the router name comes from /router_name and the domain from
// /routers/router.
331 char* routerName = osrfConfigGetValue( NULL, "/router_name" );
332 char* domain = osrfConfigGetValue( NULL, "/routers/router" );
333 osrfLogDebug( OSRF_LOG_MARK, "found simple router settings with router name %s",
335 osrf_prefork_send_router_registration( appname, routerName, domain, unregister );
// Structured entry: the chunk itself says which services this router accepts.
340 osrf_prefork_parse_router_chunk( appname, routerChunk, unregister );
344 jsonObjectFree( routerInfo );
348 @brief Initialize a child process.
349 @param child Pointer to the prefork_child representing the new child process.
350 @return Zero if successful, or -1 if not.
352 Called only by child processes. Actions:
353 - Connect to one or more cache servers
354 - Reconfigure logger, if necessary
355 - Discard parent's Jabber connection and open a new one
356 - Dynamically call an application-specific initialization routine
357 - Change the command line as reported by ps
// Prepares a freshly forked child for work: cache connection, logger setup, its own
// transport connection under the resource "<appname>_drone", the application's
// child-init routine, and finally a new process title. A NULL child yields -1.
359 static int prefork_child_init_hook( prefork_child* child ) {
361 if( !child ) return -1;
362 osrfLogDebug( OSRF_LOG_MARK, "Child init hook for child %d", child->pid );
364 // Connect to cache server(s).
365 osrfSystemInitCache();
366 char* resc = va_list_to_string( "%s_drone", child->appname );
368 // If we're a source-client, tell the logger now that we're a new process.
369 char* isclient = osrfConfigGetValue( NULL, "/client" );
370 if( isclient && !strcasecmp( isclient,"true" ))
371 osrfLogSetIsClient( 1 );
374 // Remove traces of our parent's socket connection so we can have our own.
375 // TODO: not necessary if parent disconnects first
376 osrfSystemIgnoreTransportClient();
// The error message names this function, so a failed drone bootstrap is not mistaken
// for a failure of the listener in osrf_prefork_run().
379 if( !osrfSystemBootstrapClientResc( NULL, NULL, resc )) {
380 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for prefork_child_init_hook()" );
387 // Dynamically call the application-specific initialization function
388 // from a previously loaded shared library.
// A zero return from osrfAppRunChildInit() is the success case.
389 if( ! osrfAppRunChildInit( child->appname )) {
390 osrfLogDebug( OSRF_LOG_MARK, "Prefork child_init succeeded\n" );
392 osrfLogError( OSRF_LOG_MARK, "Prefork child_init failed\n" );
396 // Change the command line as reported by ps
397 set_proc_title( "OpenSRF Drone [%s]", child->appname );
402 @brief Respond to a client request forwarded by the parent.
403 @param child Pointer to the state of the child process.
404 @param data Pointer to the raw XMPP message received from the parent.
405 @return 0 on success; non-zero means that the child process should clean itself up
406 and terminate immediately, presumably due to a fatal error condition.
408 Called only by a child process.
// Handles one forwarded request inside a child: ensures the transport connection is up,
// turns the XML into a transport_message, lets the stack handle it, and, when the
// client holds the session open, keeps serving that session in a keepalive loop.
410 static int prefork_child_process_request( prefork_child* child, char* data ) {
411 if( !child ) return 0;
413 transport_client* client = osrfSystemGetTransportClient();
415 // Make sure that we're still connected to Jabber; reconnect if necessary.
416 if( !client_connected( client )) {
// Discard the dead connection before bootstrapping a new one.
417 osrfSystemIgnoreTransportClient();
418 osrfLogWarning( OSRF_LOG_MARK, "Reconnecting child to opensrf after disconnect..." );
419 if( !osrf_system_bootstrap_client( NULL, NULL )) {
420 osrfLogError( OSRF_LOG_MARK,
421 "Unable to bootstrap client in prefork_child_process_request()" );
// The reconnect failed, so the child goes through its exit routine.
423 osrf_prefork_child_exit( child );
427 // Construct the message from the xml.
428 transport_message* msg = new_message_from_xml( data );
430 // Respond to the transport message. This is where method calls are buried.
431 osrfAppSession* session = osrf_stack_transport_handler( msg, child->appname );
// rc starts out as the session's panic flag.
435 int rc = session->panic;
438 osrfLogWarning( OSRF_LOG_MARK,
439 "Drone for session %s terminating immediately", session->session_id );
440 osrfAppSessionFree( session );
444 if( session->stateless && session->state != OSRF_SESSION_CONNECTED ) {
445 // We're no longer connected to the client, which presumably means that
446 // we're done with this request. Bail out.
447 osrfAppSessionFree( session );
451 // If we get this far, then the client has opened an application connection so that it
452 // can send multiple requests directly to the same server drone, bypassing the router
453 // and the listener. For example, it may need to do a database transaction, requiring
454 // multiple method calls within the same database session.
456 // Hence we go into a loop, responding to successive requests from the same client, until
457 // either the client disconnects or an error occurs.
459 osrfLogDebug( OSRF_LOG_MARK, "Entering keepalive loop for session %s", session->session_id );
// The child's keepalive is both the timeout passed to each queue wait and the threshold
// for the timeout check further down.
460 int keepalive = child->keepalive;
468 // Respond to any input messages. This is where the method calls are buried.
469 osrfLogDebug( OSRF_LOG_MARK,
470 "osrf_prefork calling queue_wait [%d] in keepalive loop", keepalive );
// Note the time before waiting; the timeout check compares it with `end`.
471 start = time( NULL );
472 retval = osrf_app_session_queue_wait( session, keepalive, &recvd );
475 osrfLogDebug( OSRF_LOG_MARK, "Data received == %d", recvd );
477 // Now we check a number of possible reasons to exit the loop.
479 // If the method call decided to terminate immediately,
480 // note that for future reference.
484 // If an error occurred when we tried to service the request, exit the loop.
486 osrfLogError( OSRF_LOG_MARK, "queue-wait returned non-success %d", retval );
490 // If the client disconnected, exit the loop.
491 if( session->state != OSRF_SESSION_CONNECTED )
494 // If we timed out while waiting for a request, exit the loop.
// The timeout branch is taken only when nothing was received and the elapsed time
// reached the keepalive; it reports "Disconnected on timeout" on the session.
495 if( !recvd && (end - start) >= keepalive ) {
496 osrfLogInfo( OSRF_LOG_MARK,
497 "No request was received in %d seconds, exiting stateful session", keepalive );
498 osrfAppSessionStatus(
502 0, "Disconnected on timeout" );
507 // If the child process has decided to terminate immediately, exit the loop.
512 osrfLogDebug( OSRF_LOG_MARK, "Exiting keepalive loop for session %s", session->session_id );
513 osrfAppSessionFree( session );
518 @brief Partially initialize a prefork_simple provided by the caller.
519 @param prefork Pointer to a raw prefork_simple to be initialized.
520 @param client Pointer to a transport_client (connection to Jabber).
521 @param max_requests The maximum number of requests that a child process may service
523 @param min_children Minimum number of child processes to maintain.
524 @param max_children Maximum number of child processes to maintain.
525 @return 0 if successful, or 1 if not (due to invalid parameters).
// Validates the child limits and fills in a caller-supplied prefork_simple.
// appname and keepalive are left empty here; osrf_prefork_run() sets them afterwards.
527 static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
528 int max_requests, int min_children, int max_children ) {
// The minimum may not exceed the maximum.
530 if( min_children > max_children ) {
531 osrfLogError( OSRF_LOG_MARK, "min_children (%d) is greater "
532 "than max_children (%d)", min_children, max_children );
// The maximum may not exceed the compile-time ceiling ABS_MAX_CHILDREN.
536 if( max_children > ABS_MAX_CHILDREN ) {
537 osrfLogError( OSRF_LOG_MARK, "max_children (%d) is greater than ABS_MAX_CHILDREN (%d)",
538 max_children, ABS_MAX_CHILDREN );
542 osrfLogInfo( OSRF_LOG_MARK, "Prefork launching child with max_request=%d,"
543 "min_children=%d, max_children=%d", max_requests, min_children, max_children );
545 /* flesh out the struct */
546 prefork->max_requests = max_requests;
547 prefork->min_children = min_children;
548 prefork->max_children = max_children;
// The remaining members start out zero or NULL: no children exist yet and all lists are empty.
550 prefork->data_to_child = 0;
551 prefork->data_to_parent = 0;
552 prefork->current_num_children = 0;
553 prefork->keepalive = 0;
554 prefork->appname = NULL;
555 prefork->first_child = NULL;
556 prefork->idle_list = NULL;
557 prefork->free_list = NULL;
558 prefork->connection = client;
559 prefork->sighup_pending_list = NULL;
565 @brief Spawn a new child process and put it in the idle list.
566 @param forker Pointer to the prefork_simple that will own the process.
567 @return Pointer to the new prefork_child, or not at all.
569 Spawn a new child process. Create a prefork_child for it and put it in the idle list.
571 After forking, the parent returns a pointer to the new prefork_child. The child
572 services its quota of requests and then terminates without returning.
// Creates the two pipes for a new child, forks, and puts the child's prefork_child on
// the idle list. The parent counts the new child; the child resets its signal handlers,
// closes the pipe ends it does not use, runs its init hook and starts waiting for requests.
574 static prefork_child* launch_child( prefork_simple* forker ) {
580 // Set up the data and status pipes
581 if( pipe( data_fd ) < 0 ) { /* build the data pipe*/
582 osrfLogError( OSRF_LOG_MARK, "Pipe making error" );
586 if( pipe( status_fd ) < 0 ) {/* build the status pipe */
587 osrfLogError( OSRF_LOG_MARK, "Pipe making error" );
593 osrfLogInternal( OSRF_LOG_MARK, "Pipes: %d %d %d %d",
594 data_fd[0], data_fd[1], status_fd[0], status_fd[1] );
596 // Create and initialize a prefork_child for the new process
// Element [0] of each pipe is passed as the read end and element [1] as the write end.
597 prefork_child* child = prefork_child_init( forker, data_fd[0],
598 data_fd[1], status_fd[0], status_fd[1] );
600 if( (pid=fork()) < 0 ) {
601 osrfLogError( OSRF_LOG_MARK, "Forking Error" );
// No process was created, so the prefork_child is given back.
602 prefork_child_free( forker, child );
606 // Add the new child to the head of the idle list
// This happens before the pid check, so it runs in the parent and in the child alike.
607 child->next = forker->idle_list;
608 forker->idle_list = child;
610 if( pid > 0 ) { /* parent */
// Make sure child deaths are noticed, and count the new child.
612 signal( SIGCHLD, sigchld_handler );
613 ( forker->current_num_children )++;
616 osrfLogDebug( OSRF_LOG_MARK, "Parent launched %d", pid );
617 /* *no* child pipe FD's can be closed or the parent will re-use fd's that
618 the children are currently using */
624 // we don't want to adopt our parent's handlers.
625 signal( SIGUSR1, SIG_DFL );
626 signal( SIGUSR2, SIG_DFL );
627 signal( SIGTERM, SIG_DFL );
628 signal( SIGINT, SIG_DFL );
629 signal( SIGQUIT, SIG_DFL );
630 signal( SIGCHLD, SIG_DFL );
631 signal( SIGHUP, SIG_DFL );
633 osrfLogInternal( OSRF_LOG_MARK,
634 "I am new child with read_data_fd = %d and write_status_fd = %d",
635 child->read_data_fd, child->write_status_fd );
// The child reads requests and writes status, so it closes the two ends that only
// the parent uses.
637 child->pid = getpid();
638 close( child->write_data_fd );
639 close( child->read_status_fd );
642 if( prefork_child_init_hook( child ) == -1 ) {
643 osrfLogError( OSRF_LOG_MARK,
644 "Forker child going away because we could not connect to OpenSRF..." );
645 osrf_prefork_child_exit( child );
648 prefork_child_wait( child ); // Should exit without returning
649 osrf_prefork_child_exit( child ); // Just to be sure
650 return NULL; // Unreachable, but it keeps the compiler happy
655 @brief Terminate a child process.
656 @param child Pointer to the prefork_child representing the child process (not used).
658 Called only by child processes. Dynamically call an application-specific shutdown
659 function from a previously loaded shared library; then exit.
// Exit path for a child process. The `child` argument is not used by the call below.
661 static void osrf_prefork_child_exit( prefork_child* child ) {
// Runs the exit code through osrfAppRunExitCode() before the process goes away.
662 osrfAppRunExitCode();
667 @brief Launch all the child processes, putting them in the idle list.
668 @param forker Pointer to the prefork_simple that will own the children.
670 Called only by the parent process (in order to become a parent).
// Starts the initial set of children, one launch_child() call per child, up to
// the forker's min_children.
672 static void prefork_launch_children( prefork_simple* forker ) {
673 if( !forker ) return;
// The loop counts attempts, not successes: launch_child()'s return value is ignored,
// so a failed launch is not retried here.
675 while( c++ < forker->min_children )
676 launch_child( forker );
680 @brief Signal handler for SIGCHLD: note that a child process has terminated.
681 @param sig The value of the trapped signal; always SIGCHLD.
683 Set a boolean to be checked later.
// SIGCHLD handler for the parent process; `sig` is not examined.
685 static void sigchld_handler( int sig ) {
// Install the handler again so that later SIGCHLDs are also caught.
686 signal( SIGCHLD, sigchld_handler );
691 @brief Signal handler for SIGUSR1
692 @param sig The value of the trapped signal; always SIGUSR1.
694 Send unregister command to all registered routers.
// SIGUSR1 handler: asks every configured router to drop this application.
// NOTE(review): this does config lookups, logging and message sends from inside a
// signal handler; confirm those calls are safe in that context.
696 static void sigusr1_handler( int sig ) {
// Nothing to do until osrf_prefork_run() has published the forker.
697 if (!global_forker) return;
// true selects un-registration.
698 osrf_prefork_register_routers(global_forker->appname, true);
// Install the handler again for the next SIGUSR1.
699 signal( SIGUSR1, sigusr1_handler );
703 @brief Signal handler for SIGUSR2
704 @param sig The value of the trapped signal; always SIGUSR2.
706 Send register command to all known routers
// SIGUSR2 handler: (re-)registers this application with every configured router.
// NOTE(review): this does config lookups, logging and message sends from inside a
// signal handler; confirm those calls are safe in that context.
708 static void sigusr2_handler( int sig ) {
// Nothing to do until osrf_prefork_run() has published the forker.
709 if (!global_forker) return;
// false selects registration.
710 osrf_prefork_register_routers(global_forker->appname, false);
// Install the handler again for the next SIGUSR2.
711 signal( SIGUSR2, sigusr2_handler );
715 @brief Signal handler for SIGTERM
716 @param sig The value of the trapped signal; always SIGTERM
718 Perform a graceful prefork server shutdown.
// SIGTERM handler: starts a graceful shutdown of the prefork server.
720 static void sigterm_handler(int sig) {
// Nothing to do until osrf_prefork_run() has published the forker.
721 if (!global_forker) return;
722 osrfLogInfo(OSRF_LOG_MARK, "server: received SIGTERM, shutting down");
// true asks prefork_clear() for its graceful variant.
723 prefork_clear(global_forker, true);
728 @brief Signal handler for SIGINT or SIGQUIT
729 @param sig The value of the trapped signal
731 Perform a non-graceful prefork server shutdown.
// Handler shared by SIGINT and SIGQUIT: starts a non-graceful shutdown.
733 static void sigint_handler(int sig) {
// Nothing to do until osrf_prefork_run() has published the forker.
734 if (!global_forker) return;
735 osrfLogInfo(OSRF_LOG_MARK, "server: received SIGINT/QUIT, shutting down");
// false asks prefork_clear() for its non-graceful variant.
736 prefork_clear(global_forker, false);
// SIGHUP handler: reloads the configuration, applies the new log level, records which
// children were active at the time of the signal, and kills the idle children.
740 static void sighup_handler(int sig) {
// Nothing to do until osrf_prefork_run() has published the forker.
741 if (!global_forker) return;
742 osrfLogInfo(OSRF_LOG_MARK, "server: received SIGHUP, reloading config");
// Re-read the same file and context that the current default config was loaded from.
744 osrfConfig* oldConfig = osrfConfigGetDefaultConfig();
745 osrfConfig* newConfig = osrfConfigInit(
746 oldConfig->configFileName, oldConfig->configContext);
749 osrfLogError(OSRF_LOG_MARK, "Config reload failed");
// From here on the reloaded config is the default one.
754 osrfConfigSetDefaultConfig(newConfig);
756 // apply the log level from the reloaded file
757 char* log_level = osrfConfigGetValue(NULL, "/loglevel");
759 int level = atoi(log_level);
760 osrfLogSetLevel(level);
763 // Copy the list of active children into the sighup_pending list.
764 // Cloning is necessary, since the nodes in the active list, particularly
765 // their next/prev pointers, will start changing once we exit this func.
766 // sighup_pending_list is a non-circular, singly linked list.
767 prefork_child* cur_child = global_forker->first_child;
768 prefork_child* clone;
770 // the first_pid lets us know when we've made a full circle of the active list.
773 while (cur_child && cur_child->pid != first_pid) {
// Remember the pid of the first node; the walk stops when it comes around to it again.
775 if (!first_pid) first_pid = cur_child->pid;
777 // all we need to keep track of is the pid
778 clone = safe_malloc(sizeof(prefork_child));
779 clone->pid = cur_child->pid;
782 osrfLogDebug(OSRF_LOG_MARK,
783 "Adding child %d to sighup pending list", clone->pid);
785 // add the clone to the front of the list
786 if (global_forker->sighup_pending_list)
787 clone->next = global_forker->sighup_pending_list;
788 global_forker->sighup_pending_list = clone;
790 cur_child = cur_child->next;
793 // Kill all idle children.
794 // Let them get cleaned up through the normal response-handling cycle
795 cur_child = global_forker->idle_list;
// The idle list is linear, so following next visits each idle child once.
797 osrfLogDebug(OSRF_LOG_MARK, "Killing child in SIGHUP %d", cur_child->pid);
798 kill(cur_child->pid, SIGKILL);
799 cur_child = cur_child->next;
805 @brief Replenish the collection of child processes, after one has terminated.
806 @param forker Pointer to the prefork_simple that manages the child processes.
808 The parent calls this function when it notices (via a signal handler) that
809 a child process has died.
811 Wait on the dead children so that they won't be zombies. Spawn new ones as needed
812 to maintain at least a minimum number.
// Collects every child that has already terminated, removes each from the forker's
// bookkeeping, and then launches replacements until min_children is reached again.
814 static void reap_children( prefork_simple* forker ) {
818 // Reset our boolean so that we can detect any further terminations.
821 // Bury the children so that they won't be zombies. WNOHANG means that waitpid() returns
822 // immediately if there are no waitable children, instead of waiting for more to die.
823 // Ignore the return code of the child. We don't do an autopsy.
824 while( (child_pid = waitpid( -1, NULL, WNOHANG )) > 0 ) {
// One fewer live child; drop its prefork_child by pid.
825 --forker->current_num_children;
826 del_prefork_child( forker, child_pid );
829 // Spawn more children as needed.
// NOTE(review): this loop ends only when launch_child() raises current_num_children.
// The fork-error path in launch_child() does not raise it, so a persistent launch
// failure would make this loop retry without pause; confirm whether that is intended.
830 while( forker->current_num_children < forker->min_children )
831 launch_child( forker );
835 @brief Read transport_messages and dispatch them to child processes for servicing.
836 @param forker Pointer to the prefork_simple that manages the child processes.
838 This is the main loop of the parent process, and once entered, does not exit.
840 For each usable transport_message received: look for an idle child to service it. If
841 no idle children are available, either spawn a new one or, if we've already spawned the
842 maximum number of children, wait for one to become available. Once a child is available
843 by whatever means, write an XML version of the input message, to a pipe designated for
// Main loop of the parent: receive a transport_message, find a child to service it
// (an idle one first, otherwise a newly launched one if max_children allows, otherwise
// wait for a busy one to finish), and write the message's XML to that child's data pipe.
846 static void prefork_run( prefork_simple* forker ) {
848 if( NULL == forker->idle_list )
849 return; // No available children, and we haven't even started yet
851 transport_message* cur_msg = NULL;
855 if( forker->first_child == NULL && forker->idle_list == NULL ) {/* no more children */
856 osrfLogWarning( OSRF_LOG_MARK, "No more children..." );
860 // Wait indefinitely for an input message
861 osrfLogDebug( OSRF_LOG_MARK, "Forker going into wait for data..." );
862 cur_msg = client_recv( forker->connection, -1 );
864 if( cur_msg == NULL ) {
865 // most likely a signal was received. clean up any recently
866 // deceased children and try again.
868 reap_children(forker);
872 message_prepare_xml( cur_msg );
873 const char* msg_data = cur_msg->msg_xml;
874 if( ! msg_data || ! *msg_data ) {
// One %s conversion per argument: the kind of message ("empty" or "NULL"), the sender,
// and the thread.
875 osrfLogWarning( OSRF_LOG_MARK, "Received %s message from %s, thread %s",
876 (msg_data ? "empty" : "NULL"), cur_msg->sender, cur_msg->thread );
877 message_free( cur_msg );
878 continue; // Message not usable; go on to the next one.
881 int honored = 0; /* will be set to true when we service the request */
// A non-blocking check (forever == 0) moves children that have reported in back to
// the idle list before we look for one.
887 if(check_children( forker, 0 ) < 0) {
888 continue; // check failed, try again
893 osrfLogDebug( OSRF_LOG_MARK, "Server received inbound data" );
895 prefork_child* cur_child = NULL;
897 // Look for an available child in the idle list. Since the idle list operates
898 // as a stack, the child we get is the one that was most recently active, or
899 // most recently spawned. That means it's the one most likely still to be in
900 // physical memory, and the one least likely to have to be swapped in.
901 while( forker->idle_list ) {
903 osrfLogDebug( OSRF_LOG_MARK, "Looking for idle child" );
904 // Grab the prefork_child at the head of the idle list
905 cur_child = forker->idle_list;
906 forker->idle_list = cur_child->next;
907 cur_child->next = NULL;
909 osrfLogInternal( OSRF_LOG_MARK,
910 "Searching for available child. cur_child->pid = %d", cur_child->pid );
911 osrfLogInternal( OSRF_LOG_MARK, "Current num children %d",
912 forker->current_num_children );
914 osrfLogDebug( OSRF_LOG_MARK, "forker sending data to %d", cur_child->pid );
915 osrfLogInternal( OSRF_LOG_MARK, "Writing to child fd %d",
916 cur_child->write_data_fd );
// The terminating NUL is sent along with the XML (strlen + 1 bytes).
918 int written = write( cur_child->write_data_fd, msg_data, strlen( msg_data ) + 1 );
920 // This child appears to be dead or unusable. Discard it.
921 osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d: %s",
922 errno, strerror( errno ));
923 kill( cur_child->pid, SIGKILL );
924 del_prefork_child( forker, cur_child->pid );
928 add_prefork_child( forker, cur_child ); // Add it to active list
933 /* if none available, add a new child if we can */
935 osrfLogDebug( OSRF_LOG_MARK, "Not enough children, attempting to add..." );
937 if( forker->current_num_children < forker->max_children ) {
938 osrfLogDebug( OSRF_LOG_MARK, "Launching new child with current_num = %d",
939 forker->current_num_children );
941 launch_child( forker ); // Put a new child into the idle list
942 if( forker->idle_list ) {
944 // Take the new child from the idle list
945 prefork_child* new_child = forker->idle_list;
946 forker->idle_list = new_child->next;
947 new_child->next = NULL;
949 osrfLogDebug( OSRF_LOG_MARK, "Writing to new child fd %d : pid %d",
950 new_child->write_data_fd, new_child->pid );
953 new_child->write_data_fd, msg_data, strlen( msg_data ) + 1 );
955 // This child appears to be dead or unusable. Discard it.
956 osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d: %s",
957 errno, strerror( errno ));
// The child that failed is new_child. cur_child is either NULL or an earlier child
// from the idle-list search above, so it must not be used here.
958 kill( new_child->pid, SIGKILL );
959 del_prefork_child( forker, new_child->pid );
961 add_prefork_child( forker, new_child );
966 osrfLogWarning( OSRF_LOG_MARK, "Could not launch a new child as %d children "
967 "were already running; consider increasing max_children for this "
968 "application higher than %d in the OpenSRF configuration if this "
969 "message occurs frequently",
970 forker->current_num_children, forker->max_children );
// Still not serviced: block (forever == 1) until some child reports in.
975 osrfLogWarning( OSRF_LOG_MARK, "No children available, waiting..." );
976 if( check_children( forker, 1 ) >= 0 ) {
977 // Tell the loop not to call check_children again, since we just successfully called it
983 reap_children( forker );
985 } // end while( ! honored )
987 message_free( cur_msg );
989 } /* end top level listen loop */
994 @brief See if any children have become available.
995 @param forker Pointer to the prefork_simple that owns the children.
996 @param forever Boolean: true if we should wait indefinitely.
997 @return 0 or greater if successful, -1 on select error/interrupt
999 Call select() for all the children in the active list. Read each active file
1000 descriptor and move the corresponding child to the idle list.
1002 If @a forever is true, wait indefinitely for input. Otherwise return immediately if
1003 there are no active file descriptors.
// Look for active children that have written to their status pipe, and move each
// one that has reported back from the circular active list to the idle list.
// `forever` distinguishes the select() call with no timeout from the one bounded by `tv`.
1005 static int check_children( prefork_simple* forker, int forever ) {
// NOTE(review): the embedded listing numbers skip 1006-1007 here, so this call may
// sit behind a guard that is not shown -- confirm against the complete file.
1008 reap_children( forker );
1010 if( NULL == forker->first_child ) {
1011 // If forever is true, then we're here because we've run out of idle
1012 // processes, so there should be some active ones around, except during
1013 // graceful shutdown, as we wait for all active children to become idle.
1014 // If forever is false, then the children may all be idle, and that's okay.
1016 osrfLogDebug( OSRF_LOG_MARK, "No active child processes to check" );
// Start from an empty descriptor set; it is filled from the active list below.
1022 FD_ZERO( &read_set );
1026 // Prepare to select() on pipes from all the active children
1027 prefork_child* cur_child = forker->first_child;
// Track the highest descriptor seen, because select() is called with max_fd + 1.
1029 if( cur_child->read_status_fd > max_fd )
1030 max_fd = cur_child->read_status_fd;
1031 FD_SET( cur_child->read_status_fd, &read_set );
1032 cur_child = cur_child->next;
// The active list is circular, so the walk is complete once it is back at the head.
1033 } while( cur_child != forker->first_child );
1035 FD_CLR( 0, &read_set ); /* just to be sure */
// NULL timeout: this select() call does not time out.
1039 if( (select_ret=select( max_fd + 1, &read_set, NULL, NULL, NULL )) == -1 ) {
1040 osrfLogWarning( OSRF_LOG_MARK, "Select returned error %d on check_children: %s",
1041 errno, strerror( errno ));
1043 osrfLogInfo( OSRF_LOG_MARK,
1044 "select() completed after waiting on children to become available" );
// Timed variant: the wait is bounded by whatever `tv` holds.
// NOTE(review): the initialisation of tv is not visible in this listing -- confirm its value.
1052 if( (select_ret=select( max_fd + 1, &read_set, NULL, NULL, &tv )) == -1 ) {
1053 osrfLogWarning( OSRF_LOG_MARK, "Select returned error %d on check_children: %s",
1054 errno, strerror( errno ));
// select_ret is 0 when no descriptor was ready and -1 when select() failed.
1058 if( select_ret <= 0 ) // we're done here
1061 // Check each child in the active list.
1062 // If it has responded, move it to the idle list.
1063 cur_child = forker->first_child;
1064 prefork_child* next_child = NULL;
1065 int num_handled = 0;
// Remember the successor first: cur_child->next is overwritten below when the
// child is pushed onto the idle list.
1067 next_child = cur_child->next;
1068 if( FD_ISSET( cur_child->read_status_fd, &read_set )) {
1069 osrfLogDebug( OSRF_LOG_MARK,
1070 "Server received status from a child %d", cur_child->pid );
1074 /* now suck off the data */
// Read at most sizeof( buf ) - 1 bytes of the child's status message.
1076 if( (n=read( cur_child->read_status_fd, buf, sizeof( buf ) - 1 )) < 0 ) {
1077 osrfLogWarning( OSRF_LOG_MARK,
1078 "Read error after select in child status read with errno %d: %s",
1079 errno, strerror( errno ));
1083 osrfLogDebug( OSRF_LOG_MARK, "Read %d bytes from status buffer: %s", n, buf );
1087 // if this child is in the sighup_pending list, kill the child,
1088 // but leave it in the active list so that it won't be picked
1089 // for new work. When reap_children() next runs, it will be
1090 // properly cleaned up.
1091 prefork_child* hup_child = forker->sighup_pending_list;
1092 prefork_child* prev_hup_child = NULL;
1093 int hup_cleanup = 0;
// Follow sighup_pending_list through its next pointers, looking for an entry
// whose pid matches this child.
1096 pid_t hup_pid = hup_child->pid;
1097 if (hup_pid == cur_child->pid) {
1099 osrfLogDebug(OSRF_LOG_MARK,
1100 "server: killing previously-active child after "
1101 "receiving SIGHUP: %d", hup_pid);
1103 if (forker->sighup_pending_list == hup_child) {
1104 // hup_child is the first (maybe only) in the list
1105 forker->sighup_pending_list = hup_child->next;
1107 // splice it from the list
1108 prev_hup_child->next = hup_child->next;
// hup_pid was copied out above, so it remains usable after hup_child is freed.
1111 free(hup_child); // clean up the thin clone
1112 kill(hup_pid, SIGKILL);
1117 prev_hup_child = hup_child;
1118 hup_child = hup_child->next;
1123 // Remove the child from the active list
1124 if( forker->first_child == cur_child ) {
1125 if( cur_child->next == cur_child ) {
1126 // only child in the active list
1127 forker->first_child = NULL;
1129 forker->first_child = cur_child->next;
// Unlink cur_child from its neighbours in the doubly linked list.
1132 cur_child->next->prev = cur_child->prev;
1133 cur_child->prev->next = cur_child->next;
1135 // Add it to the idle list
// The idle list is chained through `next`; the child becomes its new head.
1136 cur_child->prev = NULL;
1137 cur_child->next = forker->idle_list;
1138 forker->idle_list = cur_child;
1142 cur_child = next_child;
// Stop when the active list has emptied or the walk is back at its head.
// NOTE(review): when the head itself was just unlinked, first_child was reseated to
// cur_child->next (== next_child), so this test ends the loop right after that child;
// other ready children would then be left for a later call -- confirm this is intended.
1143 } while( forker->first_child && forker->first_child != next_child );
1149 @brief Service up to a set maximum number of requests; then shut down.
1150 @param child Pointer to the prefork_child representing the child process.
1152 Called only by child process.
1154 Enter a loop, for up to max_requests iterations. On each iteration:
1155 - Wait indefinitely for a request from the parent.
1156 - Service the request.
1157 - Increment a counter. If the limit hasn't been reached, notify the parent that you
1158 are available for another request.
1160 After exiting the loop, shut down and terminate the process.
// Child-side service loop: read requests from the parent's data pipe, process them,
// and report availability on the status pipe, for at most child->max_requests
// iterations; then exit through osrf_prefork_child_exit().
1162 static void prefork_child_wait( prefork_child* child ) {
// gbuf accumulates one whole request; buf is the fixed-size chunk used by each read().
1165 growing_buffer* gbuf = buffer_init( READ_BUFSIZE );
1166 char buf[READ_BUFSIZE];
1168 for( i = 0; i < child->max_requests; i++ ) {
1171 int gotdata = 0; // boolean; set to true if we get data
// Presumably clears O_NONBLOCK so that the first read() waits for a request -- confirm
// against the definition of clr_fl().
1172 clr_fl( child->read_data_fd, O_NONBLOCK );
1174 // Read a request from the parent, via a pipe, into a growing_buffer.
1175 while( (n = read( child->read_data_fd, buf, READ_BUFSIZE-1 )) > 0 ) {
1177 osrfLogDebug( OSRF_LOG_MARK, "Prefork child read %d bytes of data", n );
// Presumably sets O_NONBLOCK so that, once data has arrived, the read loop ends
// (EAGAIN) when the pipe is drained instead of blocking -- confirm against set_fl().
1179 set_fl( child->read_data_fd, O_NONBLOCK );
1182 buffer_add_n( gbuf, buf, n );
// NOTE(review): errno is examined here after the read loop, and no reset of errno is
// visible beforehand; confirm a stale value cannot be observed when read() returned 0.
1185 if( errno == EAGAIN )
1188 if( errno == EPIPE ) {
1189 osrfLogDebug( OSRF_LOG_MARK, "C child attempted read on broken pipe, exiting..." );
1193 int terminate_now = 0; // Boolean
1196 osrfLogWarning( OSRF_LOG_MARK,
1197 "Prefork child read returned error with errno %d", errno );
1200 } else if( gotdata ) {
1201 // Process the request
1202 osrfLogDebug( OSRF_LOG_MARK, "Prefork child got a request.. processing.." );
// A nonzero result from the request handler is treated below as a request to stop.
1203 terminate_now = prefork_child_process_request( child, gbuf->buf );
// Presumably empties the buffer so the next request starts clean -- confirm.
1204 buffer_reset( gbuf );
1207 if( terminate_now ) {
1208 // We're terminating prematurely -- presumably due to a fatal error condition.
1209 osrfLogWarning( OSRF_LOG_MARK, "Prefork child terminating abruptly" );
// No notification after the final permitted request: the loop is about to end.
1213 if( i < child->max_requests - 1 ) {
1214 // Report back to the parent for another request.
1216 ssize_t len = write(
1217 child->write_status_fd, "available" /*less than 64 bytes*/, msg_len );
// Anything other than a complete write of msg_len bytes is handled as fatal for this child.
1218 if( len != msg_len ) {
1219 osrfLogError( OSRF_LOG_MARK,
1220 "Drone terminating: unable to notify listener of availability: %s",
1222 buffer_free( gbuf );
1223 osrf_prefork_child_exit( child );
// Normal end of service: release the request buffer, log the totals, and exit the child.
1228 buffer_free( gbuf );
1230 osrfLogDebug( OSRF_LOG_MARK, "Child with max-requests=%d, num-served=%d exiting...[%ld]",
1231 child->max_requests, i, (long) getpid());
1233 osrf_prefork_child_exit( child );
1237 @brief Add a prefork_child to the end of the active list.
1238 @param forker Pointer to the prefork_simple that owns the list.
1239 @param child Pointer to the prefork_child to be added.
// Append `child` to the circular, doubly linked active list headed by
// forker->first_child.
1241 static void add_prefork_child( prefork_simple* forker, prefork_child* child ) {
1243 if( forker->first_child == NULL ) {
1244 // Simplest case: list is initially empty.
// A lone node is both its own successor and its own predecessor.
1245 forker->first_child = child;
1246 child->next = child;
1247 child->prev = child;
1249 // Find the last node in the circular list.
// In a circular list the tail is simply the head's predecessor; no traversal is needed.
1250 prefork_child* last_child = forker->first_child->prev;
1252 // Insert the new child between the last and first children.
1253 last_child->next = child;
1254 child->prev = last_child;
1255 child->next = forker->first_child;
1256 forker->first_child->prev = child;
1261 @brief Delete and destroy a dead child from our list.
1262 @param forker Pointer to the prefork_simple that owns the dead child.
1263 @param pid Process ID of the dead child.
1265 Look for the dead child first in the list of active children. If you don't find it
1266 there, look in the list of idle children. If you find it, remove it from whichever
1267 list it's on, and destroy it.
// Find the child with the given pid -- first in the active list, then in the idle
// list -- unlink it, and hand it to prefork_child_free().
1269 static void del_prefork_child( prefork_simple* forker, pid_t pid ) {
1271 osrfLogDebug( OSRF_LOG_MARK, "Deleting Child: %d", pid );
1273 prefork_child* cur_child = NULL;
1275 // Look first in the active list
1276 if( forker->first_child ) {
1277 cur_child = forker->first_child; /* current pointer */
// Advance until the pid matches or the next node would wrap back to the head.
1278 while( cur_child->pid != pid && cur_child->next != forker->first_child )
1279 cur_child = cur_child->next;
// The scan can stop on the last node without a match, so the pid is tested again.
1281 if( cur_child->pid == pid ) {
1282 // We found the right node. Remove it from the list.
1283 if( cur_child->next == cur_child )
1284 forker->first_child = NULL; // only child in the list
1286 if( forker->first_child == cur_child )
1287 forker->first_child = cur_child->next; // Reseat forker->first_child
1289 // Stitch the adjacent nodes together
1290 cur_child->prev->next = cur_child->next;
1291 cur_child->next->prev = cur_child->prev;
1294 cur_child = NULL; // Didn't find it in the active list
1298 // Maybe it's in the idle list. This can happen if, for example,
1299 // a child is killed by a signal while it's between requests.
// prev is kept alongside cur_child so that a node can be unlinked through
// prev->next, since this walk uses only the next pointers.
1301 prefork_child* prev = NULL;
1302 cur_child = forker->idle_list;
1303 while( cur_child && cur_child->pid != pid ) {
1305 cur_child = cur_child->next;
1309 // Detach from the list
1311 prev->next = cur_child->next;
1313 forker->idle_list = cur_child->next;
1314 } // else we can't find it
1317 // If we found the node, destroy it.
// prefork_child_free() closes the child's pipe descriptors and puts the node on
// the forker's free list.
1319 prefork_child_free( forker, cur_child );
1323 @brief Create and initialize a prefork_child.
1324 @param forker Pointer to the prefork_simple that will own the prefork_child.
1325 @param read_data_fd Used by child to read request from parent.
1326 @param write_data_fd Used by parent to write request to child.
1327 @param read_status_fd Used by parent to read status from child.
1328 @param write_status_fd Used by child to write status to parent.
1329 @return Pointer to the newly created prefork_child.
1331 The calling code is responsible for freeing the prefork_child by calling
1332 prefork_child_free().
// Set up a prefork_child for the given pipe descriptors, reusing a node from the
// forker's free list when one is available and allocating a new one otherwise.
1334 static prefork_child* prefork_child_init( prefork_simple* forker,
1335 int read_data_fd, int write_data_fd,
1336 int read_status_fd, int write_status_fd ) {
1338 // Allocate a prefork_child -- from the free list if possible, or from
1339 // the heap if necessary. The free list is a non-circular, singly-linked list.
1340 prefork_child* child;
1341 if( forker->free_list ) {
// Pop the head of the free list.
1342 child = forker->free_list;
1343 forker->free_list = child->next;
1345 child = safe_malloc( sizeof( prefork_child ));
// Record the four pipe ends, then copy the per-child settings from the forker.
1348 child->read_data_fd = read_data_fd;
1349 child->write_data_fd = write_data_fd;
1350 child->read_status_fd = read_status_fd;
1351 child->write_status_fd = write_status_fd;
1352 child->max_requests = forker->max_requests;
1353 child->appname = forker->appname; // We don't make a separate copy
1354 child->keepalive = forker->keepalive;
1362 @brief Terminate all child processes and clear out a prefork_simple.
1363 @param prefork Pointer to the prefork_simple to be cleared out.
1365 We do not deallocate the prefork_simple itself, just its contents.
// Shut down every child owned by `prefork` and release what the struct holds: the
// active, idle and free lists, the connection, and the application name.
// `graceful` is the shutdown-mode flag. The test of it is not visible in this listing,
// but the comments and log text below tie the check_children() wait to a graceful
// shutdown and the SIGKILL path to the other case -- TODO confirm.
1367 static void prefork_clear( prefork_simple* prefork, bool graceful ) {
1369 // always de-register routers before killing child processes (or waiting
1370 // for them to complete) so that new requests are directed elsewhere.
// NOTE(review): this reads the appname from global_forker rather than from the
// `prefork` argument -- confirm both always refer to the same prefork_simple.
1371 osrf_prefork_register_routers(global_forker->appname, true);
1373 while( prefork->first_child ) {
1376 // wait for at least one active child to become idle, then repeat.
1377 // once complete, all children will be idle and cleaned up below.
1378 osrfLogInfo(OSRF_LOG_MARK, "graceful shutdown waiting...");
// The second argument is check_children()'s `forever` flag.
1379 check_children(prefork, 1);
1382 // Kill and delete all the active children
1383 kill( prefork->first_child->pid, SIGKILL );
1384 del_prefork_child( prefork, prefork->first_child->pid );
1389 osrfLogInfo(OSRF_LOG_MARK,
1390 "all active children are now idle in graceful shutdown");
1393 // Kill all the idle prefork children, close their file
1394 // descriptors, and move them to the free list.
// Detach the whole idle list from the forker first, then work on the detached chain.
1395 prefork_child* child = prefork->idle_list;
1396 prefork->idle_list = NULL;
// Save the successor before prefork_child_free() overwrites child->next.
1398 prefork_child* temp = child->next;
1399 kill( child->pid, SIGKILL );
1400 prefork_child_free( prefork, child );
1403 //prefork->current_num_children = 0;
1405 // Physically free the free list of prefork_children.
1406 child = prefork->free_list;
1407 prefork->free_list = NULL;
1409 prefork_child* temp = child->next;
1414 // Close the Jabber connection
1415 client_free( prefork->connection );
1416 prefork->connection = NULL;
1418 // After giving the child processes a second to terminate, wait on them so that they
1419 // don't become zombies. We don't wait indefinitely, so it's possible that some
1420 // children will survive a bit longer.
// WNOHANG: collect only children that have already exited; never block here.
1422 while( (waitpid( -1, NULL, WNOHANG )) > 0 ) {
1423 --prefork->current_num_children;
1426 free( prefork->appname );
1427 prefork->appname = NULL;
1431 @brief Destroy and deallocate a prefork_child.
1432 @param forker Pointer to the prefork_simple that owns the prefork_child.
1433 @param child Pointer to the prefork_child to be destroyed.
// Close all four pipe descriptors recorded in `child` and push the node onto the
// forker's free list, where prefork_child_init() can pick it up for reuse.
// The node's memory is not released here.
1435 static void prefork_child_free( prefork_simple* forker, prefork_child* child ) {
1436 close( child->read_data_fd );
1437 close( child->write_data_fd );
1438 close( child->read_status_fd );
1439 close( child->write_status_fd );
1441 // Stick the prefork_child in a free list for potential reuse. This is a
1442 // non-circular, singly linked list.
// The node becomes the new head of the free list.
1444 child->next = forker->free_list;
1445 forker->free_list = child;