]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/libopensrf/osrf_prefork.c
Reverting to a previous version believed to be good.
[OpenSRF.git] / src / libopensrf / osrf_prefork.c
/**
	@file osrf_prefork.c
	@brief Implementation of a preforking server: a listener process that hands
	incoming requests to a pool of child processes.

	Spawn a collection of child processes, replacing them as needed.  Forward requests to them
	and let the children do the work.

	Each child processes some maximum number of requests before it terminates itself.  When a
	child dies, either deliberately or otherwise, we can spawn another one to replace it,
	keeping the number of children within a predefined range.

	Use a doubly-linked circular list to keep track of the children, forwarding requests to them
	in an approximately round-robin fashion.

	For each child, set up two pipes:
	- One for the parent to send requests to the child.
	- One for the child to notify the parent that it is available for another request.

	The message sent to the child is an XML stanza as received from Jabber.

	When the child finishes processing the request, it writes the string "available" back
	to the parent.  Then the parent knows that it can send that child another request.
*/
24
25 #include <signal.h>
26 #include <sys/types.h>
27 #include <sys/time.h>
28 #include <unistd.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <sys/select.h>
33 #include <sys/wait.h>
34
35 #include "opensrf/utils.h"
36 #include "opensrf/log.h"
37 #include "opensrf/transport_client.h"
38 #include "opensrf/osrf_stack.h"
39 #include "opensrf/osrf_settings.h"
40 #include "opensrf/osrf_application.h"
41
/** Size of the stack buffer, and initial size of the growing_buffer, used by
	prefork_child_wait(). */
#define READ_BUFSIZE 1024
/** Hard upper limit on max_children; prefork_simple_init() rejects anything larger. */
#define ABS_MAX_CHILDREN 256
44
45 typedef struct {
46         int max_requests;     /**< How many requests a child processes before terminating. */
47         int min_children;     /**< Minimum number of children to maintain. */
48         int max_children;     /**< Maximum number of children to maintain. */
49         int fd;               /**< Unused. */
50         int data_to_child;    /**< Unused. */
51         int data_to_parent;   /**< Unused. */
52         int current_num_children;   /**< How many children are currently on the list. */
53         int keepalive;        /**< Keepalive time for stateful sessions. */
54         char* appname;        /**< Name of the application. */
55         /** Points to a circular linked list of children */
56         struct prefork_child_struct* first_child;
57         /** List of allocated but unused prefork_children, available for reuse.  Each one is just
58                 raw memory, apart from the "next" pointer used to stitch them together.  In particular,
59                 there is no child process for them, and the file descriptors are not open. */
60         struct prefork_child_struct* free_list;
61         transport_client* connection;  /**< Connection to Jabber */
62 } prefork_simple;
63
/**
	@brief One child process, as tracked on the parent's doubly-linked list.

	Two pipes connect parent and child: a data pipe (parent writes, child reads)
	and a status pipe (child writes, parent reads).
*/
struct prefork_child_struct {
	pid_t pid;                          /**< Process ID of the child. */

	/* data pipe */
	int read_data_fd;                   /**< Read end: the child reads requests here. */
	int write_data_fd;                  /**< Write end: the parent writes requests here. */

	/* status pipe */
	int read_status_fd;                 /**< Read end: the parent learns the child is available. */
	int write_status_fd;                /**< Write end: the child announces it is available again. */

	int available;                      /**< Boolean; true while the child is between requests. */
	int max_requests;                   /**< Requests the child may process before terminating. */
	const char* appname;                /**< Name of the application. */
	int keepalive;                      /**< Keepalive time for stateful sessions. */

	struct prefork_child_struct* next;  /**< Next node on the linked list. */
	struct prefork_child_struct* prev;  /**< Previous node on the linked list. */
};

typedef struct prefork_child_struct prefork_child;
79
80 static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
81         int max_requests, int min_children, int max_children );
82 static prefork_child* launch_child( prefork_simple* forker );
83 static void prefork_launch_children( prefork_simple* forker );
84 static void prefork_run(prefork_simple* forker);
85 static void add_prefork_child( prefork_simple* forker, prefork_child* child );
86
87 static void del_prefork_child( prefork_simple* forker, pid_t pid );
88 static void check_children( prefork_simple* forker, int forever );
89 static void prefork_child_process_request(prefork_child*, char* data);
90 static int prefork_child_init_hook(prefork_child*);
91 static prefork_child* prefork_child_init( prefork_simple* forker,
92                 int read_data_fd, int write_data_fd,
93                 int read_status_fd, int write_status_fd );
94
95 /* listens on the 'data_to_child' fd and wait for incoming data */
96 static void prefork_child_wait( prefork_child* child );
97 static void prefork_clear( prefork_simple* );
98 static void prefork_child_free( prefork_simple* forker, prefork_child* );
99 static void osrf_prefork_register_routers( const char* appname );
100 static void osrf_prefork_child_exit( prefork_child* );
101
102 /** Boolean.  Set to true by a signal handler when it traps SIGCHLD. */
103 static volatile sig_atomic_t child_dead;
104
105 static void sigchld_handler( int sig );
106
107 int osrf_prefork_run(const char* appname) {
108
109         if(!appname) {
110                 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run requires an appname to run!");
111                 return -1;
112         }
113
114         set_proc_title( "OpenSRF Listener [%s]", appname );
115
116         int maxr = 1000;
117         int maxc = 10;
118         int minc = 3;
119         int kalive = 5;
120
121         osrfLogInfo( OSRF_LOG_MARK, "Loading config in osrf_forker for app %s", appname);
122
123         char* max_req      = osrf_settings_host_value("/apps/%s/unix_config/max_requests", appname);
124         char* min_children = osrf_settings_host_value("/apps/%s/unix_config/min_children", appname);
125         char* max_children = osrf_settings_host_value("/apps/%s/unix_config/max_children", appname);
126         char* keepalive    = osrf_settings_host_value("/apps/%s/keepalive", appname);
127
128         if(!keepalive) osrfLogWarning( OSRF_LOG_MARK, "Keepalive is not defined, assuming %d", kalive);
129         else kalive = atoi(keepalive);
130
131         if(!max_req) osrfLogWarning( OSRF_LOG_MARK, "Max requests not defined, assuming %d", maxr);
132         else maxr = atoi(max_req);
133
134         if(!min_children) osrfLogWarning( OSRF_LOG_MARK,
135                         "Min children not defined, assuming %d", minc);
136         else minc = atoi(min_children);
137
138         if(!max_children) osrfLogWarning( OSRF_LOG_MARK,
139                         "Max children not defined, assuming %d", maxc);
140         else maxc = atoi(max_children);
141
142         free(keepalive);
143         free(max_req);
144         free(min_children);
145         free(max_children);
146         /* --------------------------------------------------- */
147
148         char* resc = va_list_to_string("%s_listener", appname);
149
150         if(!osrfSystemBootstrapClientResc( NULL, NULL, resc )) {
151                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
152                 free(resc);
153                 return -1;
154         }
155
156         free(resc);
157
158         prefork_simple forker;
159
160         if( prefork_simple_init( &forker, osrfSystemGetTransportClient(), maxr, minc, maxc ) ) {
161                 osrfLogError( OSRF_LOG_MARK,
162                                 "osrf_prefork_run() failed to create prefork_simple object" );
163                 return -1;
164         }
165
166         // Finish initializing the prefork_simple
167         forker.appname   = strdup(appname);
168         forker.keepalive = kalive;
169
170         // Spawn the children
171         prefork_launch_children( &forker );
172
173         // Tell the router that you're open for business
174         osrf_prefork_register_routers(appname);
175
176         // Sit back and let the requests roll in
177         osrfLogInfo( OSRF_LOG_MARK, "Launching osrf_forker for app %s", appname);
178         prefork_run( &forker );
179
180         osrfLogWarning( OSRF_LOG_MARK, "prefork_run() returned - how??");
181         prefork_clear( &forker );
182         return 0;
183 }
184
185 /**
186         @brief Register the application with a specified router.
187         @param appname Name of the application.
188         @param routerName Name of the router.
189         @param routerDomain Domain of the router.
190
191         Tell the router that you're open for business so that it can route requests to you.
192 */
193 static void osrf_prefork_send_router_registration(
194                 const char* appname, const char* routerName, const char* routerDomain ) {
195         // Get a pointer to the global transport_client
196         transport_client* client = osrfSystemGetTransportClient();
197
198         // Construct the Jabber address of the router
199         char* jid = va_list_to_string( "%s@%s/router", routerName, routerDomain );
200         osrfLogInfo( OSRF_LOG_MARK, "%s registering with router %s", appname, jid );
201
202         // Create the registration message, and send it
203         transport_message* msg = message_init( "registering", NULL, NULL, jid, NULL );
204         message_set_router_info( msg, NULL, NULL, appname, "register", 0 );
205         client_send_message( client, msg );
206
207         // Clean up
208         message_free( msg );
209         free(jid);
210 }
211
212 /** parses a single "complex" router configuration chunk */
213 static void osrf_prefork_parse_router_chunk(const char* appname, jsonObject* routerChunk) {
214
215         const char* routerName = jsonObjectGetString(jsonObjectGetKeyConst(routerChunk, "name"));
216         const char* domain = jsonObjectGetString(jsonObjectGetKeyConst(routerChunk, "domain"));
217         const jsonObject* services = jsonObjectGetKeyConst(routerChunk, "services");
218         osrfLogDebug(OSRF_LOG_MARK, "found router config with domain %s and name %s",
219                         routerName, domain);
220
221         if( services && services->type == JSON_HASH ) {
222                 osrfLogDebug(OSRF_LOG_MARK, "investigating router information...");
223                 const jsonObject* service_obj = jsonObjectGetKeyConst(services, "service");
224                 if( !service_obj )
225                         ;    // do nothing (shouldn't happen)
226                 else if( JSON_ARRAY == service_obj->type ) {
227                         int j;
228                         for(j = 0; j < service_obj->size; j++ ) {
229                                 const char* service = jsonObjectGetString(jsonObjectGetIndex(service_obj, j));
230                                 if( service && !strcmp( appname, service ))
231                                         osrf_prefork_send_router_registration(appname, routerName, domain);
232                         }
233                 }
234                 else if( JSON_STRING == service_obj->type ) {
235                         if( !strcmp(appname, jsonObjectGetString( service_obj )) )
236                                 osrf_prefork_send_router_registration(appname, routerName, domain);
237                 }
238         } else {
239                 osrf_prefork_send_router_registration(appname, routerName, domain);
240         }
241 }
242
243 static void osrf_prefork_register_routers( const char* appname ) {
244
245         jsonObject* routerInfo = osrfConfigGetValueObject(NULL, "/routers/router");
246
247         int i;
248         for(i = 0; i < routerInfo->size; i++) {
249                 jsonObject* routerChunk = jsonObjectGetIndex(routerInfo, i);
250
251                 if(routerChunk->type == JSON_STRING) {
252                         /* this accomodates simple router configs */
253                         char* routerName = osrfConfigGetValue( NULL, "/router_name" );
254                         char* domain = osrfConfigGetValue(NULL, "/routers/router");
255                         osrfLogDebug(OSRF_LOG_MARK, "found simple router settings with router name %s",
256                                         routerName);
257                         osrf_prefork_send_router_registration(appname, routerName, domain);
258
259                 } else {
260                         osrf_prefork_parse_router_chunk(appname, routerChunk);
261                 }
262         }
263 }
264
265 static int prefork_child_init_hook(prefork_child* child) {
266
267         if(!child) return -1;
268         osrfLogDebug( OSRF_LOG_MARK, "Child init hook for child %d", child->pid);
269
270         osrfSystemInitCache();
271         char* resc = va_list_to_string("%s_drone", child->appname);
272
273         /* if we're a source-client, tell the logger now that we're a new process*/
274         char* isclient = osrfConfigGetValue(NULL, "/client");
275         if( isclient && !strcasecmp(isclient,"true") )
276                 osrfLogSetIsClient(1);
277         free(isclient);
278
279         /* we want to remove traces of our parents socket connection
280         * so we can have our own */
281         osrfSystemIgnoreTransportClient();
282
283         if(!osrfSystemBootstrapClientResc( NULL, NULL, resc)) {
284                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
285                 free(resc);
286                 return -1;
287         }
288
289         free(resc);
290
291         if( ! osrfAppRunChildInit(child->appname) ) {
292                 osrfLogDebug(OSRF_LOG_MARK, "Prefork child_init succeeded\n");
293         } else {
294                 osrfLogError(OSRF_LOG_MARK, "Prefork child_init failed\n");
295                 return -1;
296         }
297
298         set_proc_title( "OpenSRF Drone [%s]", child->appname );
299         return 0;
300 }
301
302 static void prefork_child_process_request(prefork_child* child, char* data) {
303         if( !child ) return;
304
305         transport_client* client = osrfSystemGetTransportClient();
306
307         if(!client_connected(client)) {
308                 osrfSystemIgnoreTransportClient();
309                 osrfLogWarning(OSRF_LOG_MARK, "Reconnecting child to opensrf after disconnect...");
310                 if(!osrf_system_bootstrap_client(NULL, NULL)) {
311                         osrfLogError( OSRF_LOG_MARK,
312                                 "Unable to bootstrap client in prefork_child_process_request()");
313                         sleep(1);
314                         osrf_prefork_child_exit(child);
315                 }
316         }
317
318         /* construct the message from the xml */
319         transport_message* msg = new_message_from_xml( data );
320
321         osrfAppSession* session = osrf_stack_transport_handler(msg, child->appname);
322         if(!session) return;
323
324         if( session->stateless && session->state != OSRF_SESSION_CONNECTED ) {
325                 osrfAppSessionFree( session );
326                 return;
327         }
328
329         osrfLogDebug( OSRF_LOG_MARK, "Entering keepalive loop for session %s", session->session_id );
330         int keepalive = child->keepalive;
331         int retval;
332         int recvd;
333         time_t start;
334         time_t end;
335
336         while(1) {
337
338                 osrfLogDebug(OSRF_LOG_MARK,
339                                 "osrf_prefork calling queue_wait [%d] in keepalive loop", keepalive);
340                 start   = time(NULL);
341                 retval  = osrf_app_session_queue_wait(session, keepalive, &recvd);
342                 end     = time(NULL);
343
344                 osrfLogDebug(OSRF_LOG_MARK, "Data received == %d", recvd);
345
346                 if(retval) {
347                         osrfLogError(OSRF_LOG_MARK, "queue-wait returned non-success %d", retval);
348                         break;
349                 }
350
351                 /* see if the client disconnected from us */
352                 if(session->state != OSRF_SESSION_CONNECTED)
353                         break;
354
355                 /* if no data was reveived within the timeout interval */
356                 if( !recvd && (end - start) >= keepalive ) {
357                         osrfLogInfo(OSRF_LOG_MARK,
358                                         "No request was received in %d seconds, exiting stateful session", keepalive);
359                         osrfAppSessionStatus(
360                                         session,
361                                         OSRF_STATUS_TIMEOUT,
362                                         "osrfConnectStatus",
363                                         0, "Disconnected on timeout" );
364
365                         break;
366                 }
367         }
368
369         osrfLogDebug( OSRF_LOG_MARK, "Exiting keepalive loop for session %s", session->session_id );
370         osrfAppSessionFree( session );
371         return;
372 }
373
374
375 /**
376         @brief Partially initialize a prefork_simple provided by the caller.
377         @param prefork Pointer to a a raw prefork_simple to be initialized.
378         @param client Pointer to a transport_client (connection to Jabber).
379         @param max_requests
380         @param min_children Minimum number of child processes to maintain.
381         @param max_children Maximum number of child processes to maintain.
382         @return 0 if successful, or 1 if not (due to invalid parameters).
383 */
384 static int prefork_simple_init( prefork_simple* prefork, transport_client* client,
385                  int max_requests, int min_children, int max_children ) {
386
387         if( min_children > max_children ) {
388                 osrfLogError( OSRF_LOG_MARK,  "min_children (%d) is greater "
389                                 "than max_children (%d)", min_children, max_children );
390                 return 1;
391         }
392
393         if( max_children > ABS_MAX_CHILDREN ) {
394                 osrfLogError( OSRF_LOG_MARK,  "max_children (%d) is greater than ABS_MAX_CHILDREN (%d)",
395                                 max_children, ABS_MAX_CHILDREN );
396                 return 1;
397         }
398
399         osrfLogInfo(OSRF_LOG_MARK, "Prefork launching child with max_request=%d,"
400                 "min_children=%d, max_children=%d", max_requests, min_children, max_children );
401
402         /* flesh out the struct */
403         //prefork_simple* prefork = safe_malloc(sizeof(prefork_simple));
404         prefork->max_requests = max_requests;
405         prefork->min_children = min_children;
406         prefork->max_children = max_children;
407         prefork->fd           = 0;
408         prefork->data_to_child = 0;
409         prefork->data_to_parent = 0;
410         prefork->current_num_children = 0;
411         prefork->keepalive    = 0;
412         prefork->appname      = NULL;
413         prefork->first_child  = NULL;
414         prefork->free_list    = NULL;
415         prefork->connection   = client;
416
417         return 0;
418 }
419
420 static prefork_child* launch_child( prefork_simple* forker ) {
421
422         pid_t pid;
423         int data_fd[2];
424         int status_fd[2];
425
426         /* Set up the data pipes and add the child struct to the parent */
427         if( pipe(data_fd) < 0 ) { /* build the data pipe*/
428                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
429                 return NULL;
430         }
431
432         if( pipe(status_fd) < 0 ) {/* build the status pipe */
433                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
434                 return NULL;
435         }
436
437         osrfLogInternal( OSRF_LOG_MARK,  "Pipes: %d %d %d %d",
438                         data_fd[0], data_fd[1], status_fd[0], status_fd[1] );
439         prefork_child* child = prefork_child_init( forker, data_fd[0],
440                         data_fd[1], status_fd[0], status_fd[1] );
441
442         add_prefork_child( forker, child );
443
444         if( (pid=fork()) < 0 ) {
445                 osrfLogError( OSRF_LOG_MARK,  "Forking Error" );
446                 return NULL;
447         }
448
449         if( pid > 0 ) {  /* parent */
450
451                 signal(SIGCHLD, sigchld_handler);
452                 (forker->current_num_children)++;
453                 child->pid = pid;
454
455                 osrfLogDebug( OSRF_LOG_MARK,  "Parent launched %d", pid );
456                 /* *no* child pipe FD's can be closed or the parent will re-use fd's that
457                         the children are currently using */
458                 return child;
459         }
460
461         else { /* child */
462
463                 osrfLogInternal( OSRF_LOG_MARK,
464                                 "I am  new child with read_data_fd = %d and write_status_fd = %d",
465                                 child->read_data_fd, child->write_status_fd );
466
467                 child->pid = getpid();
468                 close( child->write_data_fd );
469                 close( child->read_status_fd );
470
471                 /* do the initing */
472                 if( prefork_child_init_hook(child) == -1 ) {
473                         osrfLogError(OSRF_LOG_MARK,
474                                 "Forker child going away because we could not connect to OpenSRF...");
475                         osrf_prefork_child_exit(child);
476                 }
477
478                 prefork_child_wait( child );
479                 osrf_prefork_child_exit(child); /* just to be sure */
480         }
481         return NULL;
482 }
483
484 static void osrf_prefork_child_exit(prefork_child* child) {
485         osrfAppRunExitCode();
486         exit(0);
487 }
488
489 static void prefork_launch_children( prefork_simple* forker ) {
490         if(!forker) return;
491         int c = 0;
492         while( c++ < forker->min_children )
493                 launch_child( forker );
494 }
495
496
497 /**
498         @brief Signal handler for SIGCHLD: note that a child process has terminated.
499         @param sig The value of the trapped signal; always SIGCHLD.
500
501         Set a boolean to be checked later.
502 */
503 static void sigchld_handler( int sig ) {
504         signal(SIGCHLD, sigchld_handler);
505         child_dead = 1;
506 }
507
508
509 /**
510         @brief Replenish the collection of child processes, after one has terminated.
511         @param forker Pointer to the prefork_simple that manages the child processes.
512
513         This function is called when we notice (via a signal handler) that a child
514         process has died.
515
516         Spawn a new child process to replace each of the terminated ones.
517 */
518 void reap_children( prefork_simple* forker ) {
519
520         pid_t child_pid;
521
522         // Reset our boolean so that we can detect any further terminations.
523         child_dead = 0;
524
525         // Bury the children so that they won't be zombies.  WNOHANG means that waitpid() returns
526         // immediately if there are no waitable children, instead of waiting for more to die.
527         // Ignore the return code of the child.  We don't do an autopsy.
528         while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0)
529                 del_prefork_child( forker, child_pid );
530
531         /* Spawn more children as needed to maintain a minimum brood. */
532         while( forker->current_num_children < forker->min_children )
533                 launch_child( forker );
534 }
535
536 static void prefork_run(prefork_simple* forker) {
537
538         if( forker->first_child == NULL )
539                 return;
540
541         transport_message* cur_msg = NULL;
542
543
544         while(1) {
545
546                 if( forker->first_child == NULL ) {/* no more children */
547                         osrfLogWarning( OSRF_LOG_MARK, "No more children..." );
548                         return;
549                 }
550
551                 // Wait indefinitely for an input message
552                 osrfLogDebug( OSRF_LOG_MARK, "Forker going into wait for data...");
553                 cur_msg = client_recv( forker->connection, -1 );
554
555                 if( cur_msg == NULL )
556                         continue;           // Error?  Interrupted by a signal?
557
558                 int honored = 0;     /* will be set to true when we service the request */
559                 int no_recheck = 0;
560
561                 while( ! honored ) {
562
563                         if(!no_recheck)
564                                 check_children( forker, 0 );
565                         no_recheck = 0;
566
567                         osrfLogDebug( OSRF_LOG_MARK, "Server received inbound data" );
568                         int k;
569                         prefork_child* cur_child = forker->first_child;
570
571                         /* Look for an available child */
572                         for( k = 0; k < forker->current_num_children; k++ ) {
573
574                                 osrfLogInternal( OSRF_LOG_MARK,
575                                                 "Searching for available child. cur_child->pid = %d", cur_child->pid );
576                                 osrfLogInternal( OSRF_LOG_MARK, "Current num children %d and loop %d",
577                                                 forker->current_num_children, k);
578
579                                 if( cur_child->available ) {
580                                         osrfLogDebug( OSRF_LOG_MARK, "forker sending data to %d", cur_child->pid );
581
582                                         message_prepare_xml( cur_msg );
583                                         char* data = cur_msg->msg_xml;
584                                         if( ! data || strlen(data) < 1 )
585                                                 break;
586
587                                         cur_child->available = 0;
588                                         osrfLogInternal( OSRF_LOG_MARK, "Writing to child fd %d",
589                                                         cur_child->write_data_fd );
590
591                                         int written = 0;
592                                         if( (written = write( cur_child->write_data_fd, data, strlen(data) + 1 )) < 0 ) {
593                                                 osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d", errno);
594                                                 cur_child = cur_child->next;
595                                                 continue;
596                                         }
597
598                                         forker->first_child = cur_child->next;
599                                         honored = 1;
600                                         break;
601                                 } else
602                                         cur_child = cur_child->next;
603                         }
604
605                         /* if none available, add a new child if we can */
606                         if( ! honored ) {
607                                 osrfLogDebug( OSRF_LOG_MARK, "Not enough children, attempting to add...");
608
609                                 if( forker->current_num_children < forker->max_children ) {
610                                         osrfLogDebug( OSRF_LOG_MARK,  "Launching new child with current_num = %d",
611                                                         forker->current_num_children );
612
613                                         prefork_child* new_child = launch_child( forker );
614                                         if( new_child ) {
615
616                                                 message_prepare_xml( cur_msg );
617                                                 char* data = cur_msg->msg_xml;
618
619                                                 if( data ) {
620                                                         int len = strlen(data);
621
622                                                         if( len > 0 ) {
623                                                                 new_child->available = 0;
624                                                                 osrfLogDebug( OSRF_LOG_MARK, "Writing to new child fd %d : pid %d",
625                                                                                 new_child->write_data_fd, new_child->pid );
626
627                                                                 if( write( new_child->write_data_fd, data, len + 1 ) >= 0 ) {
628                                                                         forker->first_child = new_child->next;
629                                                                         honored = 1;
630                                                                 }
631                                                         }
632                                                 }
633                                         }
634
635                                 }
636                         }
637
638                         if( !honored ) {
639                                 osrfLogWarning( OSRF_LOG_MARK,  "No children available, waiting...");
640
641                                 check_children( forker, 1 );  /* non-poll version */
642                                 /* tell the loop not to call check_children again, since we're calling it now */
643                                 no_recheck = 1;
644                         }
645
646                         if( child_dead )
647                                 reap_children(forker);
648
649                 } // honored?
650
651                 message_free( cur_msg );
652
653         } /* end top level listen loop */
654
655 }
656
657
658 /** XXX Add a flag which tells select() to wait forever on children
659         in the best case, this will be faster than calling usleep(x), and
660         in the worst case it won't be slower and will do less logging...
661 */
662 static void check_children( prefork_simple* forker, int forever ) {
663
664         //check_begin:
665
666         int select_ret;
667         fd_set read_set;
668         FD_ZERO(&read_set);
669         int max_fd = 0;
670         int n;
671
672         if( child_dead )
673                 reap_children(forker);
674
675         prefork_child* cur_child = forker->first_child;
676
677         int i;
678         for( i = 0; i!= forker->current_num_children; i++ ) {
679
680                 if( cur_child->read_status_fd > max_fd )
681                         max_fd = cur_child->read_status_fd;
682                 FD_SET( cur_child->read_status_fd, &read_set );
683                 cur_child = cur_child->next;
684         }
685
686         FD_CLR(0,&read_set); /* just to be sure */
687
688         if( forever ) {
689                 osrfLogWarning(OSRF_LOG_MARK,
690                                 "We have no children available - waiting for one to show up...");
691
692                 if( (select_ret=select( max_fd + 1 , &read_set, NULL, NULL, NULL)) == -1 ) {
693                         osrfLogWarning( OSRF_LOG_MARK,  "Select returned error %d on check_children", errno );
694                 }
695                 osrfLogInfo(OSRF_LOG_MARK,
696                                 "select() completed after waiting on children to become available");
697
698         } else {
699
700                 struct timeval tv;
701                 tv.tv_sec   = 0;
702                 tv.tv_usec  = 0;
703
704                 if( (select_ret=select( max_fd + 1 , &read_set, NULL, NULL, &tv)) == -1 ) {
705                         osrfLogWarning( OSRF_LOG_MARK,  "Select returned error %d on check_children", errno );
706                 }
707         }
708
709         if( select_ret == 0 )
710                 return;
711
712         /* see if one of a child has told us it's done */
713         cur_child = forker->first_child;
714         int j;
715         int num_handled = 0;
716         for( j = 0; j!= forker->current_num_children && num_handled < select_ret ; j++ ) {
717
718                 if( FD_ISSET( cur_child->read_status_fd, &read_set ) ) {
719                         //printf( "Server received status from a child %d\n", cur_child->pid );
720                         osrfLogDebug( OSRF_LOG_MARK,
721                                         "Server received status from a child %d", cur_child->pid );
722
723                         num_handled++;
724
725                         /* now suck off the data */
726                         char buf[64];
727                         if( (n=read(cur_child->read_status_fd, buf, sizeof(buf) - 1)) < 0 ) {
728                                 osrfLogWarning( OSRF_LOG_MARK,
729                                                 "Read error after select in child status read with errno %d", errno);
730                         }
731                         else {
732                                 buf[n] = '\0';
733                                 osrfLogDebug( OSRF_LOG_MARK,  "Read %d bytes from status buffer: %s", n, buf );
734                         }
735                         cur_child->available = 1;
736                 }
737                 cur_child = cur_child->next;
738         }
739
740 }
741
742
743 static void prefork_child_wait( prefork_child* child ) {
744
745         int i,n;
746         growing_buffer* gbuf = buffer_init( READ_BUFSIZE );
747         char buf[READ_BUFSIZE];
748
749         for( i = 0; i < child->max_requests; i++ ) {
750
751                 n = -1;
752                 int gotdata = 0;    // boolean; set to true if we get data
753                 clr_fl(child->read_data_fd, O_NONBLOCK );
754
755                 while( (n=read(child->read_data_fd, buf, READ_BUFSIZE-1)) > 0 ) {
756                         buf[n] = '\0';
757                         osrfLogDebug(OSRF_LOG_MARK, "Prefork child read %d bytes of data", n);
758                         if(!gotdata) {
759                                 set_fl(child->read_data_fd, O_NONBLOCK );
760                                 gotdata = 1;
761                         }
762                         buffer_add( gbuf, buf );
763                 }
764
765                 if( errno == EAGAIN ) n = 0;
766
767                 if( errno == EPIPE ) {
768                         osrfLogDebug(OSRF_LOG_MARK, "C child attempted read on broken pipe, exiting...");
769                         break;
770                 }
771
772                 if( n < 0 ) {
773                         osrfLogWarning( OSRF_LOG_MARK,
774                                         "Prefork child read returned error with errno %d", errno );
775                         break;
776
777                 } else if( gotdata ) {
778                         osrfLogDebug(OSRF_LOG_MARK, "Prefork child got a request.. processing..");
779                         prefork_child_process_request(child, gbuf->buf);
780                         buffer_reset( gbuf );
781                 }
782
783                 if( i < child->max_requests - 1 )
784                         write( child->write_status_fd, "available" /*less than 64 bytes*/, 9 );
785         }
786
787         buffer_free(gbuf);
788
789         osrfLogDebug( OSRF_LOG_MARK, "Child with max-requests=%d, num-served=%d exiting...[%ld]",
790                         child->max_requests, i, (long) getpid() );
791
792         osrf_prefork_child_exit(child); /* just to be sure */
793 }
794
795 /**
796         @brief Add a prefork_child to the end of the list.
797         @param forker Pointer to the prefork_simple that owns the list.
798         @param child Pointer to the prefork_child to be added.
799 */
800 static void add_prefork_child( prefork_simple* forker, prefork_child* child ) {
801
802         if( forker->first_child == NULL ) {
803                 // Simplest case: list is initially empty.
804                 forker->first_child = child;
805                 child->next = child;
806                 child->prev = child;
807         } else {
808                 // Find the last node in the circular list.
809                 prefork_child* last_child = forker->first_child->prev;
810
811                 // Insert the new child between the last and first children.
812                 last_child->next = child;
813                 child->prev      = last_child;
814                 child->next      = forker->first_child;
815                 forker->first_child->prev = child;
816         }
817 }
818
819 /**
820         @brief Remove a prefork_child from the child list.
821         @param forker Pointer to the prefork_simple that owns the child.
822         @param pid Process ID of the child to be removed.
823
824         Besides removing the node from the list, we also close its file descriptors.
825 */
826 static void del_prefork_child( prefork_simple* forker, pid_t pid ) {
827
828         if( forker->first_child == NULL )
829                 return;  // Empty list; bail out.
830
831         osrfLogDebug( OSRF_LOG_MARK, "Deleting Child: %d", pid );
832
833         // Find the node in question
834         prefork_child* cur_child = forker->first_child; /* current pointer */
835         while( cur_child->pid != pid && cur_child->next != forker->first_child )
836                 cur_child = cur_child->next;
837
838         if( cur_child->pid == pid ) {
839                 // We found the right node.  Remove it from the list.
840                 if( cur_child->next == cur_child )
841                         forker->first_child = NULL;    // only child in the list
842                 else {
843                         if( forker->first_child == cur_child )
844                                 forker->first_child = cur_child->next;  // Reseat forker->first_child
845                         
846                         // Stitch the nodes on either side together
847                         cur_child->prev->next = cur_child->next;
848                         cur_child->next->prev = cur_child->prev;
849                 }
850                 --forker->current_num_children;
851                 
852                 //Destroy the node
853                 prefork_child_free( forker, cur_child );
854                 
855         } // else we didn't find a matching node; bail out
856 }
857
858 /**
859         @brief Create and initialize a prefork_child.
860         @param forker Pointer to the prefork_simple that will own the prefork_child.
861         @param read_data_fd Used by child to read request from parent.
862         @param write_data_fd Used by parent to write request to child.
863         @param read_status_fd Used by parent to read status from child.
864         @param write_status_fd Used by child to write status to parent.
865         @return Pointer to the newly created prefork_child.
866
867         The calling code is responsible for freeing the prefork_child by calling
868         prefork_child_free().
869 */
870 static prefork_child* prefork_child_init( prefork_simple* forker,
871         int read_data_fd, int write_data_fd,
872         int read_status_fd, int write_status_fd ) {
873
874         // Allocate a prefork_child -- from the free list if possible, or from
875         // the heap if necessary.  The free list is a non-circular, singly-linked list.
876         prefork_child* child;
877         if( forker->free_list ) {
878                 child = forker->free_list;
879                 forker->free_list = child->next;
880         } else
881                 child = (prefork_child*) safe_malloc(sizeof(prefork_child));
882
883         child->pid              = 0;
884         child->read_data_fd     = read_data_fd;
885         child->write_data_fd    = write_data_fd;
886         child->read_status_fd   = read_status_fd;
887         child->write_status_fd  = write_status_fd;
888         child->available        = 1;
889         child->max_requests     = forker->max_requests;
890         child->appname          = forker->appname;  // We don't make a separate copy
891         child->keepalive        = forker->keepalive;
892         child->next             = NULL;
893         child->prev             = NULL;
894
895         return child;
896 }
897
898
899 /**
900         @brief Terminate all child processes and clear out a prefork_simple.
901         @param prefork Pointer to the prefork_simple to be cleared out.
902
903         We do not deallocate the prefork_simple itself, just its contents.
904 */
905 static void prefork_clear( prefork_simple* prefork ) {
906
907         // Kill all the child processes with a single call, not by killing each one separately.
908         // Implication: we can't have more than one prefork_simple outstanding, because
909         // destroying one would kill the children of all.
910         while( prefork->first_child != NULL ) {
911                 osrfLogInfo( OSRF_LOG_MARK, "Killing child processes ..." );
912                 kill( 0, SIGKILL );
913         }
914
915         // Bury the children so that they won't be zombies.  WNOHANG means that waitpid() returns
916         // immediately if there are no waitable children, instead of waiting for more to die.
917         // Ignore the return code of the child.  We don't do an autopsy.
918         pid_t child_pid;
919         while( (child_pid = waitpid(-1, NULL, WNOHANG)) > 0)
920                 del_prefork_child( prefork, child_pid );
921
922     // Close the Jabber connection
923         client_free(prefork->connection);
924         
925         // Physically free the free list of prefork_children.
926         prefork_child* child = prefork->first_child;
927         while( child ) {
928                 prefork_child* temp = child->next;
929                 free( child );
930                 child = temp;
931         }
932
933         free(prefork->appname);
934 }
935
936 /**
937         @brief Destroy and deallocate a prefork_child.
938         @param forker Pointer to the prefork_simple that owns the prefork_child.
939         @param child Pointer to the prefork_child to be destroyed.
940 */
941 static void prefork_child_free( prefork_simple* forker, prefork_child* child ) {
942         close( child->read_data_fd );
943         close( child->write_data_fd );
944         close( child->read_status_fd );
945         close( child->write_status_fd );
946
947         // Stick the prefork_child in a free list for potential reuse.  This is a
948         // non-circular, singly linked list.
949         child->prev = NULL;
950         child->next = forker->free_list;
951         forker->free_list = child;
952 }