]> git.evergreen-ils.org Git - Evergreen.git/blob - OpenSRF/src/libstack/osrf_prefork.c
when a backend child is not able to connect to jabber, the child now goes away
[Evergreen.git] / OpenSRF / src / libstack / osrf_prefork.c
1 #include "osrf_prefork.h"
2 #include <signal.h>
3 #include "osrf_app_session.h"
4 #include "osrf_application.h"
5
/* Flag set to 1 by sigchld_handler() when SIGCHLD is delivered and reset
	to 0 by reap_children().  prefork_run() and check_children() test it and
	call reap_children() so that entries for children that have exited are
	removed from the child list (i.e. we do not keep using freed children). */
int child_dead;

/* forward declarations */
int main();
void sigchld_handler( int sig );
12
13 int osrf_prefork_run(char* appname) {
14
15         if(!appname) {
16                 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run requires an appname to run!");
17                 return -1;
18         }
19
20         set_proc_title( "OpenSRF Listener [%s]", appname );
21
22         int maxr = 1000; 
23         int maxc = 10;
24         int minc = 3;
25
26         osrfLogInfo( OSRF_LOG_MARK, "Loading config in osrf_forker for app %s", appname);
27
28         jsonObject* max_req = osrf_settings_host_value_object("/apps/%s/unix_config/max_requests", appname);
29         jsonObject* min_children = osrf_settings_host_value_object("/apps/%s/unix_config/min_children", appname);
30         jsonObject* max_children = osrf_settings_host_value_object("/apps/%s/unix_config/max_children", appname);
31
32         char* keepalive = osrf_settings_host_value("/apps/%s/keepalive", appname);
33         time_t kalive;
34         if( keepalive ) {
35                 kalive = atoi(keepalive);
36                 free(keepalive);
37         } else {
38                 kalive = 5; /* give it a default */
39         }
40
41         osrfLogInfo(OSRF_LOG_MARK, "keepalive setting = %d seconds", kalive);
42
43
44         
45         if(!max_req) osrfLogWarning( OSRF_LOG_MARK, "Max requests not defined, assuming 1000");
46         else maxr = (int) jsonObjectGetNumber(max_req);
47
48         if(!min_children) osrfLogWarning( OSRF_LOG_MARK, "Min children not defined, assuming 3");
49         else minc = (int) jsonObjectGetNumber(min_children);
50
51         if(!max_children) osrfLogWarning( OSRF_LOG_MARK, "Max children not defined, assuming 10");
52         else maxc = (int) jsonObjectGetNumber(max_children);
53
54         jsonObjectFree(max_req);
55         jsonObjectFree(min_children);
56         jsonObjectFree(max_children);
57         /* --------------------------------------------------- */
58
59         char* resc = va_list_to_string("%s_listener", appname);
60
61         if(!osrf_system_bootstrap_client_resc( NULL, NULL, resc )) {
62                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
63                 free(resc);
64                 return -1;
65         }
66
67         free(resc);
68
69         prefork_simple* forker = prefork_simple_init(
70                 osrfSystemGetTransportClient(), maxr, minc, maxc);
71
72         forker->appname = strdup(appname);
73         forker->keepalive       = kalive;
74
75         if(forker == NULL) {
76                 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run() failed to create prefork_simple object");
77                 return -1;
78         }
79
80         prefork_launch_children(forker);
81
82         osrf_prefork_register_routers(appname);
83         
84         osrfLogInfo( OSRF_LOG_MARK, "Launching osrf_forker for app %s", appname);
85         prefork_run(forker);
86         
87         osrfLogWarning( OSRF_LOG_MARK, "prefork_run() retuned - how??");
88         prefork_free(forker);
89         return 0;
90
91 }
92
93 void osrf_prefork_register_routers( char* appname ) {
94
95         osrfStringArray* arr = osrfNewStringArray(4);
96
97         int c = osrfConfigGetValueList( NULL, arr, "/routers/router" );
98         char* routerName = osrfConfigGetValue( NULL, "/router_name" );
99         transport_client* client = osrfSystemGetTransportClient();
100
101         osrfLogInfo( OSRF_LOG_MARK, "router name is %s and we have %d routers to connect to", routerName, c );
102
103         while( c ) {
104                 char* domain = osrfStringArrayGetString(arr, --c);
105                 if(domain) {
106
107                         char* jid = va_list_to_string( "%s@%s/router", routerName, domain );
108                         osrfLogInfo( OSRF_LOG_MARK, "Registering with router %s", jid );
109
110                         transport_message* msg = message_init("registering", NULL, NULL, jid, NULL );
111                         message_set_router_info( msg, NULL, NULL, appname, "register", 0 );
112
113                         client_send_message( client, msg );
114                         message_free( msg );
115                         free(jid);
116                 }
117         }
118
119         free(routerName);
120         osrfStringArrayFree(arr);
121 }
122
123 int prefork_child_init_hook(prefork_child* child) {
124
125         if(!child) return -1;
126         osrfLogDebug( OSRF_LOG_MARK, "Child init hook for child %d", child->pid);
127         char* resc = va_list_to_string("%s_drone",child->appname);
128
129         /* we want to remove traces of our parents socket connection 
130          * so we can have our own */
131         osrfSystemIgnoreTransportClient();
132
133         if(!osrf_system_bootstrap_client_resc( NULL, NULL, resc)) {
134                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
135                 free(resc);
136                 return -1;
137         }
138
139         free(resc);
140
141         if( ! osrfAppRunChildInit(child->appname) ) {
142                 osrfLogDebug(OSRF_LOG_MARK, "Prefork child_init succeeded\n");
143         } else {
144                 osrfLogError(OSRF_LOG_MARK, "Prefork child_init failed\n");
145                 return -1;
146         }
147
148         set_proc_title( "OpenSRF Drone [%s]", child->appname );
149         return 0;
150 }
151
152 void prefork_child_process_request(prefork_child* child, char* data) {
153         if( !child ) return;
154
155         /* construct the message from the xml */
156         transport_message* msg = new_message_from_xml( data );
157
158         osrfAppSession* session = osrf_stack_transport_handler(msg, child->appname);
159         if(!session) return;
160
161         if( session->stateless && session->state != OSRF_SESSION_CONNECTED ) {
162                 osrfAppSessionFree( session );
163                 return;
164         }
165
166         osrfLogDebug( OSRF_LOG_MARK, "Entering keepalive loop for session %s", session->session_id );
167         int keepalive = child->keepalive;
168         int retval;
169         time_t start;
170         time_t end;
171
172         while(1) {
173
174                 osrfLogDebug(OSRF_LOG_MARK, 
175                                 "osrf_prefork calling queue_wait [%d] in keepalive loop", keepalive);
176                 start           = time(NULL);
177                 retval  = osrf_app_session_queue_wait(session, keepalive);
178                 end             = time(NULL);
179
180                 if(retval) {
181                         osrfLogError(OSRF_LOG_MARK, "queue-wait returned non-success %d", retval);
182                         break;
183                 }
184
185                 /* see if the client disconnected from us */
186                 if(session->state != OSRF_SESSION_CONNECTED) break;
187
188                 /* see if the used up the timeout */
189                 if( (end - start) >= keepalive ) {
190
191                         osrfLogDebug(OSRF_LOG_MARK, "Keepalive timed out, exiting connected session");
192
193                         osrfAppSessionStatus( 
194                                         session, 
195                                         OSRF_STATUS_TIMEOUT, 
196                                         "osrfConnectStatus", 
197                                         0, "Disconnected on timeout" );
198
199                         break;
200                 }
201         }
202
203         osrfLogDebug( OSRF_LOG_MARK, "Exiting keepalive loop for session %s", session->session_id );
204         osrfAppSessionFree( session );
205         return;
206 }
207
208
209 prefork_simple*  prefork_simple_init( transport_client* client, 
210                 int max_requests, int min_children, int max_children ) {
211
212         if( min_children > max_children ) {
213                 osrfLogError( OSRF_LOG_MARK,  "min_children (%d) is greater "
214                                 "than max_children (%d)", min_children, max_children );
215                 return NULL;
216         }
217
218         if( max_children > ABS_MAX_CHILDREN ) {
219                 osrfLogError( OSRF_LOG_MARK,  "max_children (%d) is greater than ABS_MAX_CHILDREN (%d)",
220                                 max_children, ABS_MAX_CHILDREN );
221                 return NULL;
222         }
223
224         osrfLogInfo(OSRF_LOG_MARK, "Prefork launching child with max_request=%d,"
225                 "min_children=%d, max_children=%d", max_requests, min_children, max_children );
226
227         /* flesh out the struct */
228         prefork_simple* prefork = (prefork_simple*) safe_malloc(sizeof(prefork_simple));        
229         prefork->max_requests = max_requests;
230         prefork->min_children = min_children;
231         prefork->max_children = max_children;
232         prefork->first_child = NULL;
233         prefork->connection = client;
234
235         return prefork;
236 }
237
238 prefork_child*  launch_child( prefork_simple* forker ) {
239
240         pid_t pid;
241         int data_fd[2];
242         int status_fd[2];
243
244         /* Set up the data pipes and add the child struct to the parent */
245         if( pipe(data_fd) < 0 ) { /* build the data pipe*/
246                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
247                 return NULL;
248         }
249
250         if( pipe(status_fd) < 0 ) {/* build the status pipe */
251                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
252                 return NULL;
253         }
254
255         osrfLogInternal( OSRF_LOG_MARK,  "Pipes: %d %d %d %d", data_fd[0], data_fd[1], status_fd[0], status_fd[1] );
256         prefork_child* child = prefork_child_init( forker->max_requests, data_fd[0], 
257                         data_fd[1], status_fd[0], status_fd[1] );
258
259         child->appname = strdup(forker->appname);
260         child->keepalive = forker->keepalive;
261
262
263         add_prefork_child( forker, child );
264
265         if( (pid=fork()) < 0 ) {
266                 osrfLogError( OSRF_LOG_MARK,  "Forking Error" );
267                 return NULL;
268         }
269
270         if( pid > 0 ) {  /* parent */
271
272                 signal(SIGCHLD, sigchld_handler);
273                 (forker->current_num_children)++;
274                 child->pid = pid;
275
276                 osrfLogDebug( OSRF_LOG_MARK,  "Parent launched %d", pid );
277                 /* *no* child pipe FD's can be closed or the parent will re-use fd's that
278                         the children are currently using */
279                 return child;
280         }
281
282         else { /* child */
283
284                 osrfLogInternal( OSRF_LOG_MARK, "I am  new child with read_data_fd = %d and write_status_fd = %d",
285                         child->read_data_fd, child->write_status_fd );
286
287                 child->pid = getpid();
288                 close( child->write_data_fd );
289                 close( child->read_status_fd );
290
291                 /* do the initing */
292                 if( prefork_child_init_hook(child) == -1 ) {
293                         osrfLogError(OSRF_LOG_MARK, 
294                                 "Forker child going away because we could not connect to OpenSRF...");
295                         exit(1);
296                 }
297
298                 prefork_child_wait( child );
299                 exit(0); /* just to be sure */
300          }
301         return NULL;
302 }
303
304
305 void prefork_launch_children( prefork_simple* forker ) {
306         if(!forker) return;
307         int c = 0;
308         while( c++ < forker->min_children )
309                 launch_child( forker );
310 }
311
312
313 void sigchld_handler( int sig ) {
314         signal(SIGCHLD, sigchld_handler);
315         child_dead = 1;
316 }
317
318
319 void reap_children( prefork_simple* forker ) {
320
321         pid_t child_pid;
322         int status;
323
324         while( (child_pid=waitpid(-1,&status,WNOHANG)) > 0) 
325                 del_prefork_child( forker, child_pid ); 
326
327         /* replenish */
328         while( forker->current_num_children < forker->min_children ) 
329                 launch_child( forker );
330
331         child_dead = 0;
332 }
333
334 void prefork_run(prefork_simple* forker) {
335
336         if( forker->first_child == NULL )
337                 return;
338
339         transport_message* cur_msg = NULL;
340
341
342         while(1) {
343
344                 if( forker->first_child == NULL ) {/* no more children */
345                         osrfLogWarning( OSRF_LOG_MARK, "No more children..." );
346                         return;
347                 }
348
349                 osrfLogDebug( OSRF_LOG_MARK, "Forker going into wait for data...");
350                 cur_msg = client_recv( forker->connection, -1 );
351
352                 //fprintf(stderr, "Got Data %f\n", get_timestamp_millis() );
353
354                 if( cur_msg == NULL ) continue;
355
356                 int honored = 0;        /* true if we've serviced the request */
357
358                 while( ! honored ) {
359
360                         check_children( forker ); 
361
362                         osrfLogDebug( OSRF_LOG_MARK,  "Server received inbound data" );
363                         int k;
364                         prefork_child* cur_child = forker->first_child;
365
366                         /* Look for an available child */
367                         for( k = 0; k < forker->current_num_children; k++ ) {
368
369                                 osrfLogInternal( OSRF_LOG_MARK, "Searching for available child. cur_child->pid = %d", cur_child->pid );
370                                 osrfLogInternal( OSRF_LOG_MARK, "Current num children %d and loop %d", forker->current_num_children, k);
371                         
372                                 if( cur_child->available ) {
373                                         osrfLogDebug( OSRF_LOG_MARK,  "forker sending data to %d", cur_child->pid );
374
375                                         message_prepare_xml( cur_msg );
376                                         char* data = cur_msg->msg_xml;
377                                         if( ! data || strlen(data) < 1 ) break;
378
379                                         cur_child->available = 0;
380                                         osrfLogInternal( OSRF_LOG_MARK,  "Writing to child fd %d", cur_child->write_data_fd );
381
382                                         int written = 0;
383                                         //fprintf(stderr, "Writing Data %f\n", get_timestamp_millis() );
384                                         if( (written = write( cur_child->write_data_fd, data, strlen(data) + 1 )) < 0 ) {
385                                                 osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d", errno);
386                                                 cur_child = cur_child->next;
387                                                 continue;
388                                         }
389
390                                         //fprintf(stderr, "Wrote %d bytes to child\n", written);
391
392                                         forker->first_child = cur_child->next;
393                                         honored = 1;
394                                         break;
395                                 } else 
396                                         cur_child = cur_child->next;
397                         } 
398
399                         /* if none available, add a new child if we can */
400                         if( ! honored ) {
401                                 osrfLogDebug( OSRF_LOG_MARK, "Not enough children, attempting to add...");
402                                 if( forker->current_num_children < forker->max_children ) {
403                                         osrfLogDebug( OSRF_LOG_MARK,  "Launching new child with current_num = %d",
404                                                         forker->current_num_children );
405
406                                         prefork_child* new_child = launch_child( forker );
407                                         message_prepare_xml( cur_msg );
408                                         char* data = cur_msg->msg_xml;
409                                         if( ! data || strlen(data) < 1 ) break;
410                                         new_child->available = 0;
411                                         osrfLogDebug( OSRF_LOG_MARK,  "Writing to new child fd %d : pid %d", 
412                                                         new_child->write_data_fd, new_child->pid );
413                                         write( new_child->write_data_fd, data, strlen(data) + 1 );
414                                         forker->first_child = new_child->next;
415                                         honored = 1;
416                                 }
417                         }
418
419                         if( !honored ) {
420                                 osrfLogWarning( OSRF_LOG_MARK,  "No children available, sleeping and looping..." );
421                                 usleep( 50000 ); /* 50 milliseconds */
422                         }
423
424                         if( child_dead )
425                                 reap_children(forker);
426
427
428                         //fprintf(stderr, "Parent done with request %f\n", get_timestamp_millis() );
429
430                 } // honored?
431
432                 message_free( cur_msg );
433
434         } /* top level listen loop */
435
436 }
437
438
439 void check_children( prefork_simple* forker ) {
440
441         //check_begin:
442
443         int select_ret;
444         fd_set read_set;
445         FD_ZERO(&read_set);
446         int max_fd = 0;
447         int n;
448
449         struct timeval tv;
450         tv.tv_sec       = 0;
451         tv.tv_usec      = 0;
452
453         if( child_dead )
454                 reap_children(forker);
455
456         prefork_child* cur_child = forker->first_child;
457
458         int i;
459         for( i = 0; i!= forker->current_num_children; i++ ) {
460
461                 if( cur_child->read_status_fd > max_fd )
462                         max_fd = cur_child->read_status_fd;
463                 FD_SET( cur_child->read_status_fd, &read_set );
464                 cur_child = cur_child->next;
465         }
466
467         FD_CLR(0,&read_set);/* just to be sure */
468
469         if( (select_ret=select( max_fd + 1 , &read_set, NULL, NULL, &tv)) == -1 ) {
470                 osrfLogWarning( OSRF_LOG_MARK,  "Select returned error %d on check_children", errno );
471         }
472
473         if( select_ret == 0 )
474                 return;
475
476         /* see if one of a child has told us it's done */
477         cur_child = forker->first_child;
478         int j;
479         int num_handled = 0;
480         for( j = 0; j!= forker->current_num_children && num_handled < select_ret ; j++ ) {
481
482                 if( FD_ISSET( cur_child->read_status_fd, &read_set ) ) {
483                         //printf( "Server received status from a child %d\n", cur_child->pid );
484                         osrfLogDebug( OSRF_LOG_MARK,  "Server received status from a child %d", cur_child->pid );
485
486                         num_handled++;
487
488                         /* now suck off the data */
489                         char buf[64];
490                         memset( buf, 0, 64);
491                         if( (n=read(cur_child->read_status_fd, buf, 63))  < 0 ) {
492                                 osrfLogWarning( OSRF_LOG_MARK, "Read error afer select in child status read with errno %d", errno);
493                         }
494
495                         osrfLogDebug( OSRF_LOG_MARK,  "Read %d bytes from status buffer: %s", n, buf );
496                         cur_child->available = 1;
497                 }
498                 cur_child = cur_child->next;
499         } 
500
501 }
502
503
504 void prefork_child_wait( prefork_child* child ) {
505
506         int i,n;
507         growing_buffer* gbuf = buffer_init( READ_BUFSIZE );
508         char buf[READ_BUFSIZE];
509         memset( buf, 0, READ_BUFSIZE );
510
511         for( i = 0; i < child->max_requests; i++ ) {
512
513                 n = -1;
514                 clr_fl(child->read_data_fd, O_NONBLOCK );
515                 while( (n=read(child->read_data_fd, buf, READ_BUFSIZE-1)) > 0 ) {
516                         buffer_add( gbuf, buf );
517                         memset( buf, 0, READ_BUFSIZE );
518
519                         //fprintf(stderr, "Child read %d bytes\n", n);
520
521                         if( n == READ_BUFSIZE ) { 
522                                 //fprintf(stderr, "We read READ_BUFSIZE data....\n");
523                                 /* XXX */
524                                 /* either we have exactly READ_BUFSIZE data, 
525                                         or there's more waiting that we need to grab*/
526                                 /* must set to non-block for reading more */
527                         } else {
528                                 //fprintf(stderr, "Read Data %f\n", get_timestamp_millis() );
529                                 prefork_child_process_request(child, gbuf->buf);
530                                 buffer_reset( gbuf );
531                                 break;
532                         }
533                 }
534
535                 if( n < 0 ) {
536                         osrfLogWarning( OSRF_LOG_MARK,  "Prefork child read returned error with errno %d", errno );
537                         break;
538                 }
539
540                 if( i < child->max_requests - 1 ) 
541                         write( child->write_status_fd, "available" /*less than 64 bytes*/, 9 );
542         }
543
544         buffer_free(gbuf);
545
546         osrfLogDebug( OSRF_LOG_MARK, "Child with max-requests=%d, num-served=%d exiting...[%d]", 
547                         child->max_requests, i, getpid() );
548
549         exit(0);
550 }
551
552
553 void add_prefork_child( prefork_simple* forker, prefork_child* child ) {
554         
555         if( forker->first_child == NULL ) {
556                 forker->first_child = child;
557                 child->next = child;
558                 return;
559         }
560
561         /* we put the child in as the last because, regardless, 
562                 we have to do the DLL splice dance, and this is the
563            simplest way */
564
565         prefork_child* start_child = forker->first_child;
566         while(1) {
567                 if( forker->first_child->next == start_child ) 
568                         break;
569                 forker->first_child = forker->first_child->next;
570         }
571
572         /* here we know that forker->first_child is the last element 
573                 in the list and start_child is the first.  Insert the
574                 new child between them*/
575
576         forker->first_child->next = child;
577         child->next = start_child;
578         return;
579 }
580
581 prefork_child* find_prefork_child( prefork_simple* forker, pid_t pid ) {
582
583         if( forker->first_child == NULL ) { return NULL; }
584         prefork_child* start_child = forker->first_child;
585         do {
586                 if( forker->first_child->pid == pid ) 
587                         return forker->first_child;
588         } while( (forker->first_child = forker->first_child->next) != start_child );
589
590         return NULL;
591 }
592
593
594 void del_prefork_child( prefork_simple* forker, pid_t pid ) { 
595
596         if( forker->first_child == NULL ) { return; }
597
598         (forker->current_num_children)--;
599         osrfLogDebug( OSRF_LOG_MARK, "Deleting Child: %d", pid );
600
601         prefork_child* start_child = forker->first_child; /* starting point */
602         prefork_child* cur_child        = start_child; /* current pointer */
603         prefork_child* prev_child       = start_child; /* the trailing pointer */
604
605         /* special case where there is only one in the list */
606         if( start_child == start_child->next ) {
607                 if( start_child->pid == pid ) {
608                         forker->first_child = NULL;
609
610                         close( start_child->read_data_fd );
611                         close( start_child->write_data_fd );
612                         close( start_child->read_status_fd );
613                         close( start_child->write_status_fd );
614
615                         prefork_child_free( start_child );
616                 }
617                 return;
618         }
619
620
621         /* special case where the first item in the list needs to be removed */
622         if( start_child->pid == pid ) { 
623
624                 /* find the last one so we can remove the start_child */
625                 do { 
626                         prev_child = cur_child;
627                         cur_child = cur_child->next;
628                 }while( cur_child != start_child );
629
630                 /* now cur_child == start_child */
631                 prev_child->next = cur_child->next;
632                 forker->first_child = prev_child;
633
634                 close( cur_child->read_data_fd );
635                 close( cur_child->write_data_fd );
636                 close( cur_child->read_status_fd );
637                 close( cur_child->write_status_fd );
638
639                 prefork_child_free( cur_child );
640                 return;
641         } 
642
643         do {
644                 prev_child = cur_child;
645                 cur_child = cur_child->next;
646
647                 if( cur_child->pid == pid ) {
648                         prev_child->next = cur_child->next;
649
650                         close( cur_child->read_data_fd );
651                         close( cur_child->write_data_fd );
652                         close( cur_child->read_status_fd );
653                         close( cur_child->write_status_fd );
654
655                         prefork_child_free( cur_child );
656                         return;
657                 }
658
659         } while(cur_child != start_child);
660 }
661
662
663
664
665 prefork_child* prefork_child_init( 
666         int max_requests, int read_data_fd, int write_data_fd, 
667         int read_status_fd, int write_status_fd ) {
668
669         prefork_child* child = (prefork_child*) safe_malloc(sizeof(prefork_child));
670         child->max_requests             = max_requests;
671         child->read_data_fd             = read_data_fd;
672         child->write_data_fd            = write_data_fd;
673         child->read_status_fd   = read_status_fd;
674         child->write_status_fd  = write_status_fd;
675         child->available                        = 1;
676
677         return child;
678 }
679
680
681 int prefork_free( prefork_simple* prefork ) {
682         
683         while( prefork->first_child != NULL ) {
684                 osrfLogInfo( OSRF_LOG_MARK,  "Killing children and sleeping 1 to reap..." );
685                 kill( 0,        SIGKILL );
686                 sleep(1);
687         }
688
689         client_free(prefork->connection);
690         free(prefork->appname);
691         free( prefork );
692         return 1;
693 }
694
695 int prefork_child_free( prefork_child* child ) { 
696         free(child->appname);
697         close(child->read_data_fd);
698         close(child->write_status_fd);
699         free( child ); 
700         return 1;
701 }
702