]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/libstack/osrf_prefork.c
added logic to reconnect to jabber if the child receiving data realizes the connectio...
[OpenSRF.git] / src / libstack / osrf_prefork.c
1 #include "osrf_prefork.h"
2 #include <signal.h>
3 #include "osrf_app_session.h"
4 #include "osrf_application.h"
5
/* Set to 1 by sigchld_handler() when a child process has exited, and reset
	to 0 by reap_children().  The listener checks it so that it never keeps
	using a child entry that is about to be freed.
	Because the flag is written from a signal handler it is declared
	volatile sig_atomic_t (from <signal.h>), the only object type that may
	portably be assigned from a handler. */
volatile sig_atomic_t child_dead;

int main();
void sigchld_handler( int sig );
12
13 int osrf_prefork_run(char* appname) {
14
15         if(!appname) {
16                 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run requires an appname to run!");
17                 return -1;
18         }
19
20         set_proc_title( "OpenSRF Listener [%s]", appname );
21
22         int maxr = 1000; 
23         int maxc = 10;
24         int minc = 3;
25
26         osrfLogInfo( OSRF_LOG_MARK, "Loading config in osrf_forker for app %s", appname);
27
28         jsonObject* max_req = osrf_settings_host_value_object("/apps/%s/unix_config/max_requests", appname);
29         jsonObject* min_children = osrf_settings_host_value_object("/apps/%s/unix_config/min_children", appname);
30         jsonObject* max_children = osrf_settings_host_value_object("/apps/%s/unix_config/max_children", appname);
31
32         char* keepalive = osrf_settings_host_value("/apps/%s/keepalive", appname);
33         time_t kalive;
34         if( keepalive ) {
35                 kalive = atoi(keepalive);
36                 free(keepalive);
37         } else {
38                 kalive = 5; /* give it a default */
39         }
40
41         osrfLogInfo(OSRF_LOG_MARK, "keepalive setting = %d seconds", kalive);
42
43
44         
45         if(!max_req) osrfLogWarning( OSRF_LOG_MARK, "Max requests not defined, assuming 1000");
46         else maxr = (int) jsonObjectGetNumber(max_req);
47
48         if(!min_children) osrfLogWarning( OSRF_LOG_MARK, "Min children not defined, assuming 3");
49         else minc = (int) jsonObjectGetNumber(min_children);
50
51         if(!max_children) osrfLogWarning( OSRF_LOG_MARK, "Max children not defined, assuming 10");
52         else maxc = (int) jsonObjectGetNumber(max_children);
53
54         jsonObjectFree(max_req);
55         jsonObjectFree(min_children);
56         jsonObjectFree(max_children);
57         /* --------------------------------------------------- */
58
59         char* resc = va_list_to_string("%s_listener", appname);
60
61         if(!osrf_system_bootstrap_client_resc( NULL, NULL, resc )) {
62                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
63                 free(resc);
64                 return -1;
65         }
66
67         free(resc);
68
69         prefork_simple* forker = prefork_simple_init(
70                 osrfSystemGetTransportClient(), maxr, minc, maxc);
71
72         forker->appname = strdup(appname);
73         forker->keepalive       = kalive;
74
75         if(forker == NULL) {
76                 osrfLogError( OSRF_LOG_MARK, "osrf_prefork_run() failed to create prefork_simple object");
77                 return -1;
78         }
79
80         prefork_launch_children(forker);
81
82         osrf_prefork_register_routers(appname);
83         
84         osrfLogInfo( OSRF_LOG_MARK, "Launching osrf_forker for app %s", appname);
85         prefork_run(forker);
86         
87         osrfLogWarning( OSRF_LOG_MARK, "prefork_run() retuned - how??");
88         prefork_free(forker);
89         return 0;
90
91 }
92
93 void osrf_prefork_register_routers( char* appname ) {
94
95         osrfStringArray* arr = osrfNewStringArray(4);
96
97         int c = osrfConfigGetValueList( NULL, arr, "/routers/router" );
98         char* routerName = osrfConfigGetValue( NULL, "/router_name" );
99         transport_client* client = osrfSystemGetTransportClient();
100
101         osrfLogInfo( OSRF_LOG_MARK, "router name is %s and we have %d routers to connect to", routerName, c );
102
103         while( c ) {
104                 char* domain = osrfStringArrayGetString(arr, --c);
105                 if(domain) {
106
107                         char* jid = va_list_to_string( "%s@%s/router", routerName, domain );
108                         osrfLogInfo( OSRF_LOG_MARK, "Registering with router %s", jid );
109
110                         transport_message* msg = message_init("registering", NULL, NULL, jid, NULL );
111                         message_set_router_info( msg, NULL, NULL, appname, "register", 0 );
112
113                         client_send_message( client, msg );
114                         message_free( msg );
115                         free(jid);
116                 }
117         }
118
119         free(routerName);
120         osrfStringArrayFree(arr);
121 }
122
123 int prefork_child_init_hook(prefork_child* child) {
124
125         if(!child) return -1;
126         osrfLogDebug( OSRF_LOG_MARK, "Child init hook for child %d", child->pid);
127         char* resc = va_list_to_string("%s_drone",child->appname);
128
129         /* we want to remove traces of our parents socket connection 
130          * so we can have our own */
131         osrfSystemIgnoreTransportClient();
132
133         if(!osrf_system_bootstrap_client_resc( NULL, NULL, resc)) {
134                 osrfLogError( OSRF_LOG_MARK, "Unable to bootstrap client for osrf_prefork_run()");
135                 free(resc);
136                 return -1;
137         }
138
139         free(resc);
140
141         if( ! osrfAppRunChildInit(child->appname) ) {
142                 osrfLogDebug(OSRF_LOG_MARK, "Prefork child_init succeeded\n");
143         } else {
144                 osrfLogError(OSRF_LOG_MARK, "Prefork child_init failed\n");
145                 return -1;
146         }
147
148         set_proc_title( "OpenSRF Drone [%s]", child->appname );
149         return 0;
150 }
151
152 void prefork_child_process_request(prefork_child* child, char* data) {
153         if( !child ) return;
154
155         transport_client* client = osrfSystemGetTransportClient();
156
157         if(!client_connected(client)) {
158                 osrfSystemIgnoreTransportClient();
159                 osrfLogWarning(OSRF_LOG_MARK, "Reconnecting child to opensrf after disconnect...");
160                 if(!osrf_system_bootstrap_client(NULL, NULL)) {
161                         osrfLogError( OSRF_LOG_MARK, 
162                                 "Unable to bootstrap client in prefork_child_process_request()");
163                         sleep(1);
164                         exit(1);
165                 }
166         }
167
168         /* construct the message from the xml */
169         transport_message* msg = new_message_from_xml( data );
170
171         osrfAppSession* session = osrf_stack_transport_handler(msg, child->appname);
172         if(!session) return;
173
174         if( session->stateless && session->state != OSRF_SESSION_CONNECTED ) {
175                 osrfAppSessionFree( session );
176                 return;
177         }
178
179         osrfLogDebug( OSRF_LOG_MARK, "Entering keepalive loop for session %s", session->session_id );
180         int keepalive = child->keepalive;
181         int retval;
182         time_t start;
183         time_t end;
184
185         while(1) {
186
187                 osrfLogDebug(OSRF_LOG_MARK, 
188                                 "osrf_prefork calling queue_wait [%d] in keepalive loop", keepalive);
189                 start           = time(NULL);
190                 retval  = osrf_app_session_queue_wait(session, keepalive);
191                 end             = time(NULL);
192
193                 if(retval) {
194                         osrfLogError(OSRF_LOG_MARK, "queue-wait returned non-success %d", retval);
195                         break;
196                 }
197
198                 /* see if the client disconnected from us */
199                 if(session->state != OSRF_SESSION_CONNECTED) break;
200
201                 /* see if the used up the timeout */
202                 if( (end - start) >= keepalive ) {
203
204                         osrfLogDebug(OSRF_LOG_MARK, "Keepalive timed out, exiting connected session");
205
206                         osrfAppSessionStatus( 
207                                         session, 
208                                         OSRF_STATUS_TIMEOUT, 
209                                         "osrfConnectStatus", 
210                                         0, "Disconnected on timeout" );
211
212                         break;
213                 }
214         }
215
216         osrfLogDebug( OSRF_LOG_MARK, "Exiting keepalive loop for session %s", session->session_id );
217         osrfAppSessionFree( session );
218         return;
219 }
220
221
222 prefork_simple*  prefork_simple_init( transport_client* client, 
223                 int max_requests, int min_children, int max_children ) {
224
225         if( min_children > max_children ) {
226                 osrfLogError( OSRF_LOG_MARK,  "min_children (%d) is greater "
227                                 "than max_children (%d)", min_children, max_children );
228                 return NULL;
229         }
230
231         if( max_children > ABS_MAX_CHILDREN ) {
232                 osrfLogError( OSRF_LOG_MARK,  "max_children (%d) is greater than ABS_MAX_CHILDREN (%d)",
233                                 max_children, ABS_MAX_CHILDREN );
234                 return NULL;
235         }
236
237         osrfLogInfo(OSRF_LOG_MARK, "Prefork launching child with max_request=%d,"
238                 "min_children=%d, max_children=%d", max_requests, min_children, max_children );
239
240         /* flesh out the struct */
241         prefork_simple* prefork = (prefork_simple*) safe_malloc(sizeof(prefork_simple));        
242         prefork->max_requests = max_requests;
243         prefork->min_children = min_children;
244         prefork->max_children = max_children;
245         prefork->first_child = NULL;
246         prefork->connection = client;
247
248         return prefork;
249 }
250
251 prefork_child*  launch_child( prefork_simple* forker ) {
252
253         pid_t pid;
254         int data_fd[2];
255         int status_fd[2];
256
257         /* Set up the data pipes and add the child struct to the parent */
258         if( pipe(data_fd) < 0 ) { /* build the data pipe*/
259                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
260                 return NULL;
261         }
262
263         if( pipe(status_fd) < 0 ) {/* build the status pipe */
264                 osrfLogError( OSRF_LOG_MARK,  "Pipe making error" );
265                 return NULL;
266         }
267
268         osrfLogInternal( OSRF_LOG_MARK,  "Pipes: %d %d %d %d", data_fd[0], data_fd[1], status_fd[0], status_fd[1] );
269         prefork_child* child = prefork_child_init( forker->max_requests, data_fd[0], 
270                         data_fd[1], status_fd[0], status_fd[1] );
271
272         child->appname = strdup(forker->appname);
273         child->keepalive = forker->keepalive;
274
275
276         add_prefork_child( forker, child );
277
278         if( (pid=fork()) < 0 ) {
279                 osrfLogError( OSRF_LOG_MARK,  "Forking Error" );
280                 return NULL;
281         }
282
283         if( pid > 0 ) {  /* parent */
284
285                 signal(SIGCHLD, sigchld_handler);
286                 (forker->current_num_children)++;
287                 child->pid = pid;
288
289                 osrfLogDebug( OSRF_LOG_MARK,  "Parent launched %d", pid );
290                 /* *no* child pipe FD's can be closed or the parent will re-use fd's that
291                         the children are currently using */
292                 return child;
293         }
294
295         else { /* child */
296
297                 osrfLogInternal( OSRF_LOG_MARK, "I am  new child with read_data_fd = %d and write_status_fd = %d",
298                         child->read_data_fd, child->write_status_fd );
299
300                 child->pid = getpid();
301                 close( child->write_data_fd );
302                 close( child->read_status_fd );
303
304                 /* do the initing */
305                 if( prefork_child_init_hook(child) == -1 ) {
306                         osrfLogError(OSRF_LOG_MARK, 
307                                 "Forker child going away because we could not connect to OpenSRF...");
308                         exit(1);
309                 }
310
311                 prefork_child_wait( child );
312                 exit(0); /* just to be sure */
313          }
314         return NULL;
315 }
316
317
318 void prefork_launch_children( prefork_simple* forker ) {
319         if(!forker) return;
320         int c = 0;
321         while( c++ < forker->min_children )
322                 launch_child( forker );
323 }
324
325
326 void sigchld_handler( int sig ) {
327         signal(SIGCHLD, sigchld_handler);
328         child_dead = 1;
329 }
330
331
332 void reap_children( prefork_simple* forker ) {
333
334         pid_t child_pid;
335         int status;
336
337         while( (child_pid=waitpid(-1,&status,WNOHANG)) > 0) 
338                 del_prefork_child( forker, child_pid ); 
339
340         /* replenish */
341         while( forker->current_num_children < forker->min_children ) 
342                 launch_child( forker );
343
344         child_dead = 0;
345 }
346
347 void prefork_run(prefork_simple* forker) {
348
349         if( forker->first_child == NULL )
350                 return;
351
352         transport_message* cur_msg = NULL;
353
354
355         while(1) {
356
357                 if( forker->first_child == NULL ) {/* no more children */
358                         osrfLogWarning( OSRF_LOG_MARK, "No more children..." );
359                         return;
360                 }
361
362                 osrfLogDebug( OSRF_LOG_MARK, "Forker going into wait for data...");
363                 cur_msg = client_recv( forker->connection, -1 );
364
365                 //fprintf(stderr, "Got Data %f\n", get_timestamp_millis() );
366
367                 if( cur_msg == NULL ) continue;
368
369                 int honored = 0;        /* true if we've serviced the request */
370
371                 while( ! honored ) {
372
373                         check_children( forker ); 
374
375                         osrfLogDebug( OSRF_LOG_MARK,  "Server received inbound data" );
376                         int k;
377                         prefork_child* cur_child = forker->first_child;
378
379                         /* Look for an available child */
380                         for( k = 0; k < forker->current_num_children; k++ ) {
381
382                                 osrfLogInternal( OSRF_LOG_MARK, "Searching for available child. cur_child->pid = %d", cur_child->pid );
383                                 osrfLogInternal( OSRF_LOG_MARK, "Current num children %d and loop %d", forker->current_num_children, k);
384                         
385                                 if( cur_child->available ) {
386                                         osrfLogDebug( OSRF_LOG_MARK,  "forker sending data to %d", cur_child->pid );
387
388                                         message_prepare_xml( cur_msg );
389                                         char* data = cur_msg->msg_xml;
390                                         if( ! data || strlen(data) < 1 ) break;
391
392                                         cur_child->available = 0;
393                                         osrfLogInternal( OSRF_LOG_MARK,  "Writing to child fd %d", cur_child->write_data_fd );
394
395                                         int written = 0;
396                                         //fprintf(stderr, "Writing Data %f\n", get_timestamp_millis() );
397                                         if( (written = write( cur_child->write_data_fd, data, strlen(data) + 1 )) < 0 ) {
398                                                 osrfLogWarning( OSRF_LOG_MARK, "Write returned error %d", errno);
399                                                 cur_child = cur_child->next;
400                                                 continue;
401                                         }
402
403                                         //fprintf(stderr, "Wrote %d bytes to child\n", written);
404
405                                         forker->first_child = cur_child->next;
406                                         honored = 1;
407                                         break;
408                                 } else 
409                                         cur_child = cur_child->next;
410                         } 
411
412                         /* if none available, add a new child if we can */
413                         if( ! honored ) {
414                                 osrfLogDebug( OSRF_LOG_MARK, "Not enough children, attempting to add...");
415                                 if( forker->current_num_children < forker->max_children ) {
416                                         osrfLogDebug( OSRF_LOG_MARK,  "Launching new child with current_num = %d",
417                                                         forker->current_num_children );
418
419                                         prefork_child* new_child = launch_child( forker );
420                                         message_prepare_xml( cur_msg );
421                                         char* data = cur_msg->msg_xml;
422                                         if( ! data || strlen(data) < 1 ) break;
423                                         new_child->available = 0;
424                                         osrfLogDebug( OSRF_LOG_MARK,  "Writing to new child fd %d : pid %d", 
425                                                         new_child->write_data_fd, new_child->pid );
426                                         write( new_child->write_data_fd, data, strlen(data) + 1 );
427                                         forker->first_child = new_child->next;
428                                         honored = 1;
429                                 }
430                         }
431
432                         if( !honored ) {
433                                 osrfLogWarning( OSRF_LOG_MARK,  "No children available, sleeping and looping..." );
434                                 usleep( 50000 ); /* 50 milliseconds */
435                         }
436
437                         if( child_dead )
438                                 reap_children(forker);
439
440
441                         //fprintf(stderr, "Parent done with request %f\n", get_timestamp_millis() );
442
443                 } // honored?
444
445                 message_free( cur_msg );
446
447         } /* top level listen loop */
448
449 }
450
451
452 void check_children( prefork_simple* forker ) {
453
454         //check_begin:
455
456         int select_ret;
457         fd_set read_set;
458         FD_ZERO(&read_set);
459         int max_fd = 0;
460         int n;
461
462         struct timeval tv;
463         tv.tv_sec       = 0;
464         tv.tv_usec      = 0;
465
466         if( child_dead )
467                 reap_children(forker);
468
469         prefork_child* cur_child = forker->first_child;
470
471         int i;
472         for( i = 0; i!= forker->current_num_children; i++ ) {
473
474                 if( cur_child->read_status_fd > max_fd )
475                         max_fd = cur_child->read_status_fd;
476                 FD_SET( cur_child->read_status_fd, &read_set );
477                 cur_child = cur_child->next;
478         }
479
480         FD_CLR(0,&read_set);/* just to be sure */
481
482         if( (select_ret=select( max_fd + 1 , &read_set, NULL, NULL, &tv)) == -1 ) {
483                 osrfLogWarning( OSRF_LOG_MARK,  "Select returned error %d on check_children", errno );
484         }
485
486         if( select_ret == 0 )
487                 return;
488
489         /* see if one of a child has told us it's done */
490         cur_child = forker->first_child;
491         int j;
492         int num_handled = 0;
493         for( j = 0; j!= forker->current_num_children && num_handled < select_ret ; j++ ) {
494
495                 if( FD_ISSET( cur_child->read_status_fd, &read_set ) ) {
496                         //printf( "Server received status from a child %d\n", cur_child->pid );
497                         osrfLogDebug( OSRF_LOG_MARK,  "Server received status from a child %d", cur_child->pid );
498
499                         num_handled++;
500
501                         /* now suck off the data */
502                         char buf[64];
503                         memset( buf, 0, 64);
504                         if( (n=read(cur_child->read_status_fd, buf, 63))  < 0 ) {
505                                 osrfLogWarning( OSRF_LOG_MARK, "Read error afer select in child status read with errno %d", errno);
506                         }
507
508                         osrfLogDebug( OSRF_LOG_MARK,  "Read %d bytes from status buffer: %s", n, buf );
509                         cur_child->available = 1;
510                 }
511                 cur_child = cur_child->next;
512         } 
513
514 }
515
516
517 void prefork_child_wait( prefork_child* child ) {
518
519         int i,n;
520         growing_buffer* gbuf = buffer_init( READ_BUFSIZE );
521         char buf[READ_BUFSIZE];
522         memset( buf, 0, READ_BUFSIZE );
523
524         for( i = 0; i < child->max_requests; i++ ) {
525
526                 n = -1;
527                 int gotdata = 0;
528                 clr_fl(child->read_data_fd, O_NONBLOCK );
529
530                 while( (n=read(child->read_data_fd, buf, READ_BUFSIZE-1)) > 0 ) {
531                         osrfLogDebug(OSRF_LOG_MARK, "Prefork child read %d bytes of data", n);
532                         if(!gotdata)
533                                 set_fl(child->read_data_fd, O_NONBLOCK );
534                         buffer_add( gbuf, buf );
535                         memset( buf, 0, READ_BUFSIZE );
536                         gotdata = 1;
537                 }
538
539                 if( errno == EAGAIN ) n = 0;
540
541                 if( n < 0 ) {
542                         osrfLogWarning( OSRF_LOG_MARK,  "Prefork child read returned error with errno %d", errno );
543                         break;
544
545                 } else if( gotdata ) {
546                         osrfLogDebug(OSRF_LOG_MARK, "Prefork child got a request.. processing..");
547                         prefork_child_process_request(child, gbuf->buf);
548                         buffer_reset( gbuf );
549                 }
550
551                 if( i < child->max_requests - 1 ) 
552                         write( child->write_status_fd, "available" /*less than 64 bytes*/, 9 );
553         }
554
555         buffer_free(gbuf);
556
557         osrfLogDebug( OSRF_LOG_MARK, "Child with max-requests=%d, num-served=%d exiting...[%d]", 
558                         child->max_requests, i, getpid() );
559
560         exit(0);
561 }
562
563
564 void add_prefork_child( prefork_simple* forker, prefork_child* child ) {
565         
566         if( forker->first_child == NULL ) {
567                 forker->first_child = child;
568                 child->next = child;
569                 return;
570         }
571
572         /* we put the child in as the last because, regardless, 
573                 we have to do the DLL splice dance, and this is the
574            simplest way */
575
576         prefork_child* start_child = forker->first_child;
577         while(1) {
578                 if( forker->first_child->next == start_child ) 
579                         break;
580                 forker->first_child = forker->first_child->next;
581         }
582
583         /* here we know that forker->first_child is the last element 
584                 in the list and start_child is the first.  Insert the
585                 new child between them*/
586
587         forker->first_child->next = child;
588         child->next = start_child;
589         return;
590 }
591
592 prefork_child* find_prefork_child( prefork_simple* forker, pid_t pid ) {
593
594         if( forker->first_child == NULL ) { return NULL; }
595         prefork_child* start_child = forker->first_child;
596         do {
597                 if( forker->first_child->pid == pid ) 
598                         return forker->first_child;
599         } while( (forker->first_child = forker->first_child->next) != start_child );
600
601         return NULL;
602 }
603
604
605 void del_prefork_child( prefork_simple* forker, pid_t pid ) { 
606
607         if( forker->first_child == NULL ) { return; }
608
609         (forker->current_num_children)--;
610         osrfLogDebug( OSRF_LOG_MARK, "Deleting Child: %d", pid );
611
612         prefork_child* start_child = forker->first_child; /* starting point */
613         prefork_child* cur_child        = start_child; /* current pointer */
614         prefork_child* prev_child       = start_child; /* the trailing pointer */
615
616         /* special case where there is only one in the list */
617         if( start_child == start_child->next ) {
618                 if( start_child->pid == pid ) {
619                         forker->first_child = NULL;
620
621                         close( start_child->read_data_fd );
622                         close( start_child->write_data_fd );
623                         close( start_child->read_status_fd );
624                         close( start_child->write_status_fd );
625
626                         prefork_child_free( start_child );
627                 }
628                 return;
629         }
630
631
632         /* special case where the first item in the list needs to be removed */
633         if( start_child->pid == pid ) { 
634
635                 /* find the last one so we can remove the start_child */
636                 do { 
637                         prev_child = cur_child;
638                         cur_child = cur_child->next;
639                 }while( cur_child != start_child );
640
641                 /* now cur_child == start_child */
642                 prev_child->next = cur_child->next;
643                 forker->first_child = prev_child;
644
645                 close( cur_child->read_data_fd );
646                 close( cur_child->write_data_fd );
647                 close( cur_child->read_status_fd );
648                 close( cur_child->write_status_fd );
649
650                 prefork_child_free( cur_child );
651                 return;
652         } 
653
654         do {
655                 prev_child = cur_child;
656                 cur_child = cur_child->next;
657
658                 if( cur_child->pid == pid ) {
659                         prev_child->next = cur_child->next;
660
661                         close( cur_child->read_data_fd );
662                         close( cur_child->write_data_fd );
663                         close( cur_child->read_status_fd );
664                         close( cur_child->write_status_fd );
665
666                         prefork_child_free( cur_child );
667                         return;
668                 }
669
670         } while(cur_child != start_child);
671 }
672
673
674
675
676 prefork_child* prefork_child_init( 
677         int max_requests, int read_data_fd, int write_data_fd, 
678         int read_status_fd, int write_status_fd ) {
679
680         prefork_child* child = (prefork_child*) safe_malloc(sizeof(prefork_child));
681         child->max_requests             = max_requests;
682         child->read_data_fd             = read_data_fd;
683         child->write_data_fd            = write_data_fd;
684         child->read_status_fd   = read_status_fd;
685         child->write_status_fd  = write_status_fd;
686         child->available                        = 1;
687
688         return child;
689 }
690
691
692 int prefork_free( prefork_simple* prefork ) {
693         
694         while( prefork->first_child != NULL ) {
695                 osrfLogInfo( OSRF_LOG_MARK,  "Killing children and sleeping 1 to reap..." );
696                 kill( 0,        SIGKILL );
697                 sleep(1);
698         }
699
700         client_free(prefork->connection);
701         free(prefork->appname);
702         free( prefork );
703         return 1;
704 }
705
706 int prefork_child_free( prefork_child* child ) { 
707         free(child->appname);
708         close(child->read_data_fd);
709         close(child->write_status_fd);
710         free( child ); 
711         return 1;
712 }
713