From f44ef93ff860a43f49a8dc54da8d452afad7620d Mon Sep 17 00:00:00 2001 From: erickson Date: Mon, 25 Jan 2010 16:29:19 +0000 Subject: [PATCH] Added some fault tolerance to the inbound-to-unixserver message handoff process. In some conditions, the unix socket will result in sigpipe (anecdotally, seen more often in virtualized environments). These changes add a call to select before writing to the socket to check for socket availability and add a sigpipe handler that forces the inbound process to wait a brief period of time before trying the delivery again. git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1901 9efc2488-bf62-4759-914b-345cdb29e865 --- .../OpenSRF/Transport/SlimJabber/Inbound.pm | 65 ++++++++++++++----- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm b/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm index 5b72fe5..898a528 100644 --- a/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm +++ b/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm @@ -82,6 +82,8 @@ sub DESTROY { } } } + +my $sig_pipe = 0; sub listen { my $self = shift; @@ -125,47 +127,74 @@ sub listen { # no routers defined }; + my $app = $self->{app}; - - - $logger->transport( $self->{app} . " going into listen loop", INFO ); + $logger->info("$app inbound: going into listen loop" ); while(1) { my $sock = $self->unix_sock(); my $o; - $logger->debug("Inbound listener calling process()"); - try { $o = $self->process(-1); if(!$o){ - $logger->error( - "Inbound received no data from the Jabber socket in process()"); + $logger->error("$app inbound: received no data from the Jabber socket in process()"); usleep(100000); # otherwise we loop and pound syslog logger with errors } } catch OpenSRF::EX::JabberDisconnected with { - $logger->error("Inbound process lost its ". - "jabber connection. Attempting to reconnect..."); + $logger->error("$app inbound: process lost its jabber connection. 
Attempting to reconnect..."); $self->initialize; $o = undef; }; + next unless $o; + + while(1) { + # keep trying to deliver the message until we succeed + + my $socket = IO::Socket::UNIX->new( Peer => $sock ); + + unless($socket and $socket->connected) { + $logger->error("$app inbound: unable to connect to inbound socket $sock: $!"); + usleep(50000); # 50 msec + next; + } + + # block until the pipe is ready for writing + my $outfile = ''; + vec($outfile, $socket->fileno, 1) = 1; + my $nfound = select(undef, $outfile, undef, undef); + + next unless $nfound; # should not happen since we're blocking - if($o) { - my $socket = IO::Socket::UNIX->new( Peer => $sock ); - throw OpenSRF::EX::Socket( - "Unable to connect to UnixServer: socket-file: $sock \n :=> $! " ) - unless ($socket->connected); - print $socket freeze($o); - $socket->close; - } + if($nfound == -1) { # select failed + $logger->error("$app inbound: unable to write to socket: $!"); + usleep(50000); # 50 msec + next; + } + + $sig_pipe = 0; + local $SIG{'PIPE'} = sub { $sig_pipe = 1; }; + print $socket freeze($o); + + if($sig_pipe) { + # The attempt to write to the socket failed. Wait a short time then try again. + # Don't bother closing the socket, it will only cause grief + $logger->error("$app inbound: got SIGPIPE, will retry after a short wait..."); + usleep(50000); # 50 msec + next; + } + + $socket->close; + last; + } } - throw OpenSRF::EX::Socket( "How did we get here?!?!" ); + $logger->error("$app inbound: exited process loop"); } 1; -- 2.43.2