[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#668948: apt: Proposal for a better handling of timeouted connection



Package: apt
Version: 0.8.15.10+nmu1
Severity: wishlist

  Hi,

  My laptop is running is different network environments. Sometimes
there is only IPv4, sometimes IPv4 and IPv6 and somtimes IPv4 and
broken IPv6.
  I do not want to always desactivate IPv6 (mostly because I want
to be able to test it). However, when I'm in a broken IPv6 network
environment (IPv6 address setup on my laptop but broken routers),
it is really painful to wait for APT timeout on the IPv6 address
before APT switch to an IPv4. My main mirror is ftp.fr.debian.org
that has both IPv4 and IPv6 addresses:
vdanjean@eyak:/tmp$ host ftp.fr.debian.org
ftp.fr.debian.org is an alias for debian.proxad.net.
debian.proxad.net has address 212.27.32.66
debian.proxad.net has IPv6 address 2a01:e0c:1:1598::2

  So, today, I tried to get the sources of apt and implement
what is described here:
http://www.isc.org/community/blog/201101/how-to-connect-to-a-multi-homed-server-over-tcp
I based my code on the "Select based sample code" provided as APT already uses
select and I patched methods/connect.cc

The patch in in attachment to this message. The patch has not yet
the required quality to be added to APT. However, it allows me to
test the feature and I would say that I'm really enjoyed (ie, even
if the patch does not go upstream, I will keep it locally for me).
Currently, the patch allows to come back to the previous behavior
by defining the APT_SEQUENTIAL_CONNECT environment variable.

So, I've several questions:
- would you be interested by such kind of parallel connection?
- if no, why?
- if yes:
  * the license/copyright would need to be adapted (my code is
    based on http://www.isc.org/files/imce/select-connect_0.c )
  * the code need to be simplify (no need to keep the old behavior)
  * the code need to be checked, in particular for error handling.
    As connections occurs in parallel, I'm not sure what must be
    done with _error. With my current code, _error->Errno can be
    called lots of times for example.
  * if this behavior is kept, is it really necessary to keep the
    "LastUsed" variable? I keep it in my code but I'm not sure
    it is really needed anymore. And getting rid of it would
    simplify the code.

  So, let me know what you think of this.

  Regards,
   Vincent

-- Package-specific info:

-- (/etc/apt/preferences present, but not submitted) --


-- (/etc/apt/sources.list present, but not submitted) --


-- System Information:
Debian Release: wheezy/sid
  APT prefers unstable
  APT policy: (500, 'unstable'), (500, 'testing'), (500, 'stable'), (1, 'experimental')
Architecture: amd64 (x86_64)

Kernel: Linux 3.3.0-trunk-amd64 (SMP w/8 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages apt depends on:
ii  debian-archive-keyring  2010.08.28
ii  gnupg                   1.4.12-4
ii  libc6                   2.13-27
ii  libgcc1                 1:4.6.3-1
ii  libstdc++6              4.6.3-1
ii  zlib1g                  1:1.2.6.dfsg-2

apt recommends no packages.

Versions of packages apt suggests:
ii  apt-doc         <none>
ii  aptitude        0.6.5-1
ii  bzip2           1.0.6-1
ii  dpkg-dev        1.16.2
ii  python-apt      0.8.3+nmu1
ii  synaptic        0.75.6
ii  xz-lzma [lzma]  5.1.1alpha+20110809-3

-- no debconf information
diff --git a/methods/connect.cc b/methods/connect.cc
index a5af1f1..b3b1a80 100644
--- a/methods/connect.cc
+++ b/methods/connect.cc
@@ -37,6 +37,8 @@ static string LastHost;
 static int LastPort = 0;
 static struct addrinfo *LastHostAddr = 0;
 static struct addrinfo *LastUsed = 0;
+static int nbHosts = 0;
+static int use_parallel_connect = 0;
 
 // Set of IP/hostnames that we timed out before or couldn't resolve
 static std::set<string> bad_addr;
@@ -125,6 +127,190 @@ static bool DoConnect(struct addrinfo *Addr,string Host,
    return true;
 }
 									/*}}}*/
+// connect_to_host - Connect to a server				/*{{{*/
+// ---------------------------------------------------------------------
+/* Performs a connection to the server */
+#define TIMEOUT 500
+static bool
+connect_to_host(struct addrinfo *StartHost,struct addrinfo *FirstHostAddr,
+		string Host,int count,int &Fd,
+		unsigned long TimeOut,pkgAcqMethod * Owner) {
+  struct addrinfo *Addr, *NextAddr, **Addrs = NULL;
+  int fd = -1, n, i, j, max = -1, *fds;
+  struct timeval *timeout, timeout0 = { 0, TIMEOUT * 1000},
+    timeout1 = { TimeOut, 0 };
+  fd_set fdset, wrset;
+  // Show a status indicator
+  char Name[NI_MAXHOST];
+  char Service[NI_MAXSERV];
+  
+  fds = new int[count]();//calloc(count, sizeof(*fds));
+  if (fds == NULL) {
+    _error->Errno("calloc",_("Cannot allocate %d file descriptors"),count);
+    return false;
+  }
+  Addrs = new struct addrinfo *[count]();
+  if (Addrs == NULL) {
+    _error->Errno("calloc",_("Cannot allocate %d pointers"),count);
+    free(fds);
+    return false;
+  }
+  FD_ZERO(&fdset);
+  for(Addr = StartHost, i = 0, count = 0; Addr != NULL; Addr = NextAddr) {
+    NextAddr = Addr;
+  
+    // Ignore UNIX domain sockets
+    do {
+      NextAddr = NextAddr->ai_next;
+    } while (NextAddr != 0 && NextAddr->ai_family == AF_UNIX);
+
+    /* If we reached the end of the search list then wrap around to the
+       start */
+    if (NextAddr == 0)
+	NextAddr = FirstHostAddr;
+      
+    // Reached the end of the search cycle
+    if (NextAddr == StartHost)
+      NextAddr = NULL;
+
+
+    Name[0] = 0;
+    Service[0] = 0;
+    getnameinfo(Addr->ai_addr,Addr->ai_addrlen,
+		Name,sizeof(Name),Service,sizeof(Service),
+		NI_NUMERICHOST|NI_NUMERICSERV);
+    Owner->Status(_("Connecting to %s (%s)"),Host.c_str(),Name);
+
+    fd = socket(Addr->ai_family, Addr->ai_socktype, Addr->ai_protocol);
+    if (fd == -1) {
+      /*
+       * If AI_ADDRCONFIG is not supported we will get
+       * EAFNOSUPPORT returned.  Behave as if the address
+       * was not there.
+       */
+      if (errno != EAFNOSUPPORT) {
+	_error->Errno("socket",_("Could not create a socket for %s (f=%u t=%u p=%u)"),
+		      Name,Addr->ai_family,Addr->ai_socktype,Addr->ai_protocol);
+      } else if (NextAddr != NULL)
+	continue;
+    } else if (fd >= FD_SETSIZE) {
+      close(fd);
+    } else {
+      SetNonBlock(fd,true);
+      if (connect(fd, Addr->ai_addr, Addr->ai_addrlen) == -1) {
+	if (errno != EINPROGRESS) {
+	  _error->Errno("connect",_("Cannot initiate the connection "
+				    "to %s:%s (%s)."),Host.c_str(),Service,Name);
+	  close(fd);
+	} else {
+	  /*
+	   * Record the information for this descriptor.
+	   */
+	  fds[i] = fd;
+	  Addrs[i] = Addr;
+	  FD_SET(fd, &fdset);
+	  if (max == -1 || fd > max)
+	    max = fd;
+	  count++;
+	  i++;
+	}
+      } else  {
+	/*
+	 * We connected without blocking.
+	 */
+	LastUsed = Addr;
+	goto done;
+      }
+    }
+
+    if (count == 0)
+      continue;
+    
+    //assert(max != -1);
+    do {
+      if (Addr->ai_next != NULL)
+	timeout = &timeout0;
+      else
+	timeout = &timeout1;
+      
+      /* The write bit is set on both success and failure. */
+      wrset = fdset;
+      n = select(max + 1, NULL, &wrset, NULL, timeout);
+      if (n == 0) {
+	timeout0.tv_usec >>= 1;
+	break;
+      }
+      if (n < 0) {
+	if (errno == EAGAIN || errno == EINTR)
+	  continue;
+	_error->Errno("select", _("Cannot select"));
+	fd = -1;
+	goto done;
+      }
+      for (fd = 0; fd <= max; fd++) {
+	if (FD_ISSET(fd, &wrset)) {
+	  socklen_t len;
+	  int err;
+	  for (j = 0; j < i; j++)
+	    if (fds[j] == fd)
+	      break;
+	  //assert(j < i);
+	  /*
+	   * Test to see if the connect
+	   * succeeded.
+	   */
+	  len = sizeof(err);
+	  n = getsockopt(fd, SOL_SOCKET,
+			 SO_ERROR, &err, &len);
+	  if (n != 0 || err != 0) {
+	    if (n != 0) {
+	      _error->Errno("getsockopt",_("Failed"));
+	    } else {
+	      errno = err;
+	      if(errno == ECONNREFUSED)
+		Owner->SetFailReason("ConnectionRefused");
+	      else if (errno == ETIMEDOUT)
+		Owner->SetFailReason("ConnectionTimedOut");
+	      _error->Errno("connect",_("Could not connect to %s:%s (%s)."),
+			    Host.c_str(),Service,Name);
+	    }
+	    close(fd);
+	    FD_CLR(fd, &fdset);
+	    fds[j] = -1;
+	    count--;
+	    continue;
+	  }
+	  /* Connect succeeded. */
+	  LastUsed = Addrs[j];
+	  goto done;
+	}
+      }
+    } while (timeout == &timeout1 && count != 0);
+  }
+  
+  /* We failed to connect. */
+  fd = -1;
+  
+ done:
+  /* Close all other descriptors we have created. */
+  for (j = 0; j < i; j++)
+    if (fds[j] != fd && fds[j] != -1) {
+      close(fds[j]);
+    }
+  
+  /* Free everything. */
+  if (fds) delete[](fds);
+  if (Addrs) delete[](Addrs);
+  
+  if (fd != -1) {
+    Fd=fd;
+    _error->Discard();
+    return true;
+  }
+
+  return false;
+}
+									/*}}}*/
 // Connect - Connect to a server					/*{{{*/
 // ---------------------------------------------------------------------
 /* Performs a connection to the server */
@@ -154,6 +340,7 @@ bool Connect(string Host,int Port,const char *Service,int DefPort,int &Fd,
 	 freeaddrinfo(LastHostAddr);
 	 LastHostAddr = 0;
 	 LastUsed = 0;
+	 nbHosts = 0;
       }
       
       // We only understand SOCK_STREAM sockets.
@@ -201,6 +388,17 @@ bool Connect(string Host,int Port,const char *Service,int DefPort,int &Fd,
       
       LastHost = Host;
       LastPort = Port;
+
+      /*
+       * Work out how many possible IP we could use.
+       */
+      struct addrinfo *res;
+      for (res = LastHostAddr; res; res = res->ai_next) {
+	// Ignore UNIX domain sockets
+	if (res->ai_family == AF_UNIX)
+	  continue;
+	nbHosts++;
+      }
    }
 
    // When we have an IP rotation stay with the last IP.
@@ -208,6 +406,15 @@ bool Connect(string Host,int Port,const char *Service,int DefPort,int &Fd,
    if (LastUsed != 0)
        CurHost = LastUsed;
    
+   if (use_parallel_connect == 0) {
+     use_parallel_connect=1;
+     if (getenv("APT_SEQUENTIAL_CONNECT") != NULL)
+       use_parallel_connect=-1;
+   }
+   if (use_parallel_connect == 1) {
+     return connect_to_host(CurHost,LastHostAddr,Host,nbHosts,Fd,TimeOut,Owner);
+   }
+
    while (CurHost != 0)
    {
       if (DoConnect(CurHost,Host,TimeOut,Fd,Owner) == true)

Reply to: