[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

dpkg version comparison



Jason, apt also seems to suffer from the "integer overflow when comparing
versions bug", but differently: instead of getting random values, it
seems to decide all big numbers are exactly the same. Weird.


Anyway. The following code should correctly replace the code:

    vl=0;  if (isdigit(*vp)) vl= strtol(val,(char**)&val,10);
    rl=0;  if (isdigit(*rp)) rl= strtol(ref,(char**)&ref,10);
    if (vl != rl) return vl - rl;

Voila:

    {
            int first_diff = 0;
            while( *val == '0' ) val++;
            while( *ref == '0' ) ref++;
            while (isdigit(*val) && isdigit(*ref)) {
                    if (!first_diff) first_diff = *val - *ref;
                    val++; ref++;
            }
            if (isdigit(*val)) return 1;
            if (isdigit(*ref)) return -1;
            if (first_diff) return first_diff;
    }

(This does away with the vl and rl temporaries)

The explanation goes like:

	* first, skip leading 0's, so our number begins with [1-9]. 0 is
	  represented by "" (ie, the next character is a non-digit)

	* then find the first difference between the two numbers, from left
	  to write. if the numbers are the same length, then this'll be the
	  most significant difference. record this as "first_diff".

	* then continue on to the end of one of the numbers.

	* if one of the numbers continues on, afterwards, it has more
	  significant digits, so is bigger, and that's our answer.

	* if neither continue on, then first_diff says which is bigger. if
	  either is bigger, that's our answer.

A couple more notes about the code. The main loop looks like:

  for (;;) {
    vp= val;  while (*vp && !isdigit(*vp)) vp++;
    rp= ref;  while (*rp && !isdigit(*rp)) rp++;
    for (;;) {
      vc= val == vp ? 0 : *val++;
      rc= ref == rp ? 0 : *ref++;
      if (!rc && !vc) break;
      if (vc && !isalpha(vc)) vc += 256; /* assumes ASCII character set */
      if (rc && !isalpha(rc)) rc += 256;
      if (vc != rc) return vc - rc;
    }
    val= vp;
    ref= rp;
    vl=0;  if (isdigit(*vp)) vl= strtol(val,(char**)&val,10);
    rl=0;  if (isdigit(*rp)) rl= strtol(ref,(char**)&ref,10);
    if (vl != rl) return vl - rl;
    if (!*val && !*ref) return 0;
    if (!*val) return -1;
    if (!*ref) return +1;
  }

The lines:
    val= vp;
    ref= rp;

are redundant: we only break from the previous loop if !rc && !vc. vc is
true iff val == vp or *val == 0, and if *val == 0, then that was the first
non-digit anyway. Likewise for rc/ref. So the inner loop can be simplified
to:

	while ( (*val && !isdigit(*val)) || (*ref && !isdigit(*ref)) ) {
		vc = *val; rc = *ref;
		if (vc && !isalnum(vc)) vc += 256;
		if (rc && !isalnum(rc)) rc += 256;
		if (vc != rc) return vc - rc;
		val++; ref++;
	}

doing away with vp and rp entirely.

Additionally, there's no real need for the lines:
    if (!*val) return -1;
    if (!*ref) return +1;

at the end of the outer loop. They can be taken care of by considering
anything that ends with a number to actually end with a an empty
alpha-string, if whatever you're comparing against ends with a string.
This means things that really end with an alpha-string get treated as
ending with an empty number (equivalent to 0) and then an empty string,
but that's life.

This is important, because the above code assumes that the empty string
compares less than everything: if we're going to introduce ~, then that's
no longer the case.

We can introduce a ~ just by manipulating vc and rc, a la:

	if (vc == '~') vc = -1;
	if (rc == '~') rc = -1;

(and making sure we don't then go and add 256 to it :)

This leaves the code looking like:

	for(;;) {
		while ( (*val && !isdigit(*val)) || (*ref && !isdigit(*ref)) ) {
			vc = *val; 
			if (vc == '~') vc = -1;
			if (vc > 0 && !isalnum(vc)) vc += 256;

			rc = *ref;
			if (rc == '~') rc = -1;
			if (rc > 0 && !isalnum(rc)) rc += 256;

			if (vc != rc) return vc - rc;

			val++; ref++;
		}

		{
			int first_diff = 0;
			while( *val == '0' ) val++;
			while( *ref == '0' ) ref++;
			while (isdigit(*val) && isdigit(*ref)) {
				if (!first_diff) first_diff = *val - *ref;
				val++; ref++;
			}
			if (isdigit(*val)) return 1;
			if (isdigit(*ref)) return -1;
			if (first_diff) return first_diff;
		}

		if (!*val && !*ref) return 0;
	}

This simplifies slightly further (changing the outer loop to a while, macroifying vc/rc), to:


/* assume ascii; warning: evaluates x multiple times! */
#define order(x) ((x) == '~' ? -1 \
                  : isdigit((x)) ? 0 \
                  : !(x) ? 0 \
                  : isalpha((x)) ? (x) \
                  : (x) + 256)

static int verrevcmp(const char *val, const char *ref) {
	if (!val) val = "";
	if (!ref) ref = "";

	while (*val || *ref) {
		while ( (*val && !isdigit(*val)) || (*ref && !isdigit(*ref)) ) {
			int vc = order(*val), rc = order(*ref);
			if (vc != rc) return vc - rc;
			val++; ref++;
		}

		{
			int first_diff = 0;
			while ( *val == '0' ) val++;
			while ( *ref == '0' ) ref++;
			while (isdigit(*val) && isdigit(*ref)) {
				if (!first_diff) first_diff = *val - *ref;
				val++; ref++;
			}
			if (isdigit(*val)) return 1;
			if (isdigit(*ref)) return -1;
			if (first_diff) return first_diff;
		}
	}
	return 0;
}

At any rate, that code's a lot easier to follow for me. If you don't want
to just rewrite the whole thing, then minimal changes and iwj's style,
would be something like:

--- lib/vercmp.c.orig   Mon Apr  9 21:00:44 2001
+++ lib/vercmp.c        Mon Apr  9 22:30:54 2001
@@ -34,8 +34,8 @@
 
 static int verrevcmp(const char *val, const char *ref) {
   int vc, rc;
-  long vl, rl;
   const char *vp, *rp;
+  int fd;
 
   if (!val) val= "";
   if (!ref) ref= "";
@@ -46,18 +46,25 @@
       vc= val == vp ? 0 : *val++;
       rc= ref == rp ? 0 : *ref++;
       if (!rc && !vc) break;
-      if (vc && !isalpha(vc)) vc += 256; /* assumes ASCII character set */
-      if (rc && !isalpha(rc)) rc += 256;
+      if (vc == '~') vc= -1;
+      if (rc == '~') rc= -1;
+      if (vc > 0 && !isalpha(vc)) vc += 256; /* assumes ASCII character set */
+      if (rc > 0 && !isalpha(rc)) rc += 256;
       if (vc != rc) return vc - rc;
     }
     val= vp;
     ref= rp;
-    vl=0;  if (isdigit(*vp)) vl= strtol(val,(char**)&val,10);
-    rl=0;  if (isdigit(*rp)) rl= strtol(ref,(char**)&ref,10);
-    if (vl != rl) return vl - rl;
+    fd= 0;
+    while (*val == '0') val++;
+    while (*ref == '0') ref++;
+    while (isdigit(*val) && isdigit(*ref)) {
+      if (!fd) fd= *val - *ref;
+      val++; ref++;
+    }
+    if (isdigit(*val)) return 1;
+    if (isdigit(*ref)) return -1;
+    if (fd) return fd;
     if (!*val && !*ref) return 0;
-    if (!*val) return -1;
-    if (!*ref) return +1;
   }
 }



Separately, the minimal patches should be like:

--- lib/vercmp.c.orig   Mon Apr  9 21:00:44 2001
+++ lib/vercmp.c.bignum Mon Apr  9 22:37:44 2001
@@ -34,8 +34,8 @@
 
 static int verrevcmp(const char *val, const char *ref) {
   int vc, rc;
-  long vl, rl;
   const char *vp, *rp;
+  int fd;
 
   if (!val) val= "";
   if (!ref) ref= "";
@@ -52,9 +52,16 @@
     }
     val= vp;
     ref= rp;
-    vl=0;  if (isdigit(*vp)) vl= strtol(val,(char**)&val,10);
-    rl=0;  if (isdigit(*rp)) rl= strtol(ref,(char**)&ref,10);
-    if (vl != rl) return vl - rl;
+    fd= 0;
+    while (*val == '0') val++;
+    while (*ref == '0') ref++;
+    while (isdigit(*val) && isdigit(*ref)) {
+      if (!fd) fd= *val - *ref;
+      val++; ref++;
+    }
+    if (isdigit(*val)) return 1;
+    if (isdigit(*ref)) return -1;
+    if (fd) return fd;
     if (!*val && !*ref) return 0;
     if (!*val) return -1;
     if (!*ref) return +1;

to cope with numbers bigger than a long; and:

--- lib/vercmp.c.orig   Mon Apr  9 21:00:44 2001
+++ lib/vercmp.c.tilde  Mon Apr  9 22:39:02 2001
@@ -46,8 +46,10 @@
       vc= val == vp ? 0 : *val++;
       rc= ref == rp ? 0 : *ref++;
       if (!rc && !vc) break;
-      if (vc && !isalpha(vc)) vc += 256; /* assumes ASCII character set */
-      if (rc && !isalpha(rc)) rc += 256;
+      if (vc == '~') vc = -1;
+      if (rc == '~') rc = -1;
+      if (vc > 0 && !isalpha(vc)) vc += 256; /* assumes ASCII character set */
+      if (rc > 0 && !isalpha(rc)) rc += 256;
       if (vc != rc) return vc - rc;
     }
     val= vp;
@@ -56,8 +58,6 @@
     rl=0;  if (isdigit(*rp)) rl= strtol(ref,(char**)&ref,10);
     if (vl != rl) return vl - rl;
     if (!*val && !*ref) return 0;
-    if (!*val) return -1;
-    if (!*ref) return +1;
   }
 }

top cope with ~pre versions. Only the rewritten function and the full test
have had much testing.

Cheers,
aj

-- 
Anthony Towns <aj@humbug.org.au> <http://azure.humbug.org.au/~aj/>
I don't speak for anyone save myself. GPG signed mail preferred.

``_Any_ increase in interface difficulty, in exchange for a benefit you
  do not understand, cannot perceive, or don't care about, is too much.''
                      -- John S. Novak, III (The Humblest Man on the Net)



Reply to: