[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#376819: busybox-static: 'tr' not ready for prime time



Package: busybox-static
Version: 1:1.1.3-2
Severity: important

testsuite/tr/tr-works (which compares the gnu-tr and bb-tr results)
reveals several problems. These are the relevant ones:

--- gnu tr ---

echo 'abc' |  tr '[:alpha:]' 'A-ZA-Z': ABC

echo 'abc56' |  tr '[:alnum:]' 'A-ZA-Zxxxxxxxxxx': KLMFG

echo '012' |  tr '[:digit:]' 'abcdefghi': abc

echo 'abc56' |  tr '[:lower:]' '[:upper:]': ABC56

echo '  ' |  tr '[:space:]' '12345': 552
echo '  ' |  tr '[:blank:]' '12': 22

--- busybox tr --- BusyBox v1.1.3 (Debian 1:1.1.3-2) multi-call binary

echo 'abc' | busybox tr '[:alpha:]' 'A-ZA-Z': Gbc

echo 'abc56' | busybox tr '[:alnum:]' 'A-ZA-Zxxxxxxxxxx': Cbc56

echo '012' | busybox tr '[:digit:]' 'abcdefghi': 012

echo 'abc56' | busybox tr '[:lower:]' '[:upper:]': abc56

echo '  ' | busybox tr '[:space:]' '12345':

echo '  ' | busybox tr '[:blank:]' '12':

------------------------------------------------------------------------

bb coreutils/tr.c svn r15557/r15560 seems to produce the correct
results. The attached patch yields identical results (gnu-tr vs.
bb-tr) on my box, but don't take my word for it.


Cheers,
Cristian
--- tr.c	29 Jun 2006 16:18:07 -0000	1.1.1.2
+++ tr.c	4 Jul 2006 13:29:44 -0000	1.2
@@ -2,26 +2,18 @@
 /*
  * Mini tr implementation for busybox
  *
+ ** Copyright (c) 1987,1997, Prentice Hall   All rights reserved.
+ *
+ * The name of Prentice Hall may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
  * Copyright (c) Michiel Huisjes
  *
  * This version of tr is adapted from Minix tr and was modified
  * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Original copyright notice is retained at the end of this file.
+ * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  */
 
 #include <stdio.h>
@@ -32,61 +24,61 @@
 #include <sys/types.h>
 #include "busybox.h"
 
+// Even with -funsigned-char, gcc still complains about char as an array index.
+
+#define GCC_IS_STUPID int
+
 #define ASCII 0377
 
 /* some "globals" shared across this file */
 static char com_fl, del_fl, sq_fl;
-static short in_index, out_index;
 /* these last are pointers to static buffers declared in tr_main */
-static unsigned char *poutput;
-static unsigned char *pvector;
-static unsigned char *pinvec, *poutvec;
-
-#define input bb_common_bufsiz1
+static unsigned char *poutput, *pvector, *pinvec, *poutvec;
 
 static void convert(void)
 {
-	short read_chars = 0;
-	short c, coded;
-	short last = -1;
+	int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
 
 	for (;;) {
+		// If we're out of input, flush output and read more input.
+
 		if (in_index == read_chars) {
-			if ((read_chars = read(0, input, BUFSIZ)) <= 0) {
+			if (out_index) {
+				if (write(1, (char *) poutput, out_index) != out_index)
+					bb_error_msg_and_die(bb_msg_write_error);
+				out_index = 0;
+			}
+
+			if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) {
 				if (write(1, (char *) poutput, out_index) != out_index)
 					bb_error_msg(bb_msg_write_error);
 				exit(0);
 			}
 			in_index = 0;
 		}
-		c = input[in_index++];
+		c = bb_common_bufsiz1[in_index++];
 		coded = pvector[c];
 		if (del_fl && pinvec[c])
 			continue;
 		if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))
 			continue;
 		poutput[out_index++] = last = coded;
-		if (out_index == BUFSIZ) {
-			if (write(1, (char *) poutput, out_index) != out_index)
-				bb_error_msg_and_die(bb_msg_write_error);
-			out_index = 0;
-		}
 	}
 
 	/* NOTREACHED */
 }
 
-static void map(register unsigned char *string1, unsigned int string1_len,
-		register unsigned char *string2, unsigned int string2_len)
+static void map(unsigned char *string1, unsigned int string1_len,
+		unsigned char *string2, unsigned int string2_len)
 {
 	unsigned char last = '0';
 	unsigned int i, j;
 
 	for (j = 0, i = 0; i < string1_len; i++) {
 		if (string2_len <= j)
-			pvector[string1[i]] = last;
+			pvector[(GCC_IS_STUPID)string1[i]] = last;
 		else
-			pvector[string1[i]] = last = string2[j++];
+			pvector[(GCC_IS_STUPID)string1[i]] = last = string2[j++];
 	}
 }
 
@@ -95,7 +87,7 @@ static void map(register unsigned char *
  *   Escapes, e.g.,  \a     ==>  Control-G
  *	 Character classes, e.g. [:upper:] ==> A ... Z
  */
-static unsigned int expand(const char *arg, register unsigned char *buffer)
+static unsigned int expand(const char *arg, unsigned char *buffer)
 {
 	unsigned char *buffer_start = buffer;
 	int i, ac;
@@ -116,7 +108,8 @@ static unsigned int expand(const char *a
 			arg += 3; /* Skip the assumed a-z */
 		} else if (*arg == '[') {
 			arg++;
-			if (ENABLE_FEATURE_TR_CLASSES && *arg++ == ':') {
+			i = *arg++;
+			if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
 				if (strncmp(arg, "alpha", 5) == 0) {
 					for (i = 'A'; i <= 'Z'; i++)
 						*buffer++ = i;
@@ -124,12 +117,12 @@ static unsigned int expand(const char *a
 						*buffer++ = i;
 				}
 				else if (strncmp(arg, "alnum", 5) == 0) {
+					for (i = '0'; i <= '9'; i++)
+						*buffer++ = i;
 					for (i = 'A'; i <= 'Z'; i++)
 						*buffer++ = i;
 					for (i = 'a'; i <= 'z'; i++)
 						*buffer++ = i;
-					for (i = '0'; i <= '9'; i++)
-						*buffer++ = i;
 				}
 				else if (strncmp(arg, "digit", 5) == 0)
 					for (i = '0'; i <= '9'; i++)
@@ -140,10 +133,15 @@ static unsigned int expand(const char *a
 				else if (strncmp(arg, "upper", 5) == 0)
 					for (i = 'A'; i <= 'Z'; i++)
 						*buffer++ = i;
-				else if (strncmp(arg, "space", 5) == 0)
-					strcat((char*)buffer, " \f\n\r\t\v");
-				else if (strncmp(arg, "blank", 5) == 0)
-					strcat((char*)buffer, " \t");
+				else if (strncmp(arg, "space", 5) == 0) {
+				    const char s[] = "\t\n\v\f\r ";
+					strcat((char*)buffer, s);
+					buffer += sizeof(s) - 1;
+				}
+				else if (strncmp(arg, "blank", 5) == 0) {
+					*buffer++ = '\t';
+					*buffer++ = ' ';
+				}
 				/* gcc gives a warning if braces aren't used here */
 				else if (strncmp(arg, "punct", 5) == 0) {
 					for (i = 0; i <= ASCII; i++)
@@ -156,13 +154,13 @@ static unsigned int expand(const char *a
 							*buffer++ = i;
 				}
 				else {
-					strcat((char*)buffer, "[:");
-					arg++;
+					*buffer++ = '[';
+					*buffer++ = ':';
 					continue;
 				}
 				break;
 			}
-			if (ENABLE_FEATURE_TR_EQUIV && *arg++ == '=') {
+			if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
 				*buffer++ = *arg;
 				/* skip the closing =] */
 				arg += 3;
@@ -173,7 +171,6 @@ static unsigned int expand(const char *a
 				arg -= 2;
 				continue;
 			}
-			i = *arg++;
 			ac = *arg++;
 			while (i <= ac)
 				*buffer++ = i++;
@@ -187,7 +184,7 @@ static unsigned int expand(const char *a
 
 static int complement(unsigned char *buffer, int buffer_len)
 {
-	register short i, j, ix;
+	short i, j, ix;
 	char conv[ASCII + 2];
 
 	ix = 0;
@@ -204,12 +201,12 @@ static int complement(unsigned char *buf
 
 int tr_main(int argc, char **argv)
 {
-	register unsigned char *ptr;
+	unsigned char *ptr;
 	int output_length=0, input_length;
 	int idx = 1;
 	int i;
 	RESERVE_CONFIG_BUFFER(output, BUFSIZ);
-	RESERVE_CONFIG_UBUFFER(vector, ASCII+1);
+	RESERVE_CONFIG_BUFFER(vector, ASCII+1);
 	RESERVE_CONFIG_BUFFER(invec,  ASCII+1);
 	RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
 
@@ -243,57 +240,20 @@ int tr_main(int argc, char **argv)
 	}
 
 	if (argv[idx] != NULL) {
-		input_length = expand(argv[idx++], (unsigned char*)input);
+		input_length = expand(argv[idx++], bb_common_bufsiz1);
 		if (com_fl)
-			input_length = complement((unsigned char*)input, input_length);
+			input_length = complement(bb_common_bufsiz1, input_length);
 		if (argv[idx] != NULL) {
 			if (*argv[idx] == '\0')
 				bb_error_msg_and_die("STRING2 cannot be empty");
 			output_length = expand(argv[idx], (unsigned char*)output);
-			map((unsigned char*)input, input_length, (unsigned char*)output, output_length);
+			map(bb_common_bufsiz1, input_length, (unsigned char*)output, output_length);
 		}
 		for (i = 0; i < input_length; i++)
-			invec[(unsigned char)input[i]] = TRUE;
+			invec[(GCC_IS_STUPID)bb_common_bufsiz1[i]] = TRUE;
 		for (i = 0; i < output_length; i++)
-			outvec[(unsigned char)output[i]] = TRUE;
+			outvec[(GCC_IS_STUPID)output[i]] = TRUE;
 	}
 	convert();
 	return (0);
 }
-
-/*
- * Copyright (c) 1987,1997, Prentice Hall
- * All rights reserved.
- *
- * Redistribution and use of the MINIX operating system in source and
- * binary forms, with or without modification, are permitted provided
- * that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- *
- * Neither the name of Prentice Hall nor the names of the software
- * authors or contributors may be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
- * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-

Reply to: