[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: iconv...



Andrey Chernomyrdin wrote:
> 
> Hi!
> 
> Вот тут столкнулся с проблемой...
> - --
> $ echo -ne "\214\n" | iconv -t windows-1251 -f koi8-r
> iconv: illegal input sequence at position 0
> - --
> 
> Что делать и как с этим бороться, то есть не обрабатывает все символы, которые
> не известны в windows-1251...

Маленький патч для glibc из MDK RE, автор Дмитрий Левин.
Добавляет опцию -r, которая позволяет задать символ, замещающий
неизвестные символы (по умолчанию '?'). Без нее все как обычно.
Не знаю, войдет ли в 2.2.2.

Rgrds, AEN
>
--- glibc-2.2-orig/iconv/iconv_prog.c	Thu Sep  7 22:56:23 2000
+++ glibc-2.2/iconv/iconv_prog.c	Wed Nov 15 04:48:21 2000
@@ -59,6 +59,7 @@
   { "list", 'l', NULL, 0, N_("list all known coded character sets") },
   { NULL, 0, NULL, 0, N_("Output control:") },
   { NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
+  { "replace", 'r', "SYMBOL", OPTION_ARG_OPTIONAL, N_("replace invalid characters with specified symbol") },
   { "output", 'o', "FILE", 0, N_("output file") },
   { "silent", 's', NULL, 0, N_("suppress warnings") },
   { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
@@ -99,6 +100,7 @@
 
 /* If nonzero omit invalid character from output.  */
 static int omit_invalid;
+static char replace_invalid;
 
 /* Prototypes for the functions doing the actual work.  */
 static int process_block (iconv_t cd, char *addr, size_t len, FILE *output);
@@ -314,6 +316,10 @@
       /* Omit invalid characters from output.  */
       omit_invalid = 1;
       break;
+    case 'r':
+      /* Replace invalid characters.  */
+      replace_invalid = (arg && *arg) ? *arg : '?';
+      break;
     case OPT_VERBOSE:
       verbose = 1;
       break;
@@ -356,6 +362,23 @@
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
+static int
+write_invalid( iconv_t cd, char **addr, size_t *len, FILE *output )
+{
+  int errno_save = errno;
+  int needed_from = ((__gconv_t) cd)->__steps->__min_needed_from;
+  if ( fwrite( &replace_invalid, 1, 1, output ) < 1 || ferror( output ) )
+  {
+    /* Error occurred while printing replace symbol. */
+    error (0, 0, _("conversion stopped due to problem in writing the output"));
+    return -1;
+  }
+
+  errno = errno_save;
+  *addr += needed_from;
+  *len -= needed_from;
+  return 0;
+}
 
 static int
 process_block (iconv_t cd, char *addr, size_t len, FILE *output)
@@ -424,22 +447,34 @@
 	  switch (errno)
 	    {
 	    case EILSEQ:
+	      if ( replace_invalid )
+	      {
+	        if ( write_invalid( cd, &addr, &len, output ) )
+	          return -1;
+	        else
+	          break;
+	      }
 	      error (0, 0, _("illegal input sequence at position %ld"),
 		     (long) (addr - start));
-	      break;
+	      return -1;
 	    case EINVAL:
+	      if ( replace_invalid )
+	      {
+	        if ( write_invalid( cd, &addr, &len, output ) )
+	          return -1;
+	        else
+	          break;
+	      }
 	      error (0, 0, _("\
 incomplete character or shift sequence at end of buffer"));
-	      break;
+	      return -1;
 	    case EBADF:
 	      error (0, 0, _("internal error (illegal descriptor)"));
-	      break;
+	      return -1;
 	    default:
 	      error (0, 0, _("unknown iconv() error %d"), errno);
-	      break;
+	      return -1;
 	    }
-
-	  return -1;
 	}
     }
 

Reply to: