Re: win -> dos
В Пнд, 21.04.2003, в 16:45, Комаров Алексей пишет:
> Здравствуйте debian-russian,
>
> Имеется куча каталогов и в них куча подкаталогов. В каждом каталоге
> присутствует файл 1.txt, который содержит описание графических
> файлов находящихся в данном каталоге.
> Изначально было создано все в W2k, а теперь переносим под Woody 3.0
> на ftp сервер.
> Необходимо все текстовые файлы конвертировать из win в dos, чтоб
> можно было искать в них описания (пока ищем в far'e, он по ftp
> хорошо понимает dos-ую кодировку).
>
> Есть идея - сканировать раз в сутки все *.txt файлы и делать из них
> один большой текстовый файл, а уже в нем искать описания. Как это
> сделать проще?
for i in */*/*; do cat $i | xcode -a +w > $i.new && mv -f $i.net $i ; done
Потянет?
> (файл типа:
> путь к файлу на диске.
> текстовое содержание файла.
>
> путь к файлу на диске.
> текстовое содержание файла.
>
> ...)
>
> --
> С Уважением,
> Комаров Алексей mailto:komarov@ecodom-style.ru
--
---------------------------------------------------------
echo '16i[q]sa[ln0=aln100%Pln100/snlbx]sb20293A2058554E494Csnlbxq'|dc
Best Regards mailto:srg@csu.ac.ru
Mokeev Sergey http://sux.csu.ac.ru/
ICQ UIN:168860082
// xcode.C (formerly auto2unix.cc)
// This program tries to determine input document encoding
// and to convert it to koi8, CP-1251 or cp866.
// Written by Andrey V. Lukyanov on May 14, 1997
// Last modified on May 18, 1997
// Updated to convert to /don't kill me/ cp1251 (oh, god! I hate it!)
// instead of KOI-8 (original)
// by Cyril Rotmistrovsky
// Updated to make conversions to cp1251, cp866 and koi8-r (default)
// depending on flags
// by Cyril Rotmistrovsky
// Modified on Oct 19, 1997
// Modified by Cyril Rotmistrovsky to be compiled by Watcom C++ 10.0
// (Oh, God! what a non-standard compiler!)
// Last modified on Jun 18 1998
// Modified by Igor V. Krassikov (KIV without Co)
// for quoted-printable decodeing
// Name changed to xcode
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#ifndef KOI8_WIN
#define KOI8_WIN \
"\x94\x83\xaa\x8f\x90\xa9\x93\x84\x92\x91\x95\xaf\xac\xab\xad\xae" \
"\x80\x81\x82\xb4\xbe\xb9\xbb\xb7\xb3\xb2\xbf\xb5\xb8\xbd\xba\xb6" \
"\x9d\x8a\xa5\xb1\xa6\x99\x88\x87\x8b\xa4\xa3\x98\x8e\x8d\x8c\x96" \
"\x97\x9c\x85\xb0\x86\x89\xa1\xa2\x9b\x9f\xa0\x9a\xa8\xa7\x9e\xbc" \
"\xfe\xe0\xe1\xf6\xe4\xe5\xf4\xe3\xf5\xe8\xe9\xea\xeb\xec\xed\xee" \
"\xef\xff\xf0\xf1\xf2\xf3\xe6\xe2\xfc\xfb\xe7\xf8\xfd\xf9\xf7\xfa" \
"\xde\xc0\xc1\xd6\xc4\xc5\xd4\xc3\xd5\xc8\xc9\xca\xcb\xcc\xcd\xce" \
"\xcf\xdf\xd0\xd1\xd2\xd3\xc6\xc2\xdc\xdb\xc7\xd8\xdd\xd9\xd7\xda"
#endif
#ifndef KOI8_ALT
#define KOI8_ALT \
"\xc4\xb3\xda\xbf\xc0\xd9\xc3\xb4" \
"\xc2\xc1\xc5\xdf\xdc\xdb\xdd\xde" \
"\xb0\xb1\xb2\xf4\xfe\xf9\xfb\xf7" \
"\xf3\xf2\xff\xf5\xf8\xfd\xfa\xf6" \
"\xcd\xba\xd5\xf1\xd6\xc9\xb8\xb7" \
"\xbb\xd4\xd3\xc8\xbe\xbd\xbc\xc6" \
"\xc7\xcc\xb5\xf0\xb6\xb9\xd1\xd2" \
"\xcb\xcf\xd0\xca\xd8\xd7\xce\xfc" \
"\xee\xa0\xa1\xe6\xa4\xa5\xe4\xa3" \
"\xe5\xa8\xa9\xaa\xab\xac\xad\xae" \
"\xaf\xef\xe0\xe1\xe2\xe3\xa6\xa2" \
"\xec\xeb\xa7\xe8\xed\xe9\xe7\xea" \
"\x9e\x80\x81\x96\x84\x85\x94\x83" \
"\x95\x88\x89\x8a\x8b\x8c\x48\x8e" \
"\x8f\x9f\x90\x91\x92\x93\x86\x82" \
"\x9c\x9b\x87\x98\x9d\x99\x97\x9a"
#endif
#define NUMCOD 5
unsigned char* destTab[]={
(unsigned char *)KOI8_ALT,
(unsigned char *)KOI8_WIN
};
char * help[]={
"This program tries to determine input document encoding\n",
"and to convert it to desired one\n",
"\n",
"Written by Andrey V. Lukyanov on May 14, 1997 "
"Last modified on May 18, 1997\n",
"Updated by Cyril Rotmistrovsky "
"Last modified on Oct 19, 1997\n",
"Updated by Igor V. Krassikov "
"Last modified on Oct 29, 1998\n",
"\n",
"Usage: %s [-h|-H|-?] [-w|-k|-a] [+w|+k|+a|+i|+m] [-q] "
"[input [output]]\n",
"-q to disable quoted-pritable decoding\n",
"-k to set koi8 output\n",
"-w to set cp1251 output\n",
"-a to set cp866 output(default)\n",
"+k to force koi8 input\n",
"+w to force cp1251 input\n",
"+a to force cp866 input\n",
"+i to force iso8859-5 input\n",
"+m to force mac input\n",
0
};
char * encName[]={"koi8","cp866","cp1251","iso8859-5","mac"};
unsigned char recode_table[NUMCOD][128]={
{ //koi8
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,'-',174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
},
{ //dos
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
128,129,130,131,132,133,134,135,136,137,186,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,191,156,157,158,159,
160,161,162,176,164,165,166,167,168,169,170,171,172,173,174,175,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
179,163,'+','+','+','+','+','+',184,'+','+','+','+','+',190,' '
},
{ //win
'+','+', 39,'+', 34,'+','+','+','+','+','+', 39,'+','+','+','+',
'+', 39, 39, 34, 34,'+','+','-','-','*','+', 39,'+','+','+','+',
' ','+','I','+','+','+','+','+',179,188,'E', 34,'+','+','*','I',
184,'+','i',199,'*','*','*','*',163,'N','e', 34,'j','S','s','i',
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209
},
{ //iso
'+','+','+','+','+','+','+','+','+','+','+','+','+','+','+','+',
'+','+','+','+','+','+','+','+','+','+','+','+','+','+','+','+',
'*',179,'*','*','*','*','*','*','*','*','*','*','*','*','*','*',
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
'*',163,'*','*','*','*','*','*','*','*','*','*','*','*','*','*'
},
{ //mac
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
'*',184,'*','*','*','*','*','I','*','*','*','*','*','*','*','*',
'*',179,177,178,'i','*',199,'J','E','e','I','i','*','*','*','*',
'j','S','*','*','f','*','*','*','*','*','*','*','*','*','*','s',
'-','-', 34, 34, 39, 39,'*', 39,'*','*','*','*','N',163,179,209,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,'*'
}
};
int letter_frequency[32]=
{
125,1606,331,65,598,1714,22,356,168,1312,206,636,1050,658,1295,2259,
544,447,887,1049,1217,572,200,823,398,400,355,168,67,78,285,4
};
// Added by KIV
// Yes, we have Quoted-Printable...
int hasQP = 0;
int chkQP = 1;
inline int getQP(int first, int second)
{
if (first > '9') first = toupper(first) - 'A' + 1 +'9';
if (second > '9') second = toupper(second) - 'A' + 1 +'9';
return (first-'0')*16 + (second-'0');
};
int main(int argc,char ** argv)
{
int c=0, a=0, c_old, i, j, force=-1;
int decide[NUMCOD];
double rating[NUMCOD];
FILE *f,*out=stdout,*in=stdin;
int enc=0;
int I=0;
for(I=1;argc>I&&(*argv[I]=='+'||*argv[I]=='-'||*argv[I]=='/');I++) {
switch(argv[I][1]) {
default: HELP:
case 'h': case 'H': case '?':
do {
char * c = argv[0] + strlen(argv[0]);
while( (c != argv[0]) && (*c != '\\') &&
(*c != '/')) c--;
if (c != argv[0]) c++;
for(j=0;help[j];j++) printf(help[j],c);
return 1;
} while(0);
//#ifndef __WATCOMC__
case 'k': case 'K': (*argv[I]=='+'?force:enc)=0; break;
case 'a': case 'A': (*argv[I]=='+'?force:enc)=1; break;
case 'w': case 'W': (*argv[I]=='+'?force:enc)=2; break;
case 'q': case 'Q': chkQP = 0; break;
//#else
// I hate Watcom for it's stupidity!
// ?: - operator CAN be lvalue
// by both K&R and ANSI standard.
// Why Watcom does not support it?
// case 'k': case 'K': if(*argv[I]=='+')force=0; else enc=0; break;
// case 'a': case 'A': if(*argv[I]=='+')force=1; else enc=1; break;
// case 'w': case 'W': if(*argv[I]=='+')force=2; else enc=2; break;
//#endif
case 'i': case 'I': force=3; if(*argv[I]!='+') goto HELP;
break;
case 'm': case 'M': force=4; if(*argv[I]!='+') goto HELP;
break;
}
}
if(I<argc) {
in=fopen(argv[I++],"rt");
assert(in!=0);
}
for(i=0; i<NUMCOD; i++){ decide[i]=1; rating[i]=0;}
f=tmpfile();
if(f==NULL){ perror("\"tmpfile\""); exit(1);}
while(1) {
c_old=c;
c=getc(in);
if(c=='\r') continue;
if(c_old=='\r' && c!='\n') fputc('\n',f);
if(c==EOF) break;
// Added by KIV
if (chkQP && (c == '=')) {
//Maybe, QP?
int q1 = getc(in);
if (!isxdigit(q1)) {
if (hasQP && ((q1 == '\r')||(q1 == '\n'))) {
q1 = getc(in);
if ((q1 == '\r')||(q1 == '\n'))
continue;
};
ungetc(q1,in);
} else {
int q2 = getc(in);
if (!isxdigit(q2)) {
ungetc(q2,in);
ungetc(q1,in);
} else {
c = getQP(q1,q2);
hasQP = 1;
};
}
};
if(c>=128)
for(i=0; i<NUMCOD; i++)
if(recode_table[i][c-128]>=192)
rating[i]+=letter_frequency
[((int)recode_table[i][c-128]-
192)%32];
fputc(c,f);
}
for(i=0; i<NUMCOD; i++)
for(j=0; j<NUMCOD; j++)
if(i!=j && rating[i]<rating[j]) decide[i]=0;
for(i=NUMCOD-1; i>=0; i--) if(decide[i]) a=i;
fprintf(stderr,"Guessed input encoding: %s\n",encName[a]);
if(force>=0)
fprintf(stderr,"Forced input encoding: %s\n",encName[a=force]);
fprintf(stderr,"Output encoding: %s\n",encName[enc]);
rewind(f);
if(I<argc) {
out=fopen(argv[I++],"wt");
assert(out!=0);
}
while(1) {
c=fgetc(f);
if(c==EOF) break;
if(c>=128) c=recode_table[a][c-128];
if(enc&&c>=128) c=destTab[enc-1][c-128];
fputc(c,out);
}
//for(i=0; i<NUMCOD; i++) printf("\n%d\n", rating[i]);
return(0);
}
Reply to:
- References:
- win -> dos
- From: Комаров Алексей <komarov@ecodom-style.ru>