[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: win -> dos



В Пнд, 21.04.2003, в 16:45, Комаров Алексей пишет:
> Здравствуйте debian-russian,
> 
>   Имеется куча каталогов и в них куча подкаталогов. В каждом каталоге
>   присутствует файл 1.txt, который содержит описание графических
>   файлов находящихся в данном каталоге.
>   Изначально было создано все в W2k, а теперь переносим под Woody 3.0
>   на ftp сервер.
>   Необходимо все текстовые файлы конвертировать из win в dos, чтоб
>   можно было искать в них описания (пока ищем в far'e, он по ftp
>   хорошо понимает dos-ую кодировку).
>   
>   Есть  идея - сканировать раз в сутки все *.txt файлы и делать из них
>   один  большой  текстовый  файл, а уже в нем искать описания. Как это
>   сделать проще?
for i in */*/*; do cat $i | xcode -a +w > $i.new && mv -f $i.new $i ; done
Потянет?
>   (файл типа:
>   путь к файлу на диске.
>   текстовое содержание файла.
> 
>   путь к файлу на диске.
>   текстовое содержание файла.
> 
>   ...)
> 
> --
> С Уважением,
> Комаров Алексей                   mailto:komarov@ecodom-style.ru
-- 
---------------------------------------------------------
echo '16i[q]sa[ln0=aln100%Pln100/snlbx]sb20293A2058554E494Csnlbxq'|dc

Best Regards                    mailto:srg@csu.ac.ru
Mokeev Sergey                   http://sux.csu.ac.ru/
                                ICQ UIN:168860082

// xcode.C (formerly auto2unix.cc)
// This program tries to determine input document encoding
// and to convert it to koi8, CP-1251 or cp866.

// Written  by Andrey V. Lukyanov on May 14, 1997
// Last modified on May 18, 1997

// Updated to convert to /don't kill me/ cp1251 (oh, god! I hate it!)
// instead of KOI-8 (original)
// by Cyril Rotmistrovsky
// Updated to make conversions to cp1251, cp866 and koi8-r (default)
// depending on flags
// by Cyril Rotmistrovsky
// Modified on Oct 19, 1997
// Modified by Cyril Rotmistrovsky to be compiled by Watcom C++ 10.0
// (Oh, God! what a non-standard compiler!)
// Last modified on Jun 18 1998

// Modified by Igor V. Krassikov (KIV without Co)
// for quoted-printable decoding

// Name changed to xcode 


#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

// KOI8_WIN: 128-byte translation table, one output byte for each input
// byte 128..255 (index = byte - 128).  It is reached through destTab when
// the selected output encoding is cp1251; the byte being looked up has
// already been converted to koi8 by main().
// Can be overridden by defining KOI8_WIN before this point.
#ifndef KOI8_WIN
#define KOI8_WIN \
    "\x94\x83\xaa\x8f\x90\xa9\x93\x84\x92\x91\x95\xaf\xac\xab\xad\xae" \
    "\x80\x81\x82\xb4\xbe\xb9\xbb\xb7\xb3\xb2\xbf\xb5\xb8\xbd\xba\xb6" \
    "\x9d\x8a\xa5\xb1\xa6\x99\x88\x87\x8b\xa4\xa3\x98\x8e\x8d\x8c\x96" \
    "\x97\x9c\x85\xb0\x86\x89\xa1\xa2\x9b\x9f\xa0\x9a\xa8\xa7\x9e\xbc" \
    "\xfe\xe0\xe1\xf6\xe4\xe5\xf4\xe3\xf5\xe8\xe9\xea\xeb\xec\xed\xee" \
    "\xef\xff\xf0\xf1\xf2\xf3\xe6\xe2\xfc\xfb\xe7\xf8\xfd\xf9\xf7\xfa" \
    "\xde\xc0\xc1\xd6\xc4\xc5\xd4\xc3\xd5\xc8\xc9\xca\xcb\xcc\xcd\xce" \
    "\xcf\xdf\xd0\xd1\xd2\xd3\xc6\xc2\xdc\xdb\xc7\xd8\xdd\xd9\xd7\xda"
#endif
// KOI8_ALT: 128-byte translation table, one output byte for each input
// byte 128..255 (index = byte - 128, eight entries per source line).  It is
// reached through destTab when the selected output encoding is cp866; the
// byte being looked up has already been converted to koi8 by main().
// Can be overridden by defining KOI8_ALT before this point.
//
// NOTE(review): the entry for input byte 0xEE is "\x48" (ASCII 'H') although
// its neighbours are \x8c and \x8e.  Presumably this deliberately avoids
// emitting byte 0x8d -- confirm before "correcting" it.
#ifndef KOI8_ALT
#define KOI8_ALT \
 "\xc4\xb3\xda\xbf\xc0\xd9\xc3\xb4" \
 "\xc2\xc1\xc5\xdf\xdc\xdb\xdd\xde" \
 "\xb0\xb1\xb2\xf4\xfe\xf9\xfb\xf7" \
 "\xf3\xf2\xff\xf5\xf8\xfd\xfa\xf6" \
 "\xcd\xba\xd5\xf1\xd6\xc9\xb8\xb7" \
 "\xbb\xd4\xd3\xc8\xbe\xbd\xbc\xc6" \
 "\xc7\xcc\xb5\xf0\xb6\xb9\xd1\xd2" \
 "\xcb\xcf\xd0\xca\xd8\xd7\xce\xfc" \
 "\xee\xa0\xa1\xe6\xa4\xa5\xe4\xa3" \
 "\xe5\xa8\xa9\xaa\xab\xac\xad\xae" \
 "\xaf\xef\xe0\xe1\xe2\xe3\xa6\xa2" \
 "\xec\xeb\xa7\xe8\xed\xe9\xe7\xea" \
 "\x9e\x80\x81\x96\x84\x85\x94\x83" \
 "\x95\x88\x89\x8a\x8b\x8c\x48\x8e" \
 "\x8f\x9f\x90\x91\x92\x93\x86\x82" \
 "\x9c\x9b\x87\x98\x9d\x99\x97\x9a"

#endif

// Number of input encodings the program knows about: the row count of
// recode_table and the length of the rating array kept by main().
#define NUMCOD 5

// Output translation tables, indexed by (output encoding number - 1).
// main() skips this step entirely when the output encoding number is 0
// (koi8), so entry 0 serves encoding 1 (cp866) and entry 1 serves
// encoding 2 (cp1251).  Each table is indexed by (byte - 128).
unsigned char* destTab[]={
	(unsigned char *)KOI8_ALT,
	(unsigned char *)KOI8_WIN
};

char * help[]={
	"This program tries to determine input document encoding\n",
	"and to convert it to desired one\n",
	"\n",
	"Written by Andrey V. Lukyanov on May 14, 1997  "
	"Last modified on May 18, 1997\n",
	"Updated by Cyril Rotmistrovsky                 "
	"Last modified on Oct 19, 1997\n",
	"Updated by Igor V. Krassikov                   "
	"Last modified on Oct 29, 1998\n",
	"\n",
	"Usage: %s [-h|-H|-?] [-w|-k|-a] [+w|+k|+a|+i|+m] [-q] "
	"[input [output]]\n",
	"-q to disable quoted-pritable decoding\n",
	"-k to set   koi8      output\n",
	"-w to set   cp1251    output\n",
	"-a to set   cp866     output(default)\n",
	"+k to force koi8      input\n",
	"+w to force cp1251    input\n",
	"+a to force cp866     input\n",
	"+i to force iso8859-5 input\n",
	"+m to force mac       input\n",
	0
};

// Printable encoding names, indexed by encoding number.  The order matches
// the rows of recode_table and the values main() stores in force/enc
// (0 koi8, 1 cp866, 2 cp1251, 3 iso8859-5, 4 mac).
char * encName[]={"koi8","cp866","cp1251","iso8859-5","mac"};

// Input translation tables: recode_table[e][b-128] is the byte main()
// substitutes for input byte b (128..255) when the input is taken to be in
// encoding e.  Rows are in encName order.  The result is treated as koi8:
// main() writes it as is for koi8 output and otherwise passes it through
// destTab.  The same tables drive the encoding guess -- only results
// >= 192 contribute to an encoding's rating.
// Entries written as character constants ('+', '*', '-', 'I', ...) and the
// small numbers 34 and 39 (the ASCII double and single quote) are plain
// ASCII results; presumably stand-ins for characters without a koi8
// counterpart -- TODO confirm.
unsigned char recode_table[NUMCOD][128]={
	{   // koi8: identity, except byte 173 which becomes '-'
	128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
	144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
	160,161,162,163,164,165,166,167,168,169,170,171,172,'-',174,175,
	176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
	192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
	208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
	224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
	240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
	},
	{ // dos (listed as "cp866" in encName)
	225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
	242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
	193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
	128,129,130,131,132,133,134,135,136,137,186,139,140,141,142,143,
	144,145,146,147,148,149,150,151,152,153,154,191,156,157,158,159,
	160,161,162,176,164,165,166,167,168,169,170,171,172,173,174,175,
	210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
	179,163,'+','+','+','+','+','+',184,'+','+','+','+','+',190,' '
	},
	{ // win (listed as "cp1251" in encName)
	'+','+', 39,'+', 34,'+','+','+','+','+','+', 39,'+','+','+','+',
	'+', 39, 39, 34, 34,'+','+','-','-','*','+', 39,'+','+','+','+',
	' ','+','I','+','+','+','+','+',179,188,'E', 34,'+','+','*','I',
	184,'+','i',199,'*','*','*','*',163,'N','e', 34,'j','S','s','i',
	225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
	242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
	193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
	210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209
	},
	{ // iso (listed as "iso8859-5" in encName)
	'+','+','+','+','+','+','+','+','+','+','+','+','+','+','+','+',
	'+','+','+','+','+','+','+','+','+','+','+','+','+','+','+','+',
	'*',179,'*','*','*','*','*','*','*','*','*','*','*','*','*','*',
	225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
	242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
	193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
	210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
	'*',163,'*','*','*','*','*','*','*','*','*','*','*','*','*','*'
	},
	{ // mac
	225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
	242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
	'*',184,'*','*','*','*','*','I','*','*','*','*','*','*','*','*',
	'*',179,177,178,'i','*',199,'J','E','e','I','i','*','*','*','*',
	'j','S','*','*','f','*','*','*','*','*','*','*','*','*','*','s',
	'-','-', 34, 34, 39, 39,'*', 39,'*','*','*','*','N',163,179,209,
	193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
	210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,'*'
	}
};
// Weights used by main() to rate each candidate input encoding.  The entry
// is selected with (translated byte - 192) % 32 for translated bytes
// >= 192, so bytes 192..223 and 224..255 share the same 32 weights.
// Presumably relative frequencies of the Russian letters in koi8 order --
// the source of the numbers is not recorded here.
int letter_frequency[32]=
{
	125,1606,331,65,598,1714,22,356,168,1312,206,636,1050,658,1295,2259,
	544,447,887,1049,1217,572,200,823,398,400,355,168,67,78,285,4
};

// Added by KIV
// Quoted-printable decoding state.
// hasQP: set to 1 by main() as soon as one "=XX" escape has been decoded;
//        only after that does main() look for a line break following '='.
int hasQP = 0;
// chkQP: non-zero while main() should look for '=' escapes at all;
//        cleared by the q option.
int chkQP = 1;
// Value (0..15) of one hexadecimal digit character, either case.
// The argument must satisfy isxdigit(); nothing is checked here.
static inline int qpHexValue(int digit)
{
	if (digit <= '9') return digit - '0';
	return toupper(digit) - 'A' + 10;
}

// Decode the two characters that follow '=' in a quoted-printable escape.
//   first  - high hex digit
//   second - low hex digit
// Returns the byte value 0..255.  Both arguments must already have passed
// isxdigit(); for anything else the result is meaningless.
//
// Declared static: this file is the only user, and a plain `inline`
// definition provides no external definition under C99/C11 rules, which
// breaks the link whenever the call is not actually inlined.
static inline int getQP(int first, int second)
{
	return qpHexValue(first) * 16 + qpHexValue(second);
}

// Print the usage text and return the exit status that goes with it (1).
// progPath is argv[0]; only the part after the last '/' or '\\' is shown.
static int printUsage(const char *progPath)
{
	const char *name;
	int j;

	if (progPath == NULL) progPath = "xcode";
	name = progPath + strlen(progPath);
	while (name != progPath && *name != '\\' && *name != '/') name--;
	// Step over the separator itself, also when it is the very first
	// character of the path (e.g. "/xcode").
	if (*name == '\\' || *name == '/') name++;
	for (j = 0; help[j]; j++) printf(help[j], name);
	return 1;
}

// fopen() that reports the failure and terminates with status 1 instead
// of returning NULL.
static FILE *openOrDie(const char *path, const char *mode)
{
	FILE *fp = fopen(path, mode);

	if (fp == NULL) {
		perror(path);
		exit(1);
	}
	return fp;
}

// Copy `in` to the scratch file `f` while
//  - normalising line ends: CR is dropped, and a CR (or run of CRs) not
//    followed by LF is replaced by a single LF;
//  - decoding quoted-printable "=XX" escapes, unless disabled via chkQP;
//  - adding, for every byte >= 128, its letter weight to rating[i] for
//    each encoding i whose table maps the byte to a value >= 192.
static void spoolInput(FILE *in, FILE *f, double rating[])
{
	int c = 0, prev, i;

	while (1) {
		prev = c;
		c = getc(in);
		if (c == '\r') continue;
		if (prev == '\r' && c != '\n') fputc('\n', f);
		if (c == EOF) break;

		if (chkQP && c == '=') {
			int q1 = getc(in);

			if (isxdigit(q1)) {
				int q2 = getc(in);

				if (isxdigit(q2)) {
					c = getQP(q1, q2);
					hasQP = 1;
				} else {
					// "=X" followed by a non-digit is not an
					// escape.  Emit '=' now and let the hex
					// digit take the normal path below, so
					// that only ONE character has to be
					// pushed back (all ungetc guarantees).
					fputc(c, f);
					c = q1;
					ungetc(q2, in);
				}
			} else if (hasQP && (q1 == '\r' || q1 == '\n')) {
				// Soft line break: drop '=' together with
				// exactly one line end (CR LF, LF or CR).
				// Honoured only after a real escape has been
				// seen, so ordinary text ending a line with
				// '=' is left alone.
				if (q1 == '\r') {
					int q2 = getc(in);

					if (q2 != '\n') ungetc(q2, in);
				}
				continue;
			} else {
				ungetc(q1, in);
			}
		}

		if (c >= 128) {
			for (i = 0; i < NUMCOD; i++) {
				int koi = recode_table[i][c - 128];

				if (koi >= 192)
					rating[i] += letter_frequency[(koi - 192) % 32];
			}
		}
		fputc(c, f);
	}
}

// Index of the highest rating; the lowest index wins a tie.
static int bestRated(const double rating[])
{
	int i, best = 0;

	for (i = 1; i < NUMCOD; i++)
		if (rating[i] > rating[best]) best = i;
	return best;
}

// Usage: prog [-h|-H|-?] [-w|-k|-a] [+w|+k|+a|+i|+m] [-q] [input [output]]
//
// Reads the input (stdin by default) into a temporary file, guesses its
// encoding from letter statistics unless one was forced with a '+' option,
// then writes the text converted to the requested output encoding (koi8
// unless -a or -w was given) to the output (stdout by default).  The
// encodings in use are reported on stderr.
//
// Returns 0 on success and 1 after printing the usage text or after a
// write error.  Failure to open a file or to create the temporary file
// terminates the program with status 1.
//
// NOTE(review): an argument starting with '/' is taken as an option, so an
// absolute Unix path given as the input file ends up in the usage branch.
// Left unchanged because "/k"-style options depend on it.
int main(int argc,char ** argv)
{
	int c, a, i, force=-1;
	double  rating[NUMCOD];
	FILE *f,*out=stdout,*in=stdin;
	int enc=0;
	int I;

	for (I = 1; I < argc &&
	     (*argv[I]=='+' || *argv[I]=='-' || *argv[I]=='/'); I++) {
		// '+' selects the input encoding, any other prefix the
		// output encoding.  Plain if/else rather than assigning
		// through ?:, which is not valid C and which Watcom rejects.
		int plus = (*argv[I] == '+');

		switch (argv[I][1]) {
		case 'k': case 'K': if (plus) force=0; else enc=0; break;
		case 'a': case 'A': if (plus) force=1; else enc=1; break;
		case 'w': case 'W': if (plus) force=2; else enc=2; break;
		case 'q': case 'Q': chkQP = 0; break;
		case 'i': case 'I':
			// Input-only encoding: valid with '+' only.
			if (!plus) return printUsage(argv[0]);
			force=3;
			break;
		case 'm': case 'M':
			if (!plus) return printUsage(argv[0]);
			force=4;
			break;
		case 'h': case 'H': case '?':
		default:
			return printUsage(argv[0]);
		}
	}

	if (I < argc) in = openOrDie(argv[I++], "rt");

	for (i = 0; i < NUMCOD; i++) rating[i] = 0;

	f = tmpfile();
	if (f == NULL) { perror("\"tmpfile\""); exit(1); }

	spoolInput(in, f, rating);

	a = bestRated(rating);
	fprintf(stderr,"Guessed input encoding: %s\n",encName[a]);
	if (force >= 0) {
		a = force;
		fprintf(stderr,"Forced input encoding: %s\n",encName[a]);
	}
	fprintf(stderr,"Output encoding: %s\n",encName[enc]);

	rewind(f);

	// The output is opened only now, after the whole input has been
	// spooled into the temporary file.
	if (I < argc) out = openOrDie(argv[I++], "wt");

	while ((c = fgetc(f)) != EOF) {
		// First to koi8, then (unless koi8 was asked for) on to
		// the requested output encoding.
		if (c >= 128) c = recode_table[a][c-128];
		if (enc && c >= 128) c = destTab[enc-1][c-128];
		fputc(c, out);
	}

	if (ferror(f) || ferror(out) || fflush(out) == EOF) {
		fputs("xcode: I/O error while writing output\n", stderr);
		return 1;
	}
	return 0;
}

Reply to: