[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: UTF-8



On Wed, Sep 11, 2002 at 07:54:19PM +1000, Jay Hap-hang Yu wrote:
> 
> It'd be great if someone come up with some perl script to convert
> filenames to UTF8. I guess that can be done with iconv + perl
> 

I spent the whole morning on this one. Actually, it was an excuse
to get away from other things. :) It is only minimally tested, and I
don't recommend it to anyone except for testing purposes. I don't think
storing filenames in UTF-8 is wise right now.

Well... someone has to eat the crab first!

You need libreadline-dev to compile it. Read the source first.
Patches and bug reports are welcome.

-- 
hashao|       故贵以贱为本,高以下为基。是以侯王自称孤、寡、不谷。此非
hashao|     以贱为本邪?非乎?故致誉无誉。是故不欲琭琭如玉,珞珞如石。
/* Released under the GNU General Public License (GPL).
 * It might make your file system unusable. Use it at your own risk.
 * 	-- hashao
 *
 * Please visit: http://www.debian.org/intl/zh/
 */

/* Convert the encoding of the file names in a directory tree, recursively.
 *   + The content (target) of symbolic links is also converted. (good/bad?)
 */

/* Usage: dirconv -f encode -t encode [-w] filename
 *        -w:	Do not warn.
 */

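/* To compile:
 *    gcc -o dirconv dirconv.c -lreadline
 * (The library must come after the source file, otherwise linkers that
 * only keep libraries needed so far will drop it.)
 * You need libreadline-dev to compile it, of course.
 */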

/* ChangeLog:
 *
 * 2002-09-15:
 *   + First release. Only limited testing.
 *   + The content of symbolic links is also converted. (good/bad?)
 *   + I don't know what will happen on vfat systems. I will not
 *     try it on my own vfat system!
 *   + Use it at your own risk. (hashao)
 */

/* Must be defined before the first #include: glibc only declares nftw()
 * and the FTW_* flags in <ftw.h> when a suitable feature-test macro is set. */
#define _GNU_SOURCE 1

#define SYMLEN 	4096 /* Size in bytes of the buffer that receives a symbolic link's target. */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include <ftw.h>
#include <iconv.h>
#include <sys/stat.h>
#include <unistd.h>

#include <readline/readline.h>

/* Program-wide state shared by the conversion helpers below. */
iconv_t icd = (iconv_t)(-1);	/* iconv conversion descriptor; (iconv_t)(-1) until opened. */
int maxlen = 256;	/* size in bytes of the buffer convstr points to. */
char *convstr = NULL;	/* heap buffer holding the most recently converted file name. */

/*
 * Convert one file name with the global descriptor `icd` and leave the
 * NUL-terminated result in the global buffer `convstr`.
 *
 * file: the name to convert (a single path component in practice).
 *
 * Whenever iconv() reports E2BIG the buffer is replaced by a larger one and
 * the conversion is restarted from scratch, so `convstr` and `maxlen` may
 * change during the call.
 *
 * Returns the iconv() result (count of non-reversible conversions) on
 * success, or -1 on failure: an encoding error (errno set by iconv()) or
 * an out-of-memory condition.
 */
int do_iconv(const char *file)
{
    if (convstr == NULL || maxlen < 1) {
	errno = ENOMEM;
	return -1;
    }

    for (;;) {
	char *inbuf = (char*)file;	/* iconv() takes a non-const pointer. */
	char *outbuf = convstr;
	size_t inleft = strlen(file);
	/* Hold one byte back so the result always ends in a NUL. */
	size_t outleft = (size_t)maxlen - 1;
	size_t retval;
	size_t newlen;
	char *bigger;

	memset(convstr, 0, (size_t)maxlen);
	/* Put the descriptor back into its initial shift state. */
	iconv(icd, NULL, NULL, NULL, NULL);
	retval = iconv(icd, &inbuf, &inleft, &outbuf, &outleft);
	if (retval != (size_t)(-1)) {
	    /* Emit the closing shift sequence of stateful encodings. */
	    if (iconv(icd, NULL, NULL, &outbuf, &outleft) != (size_t)(-1))
		return (int)retval;
	}

	/* Anything but "output buffer too small" is an encoding error. */
	if (errno != E2BIG)
	    return -1;

	/* Grow by a worst-case estimate for what is left; the "+ 1" makes
	 * sure the buffer really gets bigger even when inleft is 0. */
	newlen = (size_t)maxlen + (size_t)MB_LEN_MAX * (inleft + 1);
	if (newlen > (size_t)INT_MAX) {
	    errno = ENOMEM;
	    return -1;
	}
	bigger = malloc(newlen);
	if (bigger == NULL)
	    return -1;
	free(convstr);
	convstr = bigger;
	maxlen = (int)newlen;
    }
}
	    
/*
 * Rename `file` so that its last path component becomes the converted name
 * currently stored in the global `convstr`.
 *
 * file: full path of the entry as handed over by nftw().
 * s:    nftw() bookkeeping; s->base is the offset of the base name in `file`.
 *
 * Prints the outcome to stdout.  Returns what rename() returned (0 on
 * success, -1 on failure); -1 is also returned when the new path cannot be
 * allocated.
 */
int do_rename(const char* file, struct FTW *s)
{
    int retval;
    size_t dirlen = (size_t)s->base;
    /* The length of the string, not the size of the pointer. */
    size_t namelen = strlen(convstr);
    char *newfullpath;

    newfullpath = malloc(dirlen + namelen + 1);
    if (newfullpath == NULL) {
	printf("!!! failed to convert %s\n", file);
	return -1;
    }
    /* Directory prefix of the old path followed by the converted name. */
    memcpy(newfullpath, file, dirlen);
    memcpy(newfullpath + dirlen, convstr, namelen + 1);

    retval = rename(file, newfullpath);
    if (!retval)
	printf("converted %s to %s\n", file, newfullpath);
    else
	printf("!!! failed to convert %s\n", file);
    free(newfullpath);
    return retval;
}

/*
 * Convert a symbolic link: both its own name and the path it points to.
 *
 * file: full path of the link as handed over by nftw().
 * s:    nftw() bookkeeping; s->base is the offset of the base name in `file`.
 *
 * The converted link name is taken from the global `convstr`; the link
 * target is converted here with the global descriptor `icd`.  A new link is
 * created under the converted name and the old one is removed.  When the
 * name does not change the old link has to be removed first, because
 * symlink() never overwrites an existing entry; if neither name nor target
 * changes nothing is done.
 *
 * Returns 0 on success and -1 on failure (a message is printed to stdout).
 */
int do_symlink(const char* file, struct FTW *s)
{
    char symbuf[SYMLEN];
    char *newfullpath = NULL;
    char *newlink = NULL;
    char *inbuf, *outbuf;
    size_t inleft, outleft;
    size_t dirlen, namelen, linkcap;
    ssize_t linklen;
    int samepath;
    int result = -1;

    /* New path = directory part of the old path + converted base name. */
    dirlen = (size_t)s->base;
    namelen = strlen(convstr);
    newfullpath = malloc(dirlen + namelen + 1);
    if (newfullpath == NULL) {
	printf("!!! Out of memory while converting symlink %s\n", file);
	goto out;
    }
    memcpy(newfullpath, file, dirlen);
    memcpy(newfullpath + dirlen, convstr, namelen + 1);

    /* readlink() does not terminate the string; keep room to do it here. */
    linklen = readlink(file, symbuf, SYMLEN - 1);
    if (linklen == -1){
	printf("!!! failed to read symbolic link: %s\n", file);
	goto out;
    }
    symbuf[linklen] = '\0';

    /* Convert the link target into a buffer whose size iconv() is told. */
    linkcap = (size_t)linklen * MB_LEN_MAX + 1;
    newlink = malloc(linkcap);
    if (newlink == NULL) {
	printf("!!! Out of memory while converting symlink %s\n", file);
	goto out;
    }
    inbuf = symbuf;
    inleft = (size_t)linklen;
    outbuf = newlink;
    outleft = linkcap - 1;	/* one byte reserved for the terminator. */
    iconv(icd, NULL, NULL, NULL, NULL);
    if (iconv(icd, &inbuf, &inleft, &outbuf, &outleft) == (size_t)(-1)
	    || iconv(icd, NULL, NULL, &outbuf, &outleft) == (size_t)(-1)){
	printf("!!! Cannot iconv symlink content %s for %s\n",
		symbuf, file);
	goto out;
    }
    *outbuf = '\0';

    samepath = (strcmp(newfullpath, file) == 0);
    if (samepath && strcmp(newlink, symbuf) == 0) {
	/* Name and target are already valid in the new encoding. */
	result = 0;
	goto out;
    }

    /* An unchanged name must be freed up before it can be re-created. */
    if (samepath && unlink(file)){
	printf("!!! Cannot unlink symlink %s\n", file);
	goto out;
    }

    /* Relink to the new symlink. */
    if (symlink(newlink, newfullpath)){
	printf("!!! Cannot create symlink %s to %s.\n", newfullpath, newlink);
	/* Best effort: put back the link that was just removed. */
	if (samepath && symlink(symbuf, file))
	    printf("!!! Cannot restore symlink %s\n", file);
	goto out;
    }
    printf("converted symlink %s to %s points to %s\n", file,
	    newfullpath, newlink);

    if (!samepath && unlink(file)){
	printf("!!! Cannot unlink symlink %s\n", file);
	goto out;
    }
    result = 0;

out:
    free(newlink);
    free(newfullpath);
    return result;
}

/*
 * nftw() callback: convert the name of one directory entry.
 *
 * file: full path of the entry.
 * sb:   stat information supplied by nftw().
 * flag: nftw() type flag for the entry.
 * s:    nftw() bookkeeping; s->base is the offset of the base name in `file`.
 *
 * The base name is converted with do_iconv(); symbolic links are then
 * handled by do_symlink(), everything else by do_rename().  Entries that
 * cannot be converted are reported and skipped.
 *
 * Always returns 0 so that a single bad entry never stops the walk.
 */
int walk_func(const char* file, const struct stat *sb, int flag, struct FTW *s)
{
    const char *basefile = file + s->base;

    /* FTW_NS means the stat call failed, so *sb holds nothing usable. */
    if (flag == FTW_NS) {
	printf("!!! Cannot stat %s, skip.\n", file);
	return 0;
    }

    if (do_iconv(basefile) == -1){
	printf("!!! Cannot iconv %s, skip.\n", basefile);
	if (S_ISDIR(sb->st_mode))
	    printf("In dir %s\n", file);
	return 0;
    }

    if (S_ISLNK(sb->st_mode)) {
	do_symlink(file, s);
    }else {
	do_rename(file, s);
    }

    if (S_ISDIR(sb->st_mode))
	printf("In dir %s\n", file);

    /* Return non-0 will stop ftw(). */
    return 0;
}


/*
 * Walk the tree rooted at `root`, handing every entry to walk_func().
 *
 * Returns whatever nftw() returns.
 */
int do_walk(char* root)
{
    /* FTW_DEPTH: visit the content of a directory before the directory
     * itself.  FTW_PHYS: do not follow symbolic links.
     * FTW_CHDIR is deliberately left out: with it set the recursion
     * stopped at the first level (maybe a bug in libc). */
    const int walk_flags = FTW_DEPTH|FTW_PHYS;

    return nftw(root, walk_func, 100, walk_flags);
}

/* Settings gathered from the command line. */
struct conf {
    /* Name of the encoding to convert from (-f). */
    char *from;
    /* Name of the encoding to convert to (-t). */
    char *to;
    /* Root directory to convert. */
    char *root;
    /* Non-zero: ask for confirmation before converting (-w clears it). */
    int warn;
};

/*
 * Set up the global conversion state: open the iconv descriptor `icd` for
 * cf->from -> cf->to and allocate the `maxlen`-byte name buffer `convstr`.
 *
 * Returns 0 on success and -1 on failure.  On failure `icd` is left at
 * (iconv_t)(-1), so callers that only test `icd` still notice the error.
 */
int do_init(struct conf *cf)
{
    icd = iconv_open(cf->to, cf->from);
    if (icd == (iconv_t)(-1))
	return -1;

    convstr = malloc((size_t)maxlen);
    if (convstr == NULL) {
	/* Without a buffer the descriptor is of no use; undo it. */
	iconv_close(icd);
	icd = (iconv_t)(-1);
	return -1;
    }
    return 0;
}

/*
 * Parse the command line into *cf.
 *
 * Recognised: -f <encoding>, -t <encoding>, -w (clear the warn flag) and
 * plain arguments, of which the first one becomes cf->root.
 *
 * Returns 0 when -f, -t and a root were all given; otherwise -1 (the usage
 * line is printed when one of them is missing).
 */
int do_opt(int argc, char* argv[], struct conf *cf)
{
    int opt;

    cf->root = NULL;
    cf->from = NULL;
    cf->to = NULL;
    cf->warn = 1;

    /* With the leading '-' in the option string getopt() hands plain
     * arguments back as option code 1, which is handled below. */
    while ((opt = getopt(argc, argv, "-t:f:w")) != -1) {
	if (opt == '?')
	    return -1;

	if (opt == 1) {
	    /* Only the first plain argument is kept. */
	    if (cf->root == NULL)
		cf->root = optarg;
	} else if (opt == 't') {
	    cf->to = optarg;
	} else if (opt == 'f') {
	    cf->from = optarg;
	} else if (opt == 'w') {
	    cf->warn = 0;
	}
    }

    if (cf->to == NULL || cf->from == NULL || cf->root == NULL) {
	printf("Usage: %s [-w] -f encode -t encode filename \n", argv[0]);
	return -1;
    }
    return 0;
}
    
int main(int argc, char* argv[])
{
    int retval;
    struct conf cf;
    struct stat *myst;
    
    retval = do_opt(argc, argv, &cf);
    if(retval != 0)
	return 1;
    
    do_init(&cf);
    if (icd == (iconv_t)(-1)) {
	printf("cannot do conversion between %s and %s\n", cf.to, cf.from);
	return 2;
    }

    myst = (struct stat*) malloc(sizeof(struct stat));
    retval = stat(cf.root, myst);
    free(myst);
    if (retval == -1) {
	printf("%s cannot be converted.\n", cf.root);
	perror(cf.root);
	return 3;
    }
    if (cf.warn){
	/* Warn the user. */
	char* c;
	printf ("I am going to convert %s and its subdirectory (if any) \n"
		"from [%s] to [%s]. It is **dangerous**!!! It might mess \n"
		"up all your file system!\n"
		"=== %s: [%s] -> [%s] ===\n",
		cf.root, cf.from, cf.to, cf.root, cf.from, cf.to);
	c = readline("Are you sure?! [N/y]: ");
	if (strcasecmp(c, "y"))
	    return 0;
	c = readline("really? [N/y]: ");
	if (strcasecmp(c, "y"))
	    return 0;
    }
    
    do_walk(cf.root);
}

Reply to: