[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

more dirconv



A new release. Do multiple files from command line.
It is good enough for me. I am done with it.

dirconv: charset/encoding converter for the names of directories. Ver: 20021009.


-- 
hashao|       孔德之容,惟道是從。
hashao|       道之為物,惟恍惟惚。惚兮恍兮,其中有象;恍兮惚兮,其中有
hashao|     物。窈兮冥兮,其中有精;其精甚真,其中有信。
hashao|       自今及古,其名不去,以閱眾甫。吾何以知眾甫之狀哉?以此。
-- 
| This message was re-posted from debian-chinese-gb@lists.debian.org
| and converted from gb2312 to big5 by an automatic gateway.
/* dirconv: charset/encoding converter for directories. Ver: 20021009.
 * Released under the Gnu Public License (GPL). 
 * It might make your file system unusable. Use the program at your own risk. 
 * 	-- hashao 
 *
 * Please visit: http://www.debian.org/intl/zh/
 */

/* Convert the encoding of a directory name recursively. 
 *   + References in symbolic links are also converted. (good/bad?)
 */

/* Usage: dirconv -f encode -t encode [-w|-m|-r] filename ... 
 *        -r:	Recursively convert directories [Default not].
 *        -w:	Warn off. Do not warn at the beginning.
 *        -m:	Also follow other mounted file system (horror!)
 */

/* To compile: 
 *    gcc -o dirconv dirconv.c -lreadline
 * (Libraries go after the source file so the linker can resolve its symbols.)
 * You need libreadline-dev to compile it, of course.
 */

/* ChangeLog:
 * 2002-10-08:
 *   + Preserve ownership (lchown) of symlinks. (do_symlink)
 *     Mode has no meaning for symlinks.
 *   + For symlink, if reference changed, make the conversion even if
 *     the symlink itself is not changed. (do_symlink):
 *        a->x ===> a->y relink a->y.
 *        b->r ===> b->r do nothing.
 *
 * 2002-10-03:
 *   + Will do multiple files/dirs in the command line. (main())
 *   + Rearrange warning messages.
 *   + Do not overwrite existing files. (do_rename, do_symlink)
 *     Maybe we could offer a forced overwrite, but I don't think we should.
 *   + Check for malloc returns. Could fail especially on recursive
 *     memory allocation.
 *
 * 2002-09-21:
 *   + Add function to convert single file. (do_single())
 *   + Set default to only convert a single filename.
 *   + Add option '-r' to recursively convert a directory.
 *
 * 2002-09-20:
 *   + Make config structure global so we can use it. C++ is better here.
 *   + Add -m option to also do mounted file system if asked.
 *   + Plug a couple of mem leak for rename symlink. (do_symlink())
 *   + Better error display. Many perror()'s, printf()'s.
 *
 * 2002-09-15:
 *   + first release. Only limited test.
 *   + content of symbolic links are also converted. (good/bad?)
 *   + Don't know what will happen on vfat systems. I will not
 *     try it on my own vfat system!
 *   + Use it on your own risk. (hashao)
 */

#define _GNU_SOURCE 1	/* Need for nftw() in ftw.h */

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <ftw.h>
#include <iconv.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <readline/readline.h>

/* Size in bytes of the buffer that receives a symbolic link's target. */
#define SYMLEN 4096

/* Shared iconv conversion descriptor; holds (iconv_t)-1 until it is opened. */
static iconv_t icd = (iconv_t)-1;

/* Shared output buffer for the most recently converted name.  It is reused
 * between conversions to avoid one malloc per file. */
static char *convstr = NULL;

/* Current size in bytes of the buffer convstr points to. */
static int maxlen = 256;

/* One entry of a singly linked list of strings. */
struct str_node {
    char *item;             /* string carried by this entry */
    struct str_node *next;  /* following entry, NULL on the last one */
};

/* Singly linked list of strings; both pointers are NULL while it is empty. */
struct slist {
    struct str_node *head;  /* first entry */
    struct str_node *tail;  /* last entry, where new entries are linked in */
};

/* Run-time configuration of the program, filled in from the command line. */
struct conf {
    char *from;             /* source encoding (-f) */
    char *to;               /* target encoding (-t) */
    struct slist filelist;  /* file names to process */
    int warn;               /* non-zero: ask for confirmation first */
    int mount;              /* non-zero: -m was given (other mounted file systems) */
    int recur;              /* non-zero: -r was given (recursive conversion) */
    int allbase;            /* non-zero: convert only the basename of every file */
};


/* The single, file-wide configuration instance. */
static struct conf gcf;

/* Utility: add `str` at the end of `list`.
 *
 * The node stores the pointer itself; the string is not copied.
 * If no node can be allocated the program is terminated with exit(-5).
 */
static void append_str_list(struct slist* list, char* str)
{
    struct str_node *node = (struct str_node*)malloc(sizeof *node);

    if (node == NULL) {
        printf("!!!Fatal error: malloc failed in append_str_list.\n");
        perror("curnode");
        exit(-5);
    }

    node->item = str;
    node->next = NULL;

    /* An empty list gets its first entry, otherwise chain behind the tail. */
    if (list->head == NULL)
        list->head = node;
    else
        list->tail->next = node;
    list->tail = node;
}

/* Convert `filename` with the global iconv descriptor `icd` and leave the
 * NUL-terminated result in the global buffer `convstr`.
 *
 * Whenever iconv() reports E2BIG the buffer is replaced by a larger one
 * (`maxlen` is updated accordingly) and the conversion is restarted from
 * the beginning of the input.
 *
 * Returns the value of iconv() on success.  Returns -1 if the input cannot
 * be converted; callers may compare the result against (size_t)(-1).
 * Terminates the program with exit(-5) when memory runs out.
 */
int do_iconv(const char *filename)
{
    for (;;) {
        char *inbuf = (char *)filename; /* iconv() wants a non-const pointer */
        char *outbuf = convstr;
        size_t inleft = strlen(filename);
        /* Keep the last byte out of iconv()'s reach: the buffer is zeroed
         * below, so the result stays NUL-terminated even when the output
         * fills all the space offered. */
        size_t outleft = (size_t)maxlen - 1;
        size_t retval;

        memset(convstr, 0, (size_t)maxlen);

        /* Put the descriptor back into its initial shift state. */
        iconv(icd, NULL, NULL, NULL, NULL);
        retval = iconv(icd, &inbuf, &inleft, &outbuf, &outleft);

        /* On success also emit the closing shift sequence that a stateful
         * target encoding may need; that step can itself hit E2BIG. */
        if (retval != (size_t)(-1)
            && iconv(icd, NULL, NULL, &outbuf, &outleft) != (size_t)(-1))
            return (int)retval;

        /* encoding error. */
        if (errno != E2BIG) {
            perror(filename);
            return -1;
        }

        /* Not enough output space: enlarge and retry.  The "+ 1" keeps the
         * buffer growing even when no input bytes are left over. */
        free(convstr);
        maxlen += MB_LEN_MAX * ((int)inleft + 1);
        convstr = malloc((size_t)maxlen);
        if (!convstr) {
            printf("!!!Fatal error: Failed to allocate memory in do_iconv.\n");
            perror("do_iconv");
            exit(-5);
        }
    }
}

/* Re-encode the name of a plain file or directory.
 *
 * Only the part of `file` that starts at byte `offset` is converted; the
 * first `offset` bytes are copied to the new name unchanged.  A name that
 * already exists is never overwritten.
 *
 * Returns 0 if the file was renamed, -1 if it was skipped (conversion
 * error, target exists, name unchanged) or rename() failed.
 * Terminates the program with exit(-5) when memory runs out.
 */
int do_rename(const char* file, int offset)
{
    const char *basefile = file + offset;
    struct stat tmpstat;    /* only used to test whether the target exists */
    char *newfullpath;
    int retval;

    retval = do_iconv(basefile);
    if (retval == -1) {
        printf("!!! Cannot iconv %s, skip.\n", basefile);
        return -1;
    }

    /* New full path = untouched prefix + converted remainder (convstr). */
    newfullpath = malloc((size_t)offset + strlen(convstr) + 1);
    if (!newfullpath) {
        printf("!!!Fatal error: Failed to allocate memory in do_rename.\n");
        perror("newfullpath");
        exit(-5);
    }
    memcpy(newfullpath, file, (size_t)offset);
    strcpy(newfullpath + offset, convstr);

    /* rename() would silently replace an existing file, so refuse here. */
    if (lstat(newfullpath, &tmpstat) == 0) {
        if (strcmp(file, newfullpath) != 0) {
            printf("!!!Cannot convert %s to %s. File already exists.\n",
                    file, newfullpath);
        } else {
            printf("!!!Cannot convert %s to %s. File did not change.\n",
                    file, newfullpath);
        }
        free(newfullpath);
        return -1;
    }

    retval = rename(file, newfullpath);
    if (retval == 0) {
        printf("Converted %s to %s\n", file, newfullpath);
    } else {
        printf("!!! Failed to convert %s\n", file);
        perror(file);
    }
    free(newfullpath);
    return retval;
}

/* Convert a symbolic link: both its own name and the path it points to.
 *
 * Only the part of `file` that starts at byte `offset` is converted; the
 * first `offset` bytes are kept.  The link is re-created under the new
 * name with the converted target, its owner and group are restored with
 * lchown(), and the old link is removed.
 *
 *   - name changed, new name already exists  -> refused
 *   - name and target both unchanged         -> refused ("did not change")
 *   - name unchanged, target changed         -> link replaced in place
 *
 * Returns 0 on success and -1 if the link was skipped or a step failed.
 * Terminates the program with exit(-5) when memory runs out.
 */
int do_symlink(const char* file, int offset)
{
    const char *basefile = file + offset;
    const char *newlink;
    char symbuf[SYMLEN + 1];    /* + 1: readlink() does not NUL-terminate */
    struct stat oldstat;        /* owner/group of the original link */
    struct stat tmpstat;        /* only used to test whether the target exists */
    char *newfullpath;
    ssize_t linklen;
    int samepath;
    int retval = -1;

    if (do_iconv(basefile) == -1) {
        printf("!!! Cannot iconv %s. Skip.\n", basefile);
        return -1;
    }

    /* New full path = untouched prefix + converted remainder (convstr). */
    newfullpath = malloc((size_t)offset + strlen(convstr) + 1);
    if (!newfullpath) {
        printf("!!!Fatal error: Failed to allocate memory in do_symlink.\n");
        perror("newfullpath");
        exit(-5);
    }
    memcpy(newfullpath, file, (size_t)offset);
    strcpy(newfullpath + offset, convstr);

    /* Never replace a different, already existing file. */
    samepath = (strcmp(file, newfullpath) == 0);
    if (!samepath && lstat(newfullpath, &tmpstat) == 0) {
        printf("!!!Cannot convert %s to %s. File already exists.\n",
                file, newfullpath);
        goto out;
    }

    /* Get the content of the symbolic link and convert it too. */
    linklen = readlink(file, symbuf, SYMLEN);
    if (linklen == -1) {
        printf("!!! Failed to read symbolic link: %s\n", file);
        perror(file);
        goto out;
    }
    symbuf[linklen] = '\0';

    if (do_iconv(symbuf) == -1) {
        printf("!!! Cannot iconv symlink content %s for %s\n", symbuf, file);
        goto out;
    }
    /* do_iconv() is not called again below, so convstr can be used as is. */
    newlink = convstr;

    if (samepath && strcmp(symbuf, newlink) == 0) {
        printf("!!!Cannot convert %s to %s. Symlink did not change.\n",
                file, newfullpath);
        goto out;
    }

    /* Remember the owner before the old link may disappear. */
    if (lstat(file, &oldstat) == -1) {
        printf("!!! Cannot stat symlink %s\n", file);
        perror(file);
        goto out;
    }

    /* Only the target changes: the old link has to go first, otherwise
     * symlink() below would fail with EEXIST. */
    if (samepath && unlink(file) == -1) {
        printf("!!! Cannot unlink symlink %s\n", file);
        perror(file);
        goto out;
    }

    /* Relink to the new symlink. */
    if (symlink(newlink, newfullpath) == -1) {
        printf("!!! Cannot create symlink %s to %s\n", newfullpath, newlink);
        perror(newlink);
        if (samepath) {
            /* The original was already removed: try to put it back. */
            if (symlink(symbuf, file) == -1) {
                printf("!!! Cannot restore symlink %s to %s\n", file, symbuf);
                perror(file);
            } else {
                lchown(file, oldstat.st_uid, oldstat.st_gid);
            }
        }
        goto out;
    }

    /* Restore ownership of the new symlink (mode is meaningless for links). */
    if (lchown(newfullpath, oldstat.st_uid, oldstat.st_gid) == -1) {
        printf("!!!Failed to chown of symlink %s, uid: %lu, gid: %lu.\n",
                newfullpath, (unsigned long)oldstat.st_uid,
                (unsigned long)oldstat.st_gid);
        perror(newfullpath);
    }

    if (!samepath && unlink(file) == -1) {
        printf("!!! Cannot unlink symlink %s\n", file);
        perror(file);
        goto out;
    }

    printf("Converted symlink %s to %s points to %s\n", file,
            newfullpath, newlink);
    retval = 0;

out:
    free(newfullpath);
    return retval;
}

/* Callback passed to nftw(); called once for every entry of the walk.
 *
 * Converts the basename of `file` (the part starting at s->base): symbolic
 * links go through do_symlink(), everything else through do_rename().
 * Entries whose stat() failed are skipped because `sb` carries no usable
 * data for them.
 *
 * Always returns 0 so that one failing entry does not abort the walk
 * (a non-zero return value would stop nftw()).
 */
int walk_func(const char* file, const struct stat *sb, int flag, struct FTW *s)
{
    /* The type flag is an enumeration, not a bit mask: compare with ==. */
    if (flag == FTW_NS) {
        printf("!!! Cannot stat %s, skip.\n", file);
        return 0;
    }

    if (flag == FTW_DNR) {
        printf("!!! %s is a directory but its content cannot be read.\n", file);
    }

    /* Only convert the basename. we do depth first. */
    if (S_ISLNK(sb->st_mode)) {
        do_symlink(file, s->base);
    } else {
        do_rename(file, s->base);
    }

    if (S_ISDIR(sb->st_mode))
        printf("In dir %s\n", file);

    return 0;
}

/* Start walking a directory tree from `root`, converting every entry.
 *
 * The walk is depth first (FTW_DEPTH), so the content of a directory is
 * handled before the directory itself is renamed, and physical (FTW_PHYS),
 * so symbolic links are reported as links and not followed.
 * Unless cf->mount is set (-m), the walk stays on the file system of
 * `root`.
 *
 * Returns the result of nftw(): 0 on success, -1 on error.
 */
int do_walk(char* root, struct conf *cf)
{
    int retval;
    int flag;

    /* Do not |FTW_CHDIR. It will stop when hit by permission denies. */
    flag = FTW_DEPTH|FTW_PHYS;

    /* FTW_MOUNT *restricts* the walk to a single file system, so it must be
     * set when crossing mount points was NOT requested. */
    if (!cf->mount) {
        flag |= FTW_MOUNT;
    }

    /* 100: upper bound on the directory descriptors nftw() keeps open. */
    retval = nftw(root, walk_func, 100, flag);
    if (retval == -1) {
        perror("nftw Error");
    }
    return retval;
}

/* Return the offset of the last component of `path`: one past the last '/'
 * that is not the final character, or 0 if there is none. */
static int basename_offset(const char *path)
{
    int offset = 0;
    const char *p;

    for (p = path; *p; p++) {
        if (*p == '/' && p[1] != '\0')
            offset = (int)(p + 1 - path);
    }
    return offset;
}

/* Convert the encoding of a single file name.
 *
 * Unless a previous call was answered with "a" (all), the user is asked
 * whether only the basename or the whole path is to be converted:
 *   y - basename only, for this file
 *   a - basename only, for this and all following files (sets cf->allbase)
 *   n - the whole path
 * If the prompt hits end of input, the whole path is converted.
 *
 * Returns the result of do_symlink()/do_rename(), or -1 if `filename`
 * cannot be examined with lstat().
 */
int do_single(char* filename, struct conf *cf)
{
    struct stat myst;
    int offset = 0;     /* Offset of the basename; 0 means the whole path. */
    char *c;

    if (cf->allbase) {
        /* "All" was chosen earlier: basename only, without asking again. */
        offset = basename_offset(filename);
    } else {
        while ((c = readline("Do you want to convert basename only? [y/n/a] "))
                != NULL) {
            int all = !strcasecmp(c, "a");
            int yes = all || !strcasecmp(c, "y");
            int no = !strcasecmp(c, "n");

            free(c);
            if (yes) {
                offset = basename_offset(filename);
                printf("Only convert basename: %s\n", filename+offset);
                if (all)
                    cf->allbase = 1;
                break;
            }
            if (no) {
                offset = 0;
                printf("Convert the whole path: %s\n", filename);
                break;
            }
            printf("Please chose:\n"
                    "y) Yes\n"
                    "n) No\n"
                    "a) All\n");
        }
    }

    /* Find out symlink or not. */
    if (lstat(filename, &myst) == -1) {
        printf("%s cannot be converted.\n", filename);
        perror(filename);
        return -1;
    }
    if (S_ISLNK(myst.st_mode))
        return do_symlink(filename, offset);
    return do_rename(filename, offset);
}

/* Set up the global state used by the conversion functions: allocate the
 * shared output buffer `convstr` (`maxlen` bytes) and open the iconv
 * descriptor `icd` for converting from cf->from to cf->to.
 *
 * Returns 0 on success and -1 on any failure, after printing the reason.
 */
int do_init(struct conf *cf)
{
    convstr = malloc((size_t)maxlen);
    if (!convstr)
    {
        printf("!!!Fatal Error: Failled to malloc in do_init.\n");
        perror("convstr");
        /* Report this as -1 like every other failure, and stop here:
         * nothing can be converted without the buffer. */
        return -1;
    }

    /* Note the argument order of iconv_open(): target first, then source. */
    icd = iconv_open(cf->to, cf->from);
    if (icd == (iconv_t)-1) {
        printf("Cannot convert from %s to %s.\n", cf->from, cf->to);
        perror("Iconv");
        return -1;
    }
    return 0;
}

/* Read the command line options into `cf`.
 *
 *   -f enc   source encoding            -t enc   target encoding
 *   -w       do not ask for confirmation
 *   -m       set the mount flag         -r       set the recursive flag
 * Every non-option argument is appended to cf->filelist.
 *
 * Returns 0 on success, -1 on an unknown option or when the source
 * encoding, the target encoding or the file list is missing.
 */
int do_opt(int argc, char* argv[], struct conf *cf)
{
    int c;

    /* Start from a fully defined configuration, whatever `cf` held before. */
    cf->to = cf->from = NULL;
    cf->filelist.head = cf->filelist.tail = NULL;
    cf->warn = 1;
    cf->mount = cf->recur = 0;
    cf->allbase = 0;

    /* The leading '-' in the option string makes getopt() hand back each
     * non-option argument as if it were the argument of option code 1. */
    while ((c = getopt(argc, argv, "-wmrt:f:")) != -1) {
        switch (c) {
            case 1:
                append_str_list(&(cf->filelist), optarg);
                break;
            case 't':
                cf->to = optarg;
                break;
            case 'f':
                cf->from = optarg;
                break;
            case 'w':
                cf->warn = 0;
                break;
            case 'm':
                cf->mount = 1;
                break;
            case 'r':
                cf->recur = 1;
                break;
            case '?':
                return -1;
            default:
                break;
        }
    }

    if (!(cf->to && cf->from && cf->filelist.head)) {
        return -1;
    }
    return 0;
}

/* Show what is about to be converted and ask the user to confirm twice.
 *
 * cf:    configuration (encodings, recursive and mount flags).
 * files: the list of file names that will be processed.
 *
 * Returns 1 only if both questions are answered with "y" (any case);
 * any other answer, including end of input, returns 0.
 */
int print_confirm(struct conf *cf, struct slist* files)
{
    /* The extra wording is only valid with recursive mode on. */
    const char *subdir = cf->recur
        ? "and their subdirectories (if any) \n"
        : " ";
    struct str_node *cnode;
    char *c;
    int retval = 0;

    printf("\n"
           "=============================================================\n"
           "=============================================================\n"
           "\n"
           );
    printf ("I am going to convert following files %sfrom [%s] to [%s].\n",
            subdir, cf->from, cf->to);
    printf("It is **dangerous**!!! All your file systems might be messed up!\n\n");

    /* Only warn about mounted system on recursive mode. */
    if (cf->recur) {
        if (cf->mount) {
            printf("[Cross filesystem] ");
        } else {
            printf("[Same filesystem] ");
        }
        printf("[Recursively]\n");
    }

    /* Print files that will be processed. */
    printf("======  [%s] --> [%s] ======:\n", cf->from, cf->to);
    for (cnode = files->head; cnode != NULL; cnode = cnode->next) {
        printf("*) %s\n", cnode->item);
    }

    printf("\n");
    /* readline() returns NULL at end of input; treat that as "no". */
    c = readline("Are you sure?! [y/N]: ");
    if (c && !strcasecmp(c, "y")) {
        free(c);
        c = readline("Really? [y/N]: ");
        if (c && !strcasecmp(c, "y"))
            retval = 1;
    }
    free(c);
    return retval;
}

/* Check that every file in `files` exists and drop the ones that do not.
 *
 * Each entry is tested with lstat(); entries that fail are reported,
 * unlinked from the list and their node is freed (the strings themselves
 * are not freed).  On return head and tail describe the remaining
 * entries; both are NULL if nothing is left.
 */
static void check_files(struct slist *files)
{
    struct stat myst;           /* only used to test whether the file exists */
    struct str_node **link;     /* the pointer that leads to the current node */
    struct str_node *last = NULL;   /* last node that was kept */

    link = &files->head;
    printf("------------------- checking file state. --------------------\n");
    while (*link) {
        struct str_node *cnode = *link;
        char *root = cnode->item;

        /* Make sure the 'root' is a valid file or directory. */
        if (lstat(root, &myst) == -1) {
            printf("%s cannot be converted.\n", root);
            perror(root);
            /* Get rid of invalid nodes: bypass the node, then release it. */
            *link = cnode->next;
            free(cnode);
        } else {
            last = cnode;
            link = &cnode->next;
        }
    }
    /* Keep the tail valid even if the former last node was removed. */
    files->tail = last;
    printf("-------------------------- Done -----------------------------\n");
}
    
int main(int argc, char* argv[])
{
    int retval;
    struct conf *cf; 
    struct str_node *cnode; /* Current node in filelist. */

    cf = &gcf;

    
    /* parse command line. */
    retval = do_opt(argc, argv, cf);
    if(retval != 0){
	printf("Usage: %s [-w|-m|-r] -f encode -t encode filename ...\n", argv[0]);
	return 1;
    }

    /* init global stuff. */
    retval = do_init(cf);
    if(retval == -1)
    {
	printf("Failed to initialize.\n");
	return 2;
    }

    check_files(&(cf->filelist));
    if(!cf->filelist.head) {
	printf("No file to process, exit.\n");
	return 3;
    }

    /* Warning user of the danger. */
    if (cf->warn){
	retval = print_confirm(cf, &(cf->filelist));
	if(!retval) /* not confirmed. */
	    return 4;
    }

    /* Process all the files/dirs. */
    for(cnode=cf->filelist.head; cnode != NULL; cnode = cnode->next){
	char* root;

	root = cnode->item;
	/* Real action. */
	if (cf->recur){
	    retval = do_walk(root, cf);
	}else{
	    retval = do_single(root, cf);
	}

    }
    if (icd != (iconv_t)-1)
	iconv_close(icd);
    
    return retval;
}

Reply to: