Re: UTF-8
On Wed, Sep 11, 2002 at 07:54:19PM +1000, Jay Hap-hang Yu wrote:
>
> It'd be great if someone come up with some perl script to convert
> filenames to UTF8. I guess that can be done with iconv + perl
>
I spent the whole morning on this one. Actually, it was an excuse
to get away from other things. :) It is only minimally tested and I don't
recommend it to anyone except for testing purposes. I don't think storing
filenames in UTF-8 is wise right now.
Well... someone has to eat the crab first!
You need libreadline-dev to compile it. Read the source first.
Patches and bug reports are welcome.
--
hashao| 故贵以贱为本,高以下为基。是以侯王自称孤、寡、不谷。此非
hashao| 以贱为本邪?非乎?故致誉无誉。是故不欲□□如玉,珞珞如石。
/* Released under the GNU General Public License (GPL).
* It might make your file system unusable. Use it at your own risk.
* -- hashao
*
* Please visit: http://www.debian.org/intl/zh/
*/
/* Convert the encoding of file and directory names recursively.
* + The targets of symbolic links are also converted. (good/bad?)
*/
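/* Usage: dirconv -f encoding -t encoding [-w] filename
* -f: encoding the names are currently in; -t: encoding to convert them to.
* -w: Do not warn (skip the confirmation prompts).
*/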
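/* To compile:
* gcc -o dirconv dirconv.c -lreadline
* (The library must come after the source file; linkers that resolve
* symbols from left to right otherwise drop libreadline.)
* You need libreadline-dev to compile it, of course.
*/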
/* ChangeLog:
*
* 2002-09-15:
* + First release. Only limited testing.
* + The targets of symbolic links are also converted. (good/bad?)
* + I don't know what will happen on vfat file systems. I will not
* try it on my own vfat system!
* + Use it at your own risk. (hashao)
*/
#define _GNU_SOURCE 1
#define SYMLEN 4096 /* Capacity, in bytes, for a symbolic link's target as read by readlink(). */
#include <errno.h>
#include <ftw.h>
#include <iconv.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <unistd.h>
#include <readline/readline.h>
/* Conversion state shared by the functions in this file. */

/* iconv conversion descriptor; (iconv_t)-1 while no conversion is open. */
iconv_t icd = (iconv_t)-1;

/* Size, in bytes, of the buffer that convstr points to. */
int maxlen = 256;

/* Heap buffer that receives the most recently converted file name. */
char *convstr = NULL;
/* Convert FILE with the global iconv descriptor `icd` and leave the
 * NUL-terminated result in the global buffer `convstr`.
 *
 * If the buffer is too small it is replaced by a larger one and `maxlen`
 * is updated to match, then the conversion is retried from the start.
 *
 * Returns the value reported by iconv() on success, or -1 on failure
 * (input that cannot be converted, no usable buffer, or out of memory).
 * On failure the contents of `convstr` are unspecified.
 */
int do_iconv(const char *file)
{
    for (;;) {
        /* iconv() takes a non-const char **, but does not modify the input. */
        char *inbuf = (char *)file;
        char *outbuf = convstr;
        size_t inleft = strlen(file);
        size_t outleft;
        size_t retval;
        size_t room;
        size_t grow;
        char *bigger;

        if (convstr == NULL || maxlen < 1)
            return -1;

        /* Keep one byte back so the result can always be terminated. */
        outleft = (size_t)maxlen - 1;

        /* Put the descriptor back into its initial shift state. */
        iconv(icd, NULL, NULL, NULL, NULL);
        retval = iconv(icd, &inbuf, &inleft, &outbuf, &outleft);

        /* On success, also emit any closing shift sequence a stateful
         * target encoding requires; that step can run out of space too. */
        if (retval != (size_t)(-1)
            && iconv(icd, NULL, NULL, &outbuf, &outleft) != (size_t)(-1)) {
            *outbuf = '\0';
            return (int)retval;
        }

        /* Anything other than "output buffer full" is a real error. */
        if (errno != E2BIG)
            return -1;

        /* Grow by a worst-case estimate for the unconverted input; the +1
         * guarantees progress even when only the closing sequence or the
         * terminator did not fit.  Refuse sizes that do not fit in maxlen. */
        room = (size_t)(INT_MAX - maxlen) / MB_LEN_MAX;
        if (inleft >= room)
            return -1;
        grow = (size_t)MB_LEN_MAX * (inleft + 1);

        /* Allocate first so the old buffer survives an allocation failure. */
        bigger = malloc((size_t)maxlen + grow);
        if (bigger == NULL)
            return -1;
        free(convstr);
        convstr = bigger;
        maxlen += (int)grow;
    }
}
/* Rename FILE so that its last path component becomes the converted name
 * currently held in the global `convstr`.
 *
 * The first s->base bytes of FILE (the directory part) are kept as they
 * are; everything after them is replaced by `convstr`.
 *
 * Prints the outcome on stdout.  Returns 0 on success, -1 on failure.
 */
int do_rename(const char* file, struct FTW *s)
{
    size_t dirlen = (size_t)s->base;
    /* Length of the converted name itself: sizeof(convstr) would only be
     * the size of the pointer. */
    size_t namelen = strlen(convstr);
    char *newfullpath;
    int retval;

    newfullpath = malloc(dirlen + namelen + 1);
    if (newfullpath == NULL) {
        printf("!!! failed to convert %s\n", file);
        return -1;
    }
    memcpy(newfullpath, file, dirlen);
    memcpy(newfullpath + dirlen, convstr, namelen + 1); /* copies the '\0' too */

    retval = rename(file, newfullpath);
    if (retval == 0)
        printf("converted %s to %s\n", file, newfullpath);
    else
        printf("!!! failed to convert %s\n", file);

    free(newfullpath);
    return retval == 0 ? 0 : -1;
}
/* Convert a symbolic link: both its name and the path it points to.
 *
 * The new name is the first s->base bytes of FILE followed by the
 * converted name held in the global `convstr`; the target is converted
 * here with the global iconv descriptor `icd`.  A link with the new name
 * and target is created and the old one removed.  If neither name nor
 * target changes, nothing is done.
 *
 * Prints the outcome on stdout.  Returns 0 on success, -1 on failure.
 */
int do_symlink(const char* file, struct FTW *s)
{
    size_t dirlen = (size_t)s->base;
    size_t namelen = strlen(convstr);
    char symbuf[SYMLEN + 1];        /* +1: readlink() does not terminate */
    char *newfullpath = NULL;
    char *newlink = NULL;
    char *inbuf;
    char *outbuf;
    size_t inleft;
    size_t outleft;
    size_t linkcap;
    ssize_t linklen;
    int same_name;
    int status = -1;

    /* Get the content of the symbolic link. */
    linklen = readlink(file, symbuf, SYMLEN);
    if (linklen == -1 || linklen >= SYMLEN) {
        /* A completely filled buffer may hold a truncated target. */
        printf("!!! failed to read symbolic link: %s\n", file);
        return -1;
    }
    symbuf[linklen] = '\0';

    /* Build the new path of the link itself. */
    newfullpath = malloc(dirlen + namelen + 1);
    if (newfullpath == NULL) {
        printf("!!! Out of memory while converting symlink %s\n", file);
        goto out;
    }
    memcpy(newfullpath, file, dirlen);
    memcpy(newfullpath + dirlen, convstr, namelen + 1);

    /* Convert the target.  The capacity handed to iconv() is exactly what
     * was allocated, minus one byte reserved for the terminator. */
    inbuf = symbuf;
    inleft = (size_t)linklen;
    linkcap = (inleft + 1) * MB_LEN_MAX;
    newlink = malloc(linkcap);
    if (newlink == NULL) {
        printf("!!! Out of memory while converting symlink %s\n", file);
        goto out;
    }
    outbuf = newlink;
    outleft = linkcap - 1;
    iconv(icd, NULL, NULL, NULL, NULL);
    if (iconv(icd, &inbuf, &inleft, &outbuf, &outleft) == (size_t)(-1)
        || iconv(icd, NULL, NULL, &outbuf, &outleft) == (size_t)(-1)) {
        printf("!!! Cannot iconv symlink content %s for %s\n",
               symbuf, file);
        goto out;
    }
    *outbuf = '\0';

    same_name = (strcmp(file, newfullpath) == 0);
    if (same_name && strcmp(symbuf, newlink) == 0) {
        /* Name and target are both unchanged: nothing to do. */
        status = 0;
        goto out;
    }

    /* When the name does not change, the old link occupies the path the
     * new one needs, so it has to go first. */
    if (same_name && unlink(file) != 0) {
        printf("!!! Cannot unlink symlink %s\n", file);
        goto out;
    }

    /* Relink to the new symlink. */
    if (symlink(newlink, newfullpath) != 0) {
        printf("!!! Cannot create symlink %s to %s.\n", newfullpath, newlink);
        /* Best effort: put the link we just removed back. */
        if (same_name && symlink(symbuf, file) != 0)
            printf("!!! Cannot restore symlink %s\n", file);
        goto out;
    }
    printf("converted symlink %s to %s points to %s\n", file,
           newfullpath, newlink);

    if (!same_name && unlink(file) != 0) {
        printf("!!! Cannot unlink symlink %s\n", file);
        goto out;
    }
    status = 0;

out:
    free(newlink);
    free(newfullpath);
    return status;
}
/* nftw() callback: convert the name of one directory entry.
 *
 * file: path of the entry as reported by nftw().
 * sb:   stat information for the entry; only read when FLAG says it is valid.
 * flag: nftw() type flag for the entry.
 * s:    s->base is the offset of the entry's own name within FILE.
 *
 * The last path component is converted with do_iconv(); symbolic links
 * are then handed to do_symlink() and everything else to do_rename().
 * Entries whose name cannot be converted are reported and skipped.
 *
 * Always returns 0 so that nftw() keeps walking after a failure.
 */
int walk_func(const char* file, const struct stat *sb, int flag, struct FTW *s)
{
    const char *basefile = file + s->base;
    /* With FTW_NS the stat call failed and *sb is undefined: do not read it. */
    int have_stat = (flag != FTW_NS);
    int is_dir = have_stat && S_ISDIR(sb->st_mode);
    int is_link = have_stat && S_ISLNK(sb->st_mode);

    if (do_iconv(basefile) == -1) {
        printf("!!! Cannot iconv %s, skip.\n", basefile);
    } else if (is_link) {
        do_symlink(file, s);
    } else {
        do_rename(file, s);
    }

    if (is_dir)
        printf("In dir %s\n", file);

    /* Return non-0 will stop nftw(). */
    return 0;
}
/* Walk the tree rooted at ROOT, calling walk_func() on every entry.
 *
 * Returns whatever nftw() returns.
 */
int do_walk(char* root)
{
    /* FTW_DEPTH: depth first.  FTW_PHYS: do not follow symbolic links.
     * FTW_CHDIR is left out on purpose: the original author noted that
     * with it set the walk stopped recursing at the first level (possibly
     * a libc bug). */
    const int walk_flags = FTW_PHYS | FTW_DEPTH;
    /* Third argument passed to nftw(). */
    const int max_open_dirs = 100;

    return nftw(root, walk_func, max_open_dirs, walk_flags);
}
/* Settings for one run of the program, filled in from the command line. */
struct conf {
    char *from;     /* Value of -f: encoding to convert from. */
    char *to;       /* Value of -t: encoding to convert to. */
    char *root;     /* Root directory to convert. */
    int warn;       /* Warning flag; cleared by -w. */
};
/* Set up the global conversion state for the encodings named in CF:
 * allocate the `convstr` buffer (`maxlen` bytes) and open the iconv
 * descriptor `icd` for converting from cf->from to cf->to.
 *
 * Returns 0 on success.  Returns -1 on failure, in which case `convstr`
 * is NULL and `icd` is (iconv_t)-1 unless it already held another value
 * before the call.
 */
int do_init(struct conf *cf)
{
    convstr = malloc((size_t)maxlen);
    if (convstr == NULL)
        return -1;

    icd = iconv_open(cf->to, cf->from);
    if (icd == (iconv_t)(-1)) {
        free(convstr);
        convstr = NULL;
        return -1;
    }
    return 0;
}
/* Read command line options into CF.
 *
 * Recognised: -f ENCODING, -t ENCODING, -w (clears cf->warn) and one
 * non-option argument, the root to convert.  Only the first non-option
 * argument is used; later ones are ignored.
 *
 * Returns 0 when -f, -t and a root were all given.  Otherwise prints the
 * usage line on stdout and returns -1.
 */
int do_opt(int argc, char* argv[], struct conf *cf)
{
    int c;

    cf->to = cf->from = cf->root = NULL;
    cf->warn = 1;

    /* The leading '-' in the option string makes getopt() report each
     * non-option argument as if it were the argument of option code 1. */
    while ((c = getopt(argc, argv, "-t:f:w")) != -1) {
        switch (c) {
        case 1:
            if (!cf->root)
                cf->root = optarg;
            break;
        case 't':
            cf->to = optarg;
            break;
        case 'f':
            cf->from = optarg;
            break;
        case 'w':
            cf->warn = 0;
            break;
        case '?':
        default:
            /* getopt() has already reported what was wrong. */
            goto usage;
        }
    }

    if (cf->to && cf->from && cf->root)
        return 0;

usage:
    printf("Usage: %s [-w] -f encode -t encode filename \n", argv[0]);
    return -1;
}
int main(int argc, char* argv[])
{
int retval;
struct conf cf;
struct stat *myst;
retval = do_opt(argc, argv, &cf);
if(retval != 0)
return 1;
do_init(&cf);
if (icd == (iconv_t)(-1)) {
printf("cannot do conversion between %s and %s\n", cf.to, cf.from);
return 2;
}
myst = (struct stat*) malloc(sizeof(struct stat));
retval = stat(cf.root, myst);
free(myst);
if (retval == -1) {
printf("%s cannot be converted.\n", cf.root);
perror(cf.root);
return 3;
}
if (cf.warn){
/* Warn the user. */
char* c;
printf ("I am going to convert %s and its subdirectory (if any) \n"
"from [%s] to [%s]. It is **dangerous**!!! It might mess \n"
"up all your file system!\n"
"=== %s: [%s] -> [%s] ===\n",
cf.root, cf.from, cf.to, cf.root, cf.from, cf.to);
c = readline("Are you sure?! [N/y]: ");
if (strcasecmp(c, "y"))
return 0;
c = readline("really? [N/y]: ");
if (strcasecmp(c, "y"))
return 0;
}
do_walk(cf.root);
}
Reply to:
- References:
- UTF-8
- From: Jay Hap-hang Yu <jay@amnesiac.homelinux.org>
- Re: UTF-8
- From: Arne Goetje <20020531antispam@gmx.net>
- Re: UTF-8
- From: Jay Hap-hang Yu <jay@amnesiac.homelinux.org>