Re: Solving the compression dilemma when rsync-ing Debian versions
In message <[🔎] 3B3D8082.4241CF61@bluewin.ch> you write:
> The figures are not as good as rsync'ing uncompressed but still more
> than halving the download when compressed. I think this is about the
> range which could be gained if used on all packages.
>
> Of course there has to be an old package with the new name in place else
> it won't have any effect.
Of course, I have a patch for this too. You should have asked 8).
It's against an older version of rsync, so it'd want checking.
If it works, I'll resubmit to Tridge...
Cheers,
Rusty.
--
Premature optmztion is rt of all evl. --DK
diff -urN rsync-2.4.6/Makefile.in rsync-latest/Makefile.in
--- rsync-2.4.6/Makefile.in Wed Sep 6 13:46:43 2000
+++ rsync-latest/Makefile.in Thu Sep 28 20:16:37 2000
@@ -25,7 +25,7 @@
ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
zlib/zutil.o zlib/adler32.o
-OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o
+OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o
OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
OBJS=$(OBJS1) $(OBJS2) $(DAEMON_OBJ) $(LIBOBJ) $(ZLIBOBJ)
diff -urN rsync-2.4.6/alternate.c rsync-latest/alternate.c
--- rsync-2.4.6/alternate.c Thu Jan 1 10:00:00 1970
+++ rsync-latest/alternate.c Thu Sep 28 20:55:20 2000
@@ -0,0 +1,117 @@
+#include "rsync.h"
+
+extern char *compare_dest;
+extern int verbose;
+
+/* Alternate methods for opening files, if local doesn't exist */
+/* Sanity check that we are about to open regular file */
+int do_open_regular(char *fname) /* open fname read-only, but only if it stats as a regular file; -1 otherwise */
+{
+ STRUCT_STAT st;
+
+ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode)) /* NOTE(review): check-then-open by path; fname could change in between */
+ return do_open(fname, O_RDONLY, 0); /* result of do_open is passed straight back */
+
+ return -1; /* stat failed, or fname is not a regular file */
+}
+
+/* Split fname in place at its last '/': *dirname gets the directory part
+ * ("." if there is no slash), *basename the part after the slash. */
+static void split_names(char *fname, char **dirname, char **basename)
+{
+	char *slash = strrchr(fname, '/');
+	if (slash) {
+		*dirname = (slash == fname) ? "/" : fname; /* "/foo": use "/", not "" */
+		*slash = '\0';
+		*basename = slash+1;
+	} else {
+		*basename = fname;
+		*dirname = ".";
+	}
+}
+
+static unsigned int measure_name(const char *name, /* candidate name to score */
+ const char *basename, /* name the candidate is compared against */
+ const char *ext) /* suffix the candidate must end with; the caller may pass "" */
+{
+ int namelen = strlen(name);
+ int extlen = strlen(ext);
+ unsigned int score = 0;
+
+ /* Extensions must match, and name must be longer than the extension alone */
+ if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
+ return 0;
+
+ /* Score is the number of leading characters name shares with basename */
+ for (; *name==*basename && *name; name++, basename++)
+ score++;
+ return score;
+}
+
+/* Fuzzy fallback: scan fname's directory for the entry whose name scores
+ * highest in measure_name() and open it via do_open_regular(); -1 if none. */
+int open_alternate_base_fuzzy(const char *fname)
+{
+	DIR *d;
+	struct dirent *di;
+	char *basename, *dirname;
+	char mangled_name[MAXPATHLEN];
+	char bestname[MAXPATHLEN];
+	unsigned int bestscore = 0;
+	const char *ext;
+
+	/* split_names() writes into its argument, so use a bounded copy */
+	if (strlen(fname) >= sizeof(mangled_name))
+		return -1;
+	strcpy(mangled_name, fname);
+	split_names(mangled_name, &dirname, &basename);
+	d = opendir(dirname);
+	if (!d) {
+		rprintf(FERROR,"recv_generator opendir(%s): %s\n",
+			dirname,strerror(errno));
+		return -1;
+	}
+
+	/* Get final extension, eg. .gz; never full basename though. */
+	ext = *basename ? strrchr(basename + 1, '.') : NULL; /* "" has no byte 1 */
+	if (!ext)
+		ext = basename + strlen(basename); /* ext = "" */
+
+	while ((di = readdir(d)) != NULL) {
+		const char *dname = d_name(di);
+		unsigned int score;
+
+		if (strcmp(dname,".")==0 || strcmp(dname,"..")==0)
+			continue;
+
+		score = measure_name(dname, basename, ext);
+		if (verbose > 4)
+			rprintf(FINFO,"fuzzy score for %s = %u\n", dname, score);
+		if (score > bestscore) {
+			slprintf(bestname, sizeof(bestname), "%s", dname);
+			bestscore = score;
+		}
+	}
+	closedir(d);
+
+	/* Found a candidate. */
+	if (bestscore != 0) {
+		char fuzzyname[MAXPATHLEN];
+
+		slprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
+		if (verbose > 2)
+			rprintf(FINFO,"fuzzy match %s->%s\n", fname, fuzzyname);
+		return do_open_regular(fuzzyname);
+	}
+	return -1;
+}
+
+int open_alternate_base_comparedir(const char *fname) /* open "<compare_dest>/<fname>" if it is a regular file; -1 otherwise */
+{
+ char fnamebuf[MAXPATHLEN];
+ /* try the file at compare_dest instead */
+ slprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname); /* compare_dest is used unchecked; the callers in this patch test it for NULL first */
+
+ /* FIXME: now follows symlinks... */
+ return do_open_regular(fnamebuf);
+}
diff -urN rsync-2.4.6/generator.c rsync-latest/generator.c
--- rsync-2.4.6/generator.c Wed Sep 6 13:46:43 2000
+++ rsync-latest/generator.c Thu Sep 28 21:27:45 2000
@@ -37,11 +37,12 @@
extern int always_checksum;
extern int modify_window;
extern char *compare_dest;
+extern int fuzzy;
/* choose whether to skip a particular file */
static int skip_file(char *fname,
- struct file_struct *file, STRUCT_STAT *st)
+ struct file_struct *file, const STRUCT_STAT *st)
{
if (st->st_size != file->length) {
return 0;
@@ -175,6 +176,155 @@
return s;
}
+static void generate_directory(struct file_struct *file, /* make sure a directory exists at fname; file->mode is the mode passed to mkdir */
+ char *fname,
+ int statret, /* caller's stat result for fname; 0 means *st is valid */
+ const STRUCT_STAT *st)
+{
+ if (dry_run) return;
+ if (statret == 0 && !S_ISDIR(st->st_mode)) { /* a non-directory is in the way */
+ if (robust_unlink(fname) != 0) {
+ rprintf(FERROR,"unlink %s : %s\n",fname,strerror(errno));
+ return;
+ }
+ statret = -1; /* local copy only: treat fname as missing from here on */
+ }
+ if (statret != 0 &&
+ do_mkdir(fname,file->mode) != 0 &&
+ errno != EEXIST) {
+ if (!(relative_paths && errno==ENOENT && /* presumably: build the missing parents, then retry the mkdir */
+ create_directory_path(fname)==0 &&
+ do_mkdir(fname,file->mode)==0)) {
+ rprintf(FERROR,"mkdir %s : %s (2)\n",
+ fname,strerror(errno));
+ }
+ }
+ if (set_perms(fname,file,NULL,0) && verbose) /* runs even if the mkdir above failed */
+ rprintf(FINFO,"%s/\n",fname);
+}
+
+static void generate_symlink(struct file_struct *file, /* make fname a symlink to file->link */
+ char *fname,
+ int statret, /* caller's stat result for fname; 0 means something exists there */
+ STRUCT_STAT *st)
+{
+#if SUPPORT_LINKS /* without SUPPORT_LINKS the function body is empty */
+ char lnk[MAXPATHLEN];
+ int l;
+ extern int safe_symlinks;
+
+ if (safe_symlinks && unsafe_symlink(file->link, fname)) {
+ if (verbose) {
+ rprintf(FINFO,"ignoring unsafe symlink %s -> %s\n",
+ fname,file->link);
+ }
+ return;
+ }
+ if (statret == 0) {
+ l = readlink(fname,lnk,MAXPATHLEN-1); /* one byte kept back for the terminator below */
+ if (l > 0) {
+ lnk[l] = 0; /* readlink() does not NUL-terminate */
+ if (strcmp(lnk,file->link) == 0) { /* existing link already has the wanted target */
+ set_perms(fname,file,st,1);
+ return;
+ }
+ }
+ delete_file(fname); /* not a matching symlink: remove before recreating */
+ }
+ if (do_symlink(file->link,fname) != 0) {
+ rprintf(FERROR,"symlink %s -> %s : %s\n",
+ fname,file->link,strerror(errno));
+ } else {
+ set_perms(fname,file,NULL,0);
+ if (verbose) {
+ rprintf(FINFO,"%s -> %s\n",
+ fname,file->link);
+ }
+ }
+#endif
+}
+
+#ifdef HAVE_MKNOD
+static void generate_device(struct file_struct *file, /* make fname a device node with file->mode and file->rdev */
+ char *fname,
+ int statret, /* caller's stat result for fname; 0 means *st is valid */
+ STRUCT_STAT *st)
+{
+ if (statret != 0 ||
+ st->st_mode != file->mode ||
+ st->st_rdev != file->rdev) { /* missing, or mode / device number differ: recreate */
+ delete_file(fname); /* return value is not checked */
+ if (verbose > 2)
+ rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
+ fname,(int)file->mode,(int)file->rdev);
+ if (do_mknod(fname,file->mode,file->rdev) != 0) {
+ rprintf(FERROR,"mknod %s : %s\n",fname,strerror(errno));
+ } else {
+ set_perms(fname,file,NULL,0);
+ if (verbose)
+ rprintf(FINFO,"%s\n",fname);
+ }
+ } else {
+ set_perms(fname,file,st,1); /* existing node already matches; only set_perms is called */
+ }
+}
+#endif /*HAVE_MKNOD*/
+
+/* Pick and open the basis file for fname.
+ * Returns an fd (with *st describing it), -1 for can't open (null file),
+ * -2 for skip.  Only fname itself goes through update_only/skip_file();
+ * NOTE(review): compare_dest and fuzzy matches are never skip-checked here
+ * -- confirm that is intended. */
+static int open_base_file(struct file_struct *file,
+			  char *fname,
+			  int statret,
+			  STRUCT_STAT *st)
+{
+	int fd = -1;
+
+	if (statret == 0) {
+		if (S_ISREG(st->st_mode)) {
+			if (update_only
+			    && cmp_modtime(st->st_mtime, file->modtime) > 0) {
+				if (verbose > 1)
+					rprintf(FINFO,"%s is newer\n",fname);
+				return -2;
+			}
+			if (skip_file(fname, file, st)) {
+				set_perms(fname, file, st, 1);
+				return -2;
+			}
+			fd = do_open(fname, O_RDONLY, 0);
+			if (fd == -1)
+				rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno));
+			return fd;
+		}
+		if (S_ISLNK(st->st_mode)) {
+			/* Try to use symlink contents */
+			fd = do_open_regular(fname);
+			/* Don't delete yet; receiver will need it */
+		} else if (delete_file(fname) != 0) {
+			/* nothing is open on this path, so there is no fd to close */
+			return -2;
+		}
+	}
+
+	if (fd == -1 && compare_dest != NULL)
+		fd = open_alternate_base_comparedir(fname);
+
+	if (fd == -1 && fuzzy)
+		fd = open_alternate_base_fuzzy(fname);
+
+	/* Update stat to understand size */
+	if (fd != -1 && do_fstat(fd, st) != 0) {
+		rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
+		/* *st does not describe fd; never let the caller size a map from it */
+		close(fd);
+		fd = -1;
+	}
+
+	return fd;
+}
void recv_generator(char *fname,struct file_list *flist,int i,int f_out)
{
@@ -184,12 +334,10 @@
struct sum_struct *s;
int statret;
struct file_struct *file = flist->files[i];
- char *fnamecmp;
- char fnamecmpbuf[MAXPATHLEN];
- extern char *compare_dest;
extern int list_only;
extern int preserve_perms;
extern int only_existing;
+ int stat_errno;
if (list_only) return;
@@ -197,8 +345,9 @@
rprintf(FINFO,"recv_generator(%s,%d)\n",fname,i);
statret = link_stat(fname,&st);
+ stat_errno = errno;
- if (only_existing && statret == -1 && errno == ENOENT) {
+ if (only_existing && statret == -1 && stat_errno == ENOENT) {
/* we only want to update existing files */
if (verbose > 1) rprintf(FINFO,"not creating %s\n",fname);
return;
@@ -214,84 +363,18 @@
}
if (S_ISDIR(file->mode)) {
- if (dry_run) return;
- if (statret == 0 && !S_ISDIR(st.st_mode)) {
- if (robust_unlink(fname) != 0) {
- rprintf(FERROR,"unlink %s : %s\n",fname,strerror(errno));
- return;
- }
- statret = -1;
- }
- if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) {
- if (!(relative_paths && errno==ENOENT &&
- create_directory_path(fname)==0 &&
- do_mkdir(fname,file->mode)==0)) {
- rprintf(FERROR,"mkdir %s : %s (2)\n",
- fname,strerror(errno));
- }
- }
- if (set_perms(fname,file,NULL,0) && verbose)
- rprintf(FINFO,"%s/\n",fname);
+ generate_directory(file, fname, statret, &st);
return;
}
if (preserve_links && S_ISLNK(file->mode)) {
-#if SUPPORT_LINKS
- char lnk[MAXPATHLEN];
- int l;
- extern int safe_symlinks;
-
- if (safe_symlinks && unsafe_symlink(file->link, fname)) {
- if (verbose) {
- rprintf(FINFO,"ignoring unsafe symlink %s -> %s\n",
- fname,file->link);
- }
- return;
- }
- if (statret == 0) {
- l = readlink(fname,lnk,MAXPATHLEN-1);
- if (l > 0) {
- lnk[l] = 0;
- if (strcmp(lnk,file->link) == 0) {
- set_perms(fname,file,&st,1);
- return;
- }
- }
- delete_file(fname);
- }
- if (do_symlink(file->link,fname) != 0) {
- rprintf(FERROR,"symlink %s -> %s : %s\n",
- fname,file->link,strerror(errno));
- } else {
- set_perms(fname,file,NULL,0);
- if (verbose) {
- rprintf(FINFO,"%s -> %s\n",
- fname,file->link);
- }
- }
-#endif
+ generate_symlink(file, fname, statret, &st);
return;
}
#ifdef HAVE_MKNOD
if (am_root && preserve_devices && IS_DEVICE(file->mode)) {
- if (statret != 0 ||
- st.st_mode != file->mode ||
- st.st_rdev != file->rdev) {
- delete_file(fname);
- if (verbose > 2)
- rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
- fname,(int)file->mode,(int)file->rdev);
- if (do_mknod(fname,file->mode,file->rdev) != 0) {
- rprintf(FERROR,"mknod %s : %s\n",fname,strerror(errno));
- } else {
- set_perms(fname,file,NULL,0);
- if (verbose)
- rprintf(FINFO,"%s\n",fname);
- }
- } else {
- set_perms(fname,file,&st,1);
- }
+ generate_device(file, fname, statret, &st);
return;
}
#endif
@@ -307,74 +390,28 @@
return;
}
- fnamecmp = fname;
-
- if ((statret == -1) && (compare_dest != NULL)) {
- /* try the file at compare_dest instead */
- int saveerrno = errno;
- slprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
- statret = link_stat(fnamecmpbuf,&st);
- if (!S_ISREG(st.st_mode))
- statret = -1;
- if (statret == -1)
- errno = saveerrno;
- else
- fnamecmp = fnamecmpbuf;
- }
-
- if (statret == -1) {
- if (errno == ENOENT) {
- write_int(f_out,i);
- if (!dry_run) send_sums(NULL,f_out);
- } else {
- if (verbose > 1)
- rprintf(FERROR,"recv_generator failed to open %s\n",fname);
- }
- return;
- }
-
- if (!S_ISREG(st.st_mode)) {
- if (delete_file(fname) != 0) {
- return;
- }
-
- /* now pretend the file didn't exist */
- write_int(f_out,i);
- if (!dry_run) send_sums(NULL,f_out);
- return;
- }
-
- if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
+ /* Failed to stat for some other reason. */
+ if (statret == -1 && stat_errno != ENOENT) {
if (verbose > 1)
- rprintf(FINFO,"%s is newer\n",fname);
+ rprintf(FERROR,"recv_generator failed to open %s\n",
+ fname);
return;
}
- if (skip_file(fname, file, &st)) {
- if (fnamecmp == fname)
- set_perms(fname,file,&st,1);
+ fd = open_base_file(file, fname, statret, &st);
+ if (fd == -2)
return;
- }
- if (dry_run) {
- write_int(f_out,i);
- return;
+ if ((whole_file || dry_run) && fd != -1) {
+ close(fd);
+ fd = -1;
}
- if (whole_file) {
- write_int(f_out,i);
- send_sums(NULL,f_out);
- return;
- }
-
- /* open the file */
- fd = do_open(fnamecmp, O_RDONLY, 0);
-
if (fd == -1) {
- rprintf(FERROR,"failed to open %s, continuing : %s\n",fnamecmp,strerror(errno));
- /* pretend the file didn't exist */
+ /* the file didn't exist, or we can pretend it doesn't */
write_int(f_out,i);
- send_sums(NULL,f_out);
+ if (!dry_run)
+ send_sums(NULL,f_out);
return;
}
@@ -385,7 +422,7 @@
}
if (verbose > 3)
- rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
+ rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size);
s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
diff -urN rsync-2.4.6/options.c rsync-latest/options.c
--- rsync-2.4.6/options.c Wed Sep 6 13:46:43 2000
+++ rsync-latest/options.c Thu Sep 28 15:42:22 2000
@@ -72,6 +72,7 @@
#else
int modify_window=0;
#endif
+int fuzzy=0;
int blocking_io=0;
char *backup_suffix = BACKUP_SUFFIX;
@@ -172,6 +173,7 @@
rprintf(F," --log-format=FORMAT log file transfers using specified format\n");
rprintf(F," --password-file=FILE get password from FILE\n");
rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n");
+ rprintf(F," --fuzzy use similar file as basis if it does't exist\n");
rprintf(F," -h, --help show this help screen\n");
rprintf(F,"\n");
@@ -188,7 +190,7 @@
OPT_LOG_FORMAT, OPT_PASSWORD_FILE, OPT_SIZE_ONLY, OPT_ADDRESS,
OPT_DELETE_AFTER, OPT_EXISTING, OPT_MAX_DELETE, OPT_BACKUP_DIR,
OPT_IGNORE_ERRORS, OPT_BWLIMIT, OPT_BLOCKING_IO,
- OPT_MODIFY_WINDOW};
+ OPT_MODIFY_WINDOW, OPT_FUZZY};
static char *short_options = "oblLWHpguDCtcahvqrRIxnSe:B:T:zP";
@@ -255,6 +257,7 @@
{"address", 1, 0, OPT_ADDRESS},
{"max-delete", 1, 0, OPT_MAX_DELETE},
{"backup-dir", 1, 0, OPT_BACKUP_DIR},
+ {"fuzzy", 0, 0, OPT_FUZZY},
{0,0,0,0}};
@@ -596,6 +599,10 @@
backup_dir = optarg;
break;
+ case OPT_FUZZY:
+ fuzzy = 1;
+ break;
+
default:
slprintf(err_buf,sizeof(err_buf),"unrecognised option\n");
return 0;
@@ -767,7 +774,9 @@
args[ac++] = "--compare-dest";
args[ac++] = compare_dest;
}
-
+
+ if (fuzzy && am_sender)
+ args[ac++] = "--fuzzy";
*argc = ac;
}
diff -urN rsync-2.4.6/proto.h rsync-latest/proto.h
--- rsync-2.4.6/proto.h Wed Sep 6 13:46:43 2000
+++ rsync-latest/proto.h Thu Sep 28 21:32:14 2000
@@ -212,3 +212,6 @@
int cmp_modtime(time_t file1, time_t file2);
int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
int sys_gettimeofday(struct timeval *tv);
+int do_open_regular(char *fname);
+int open_alternate_base_fuzzy(const char *fname);
+int open_alternate_base_comparedir(const char *fname);
diff -urN rsync-2.4.6/receiver.c rsync-latest/receiver.c
--- rsync-2.4.6/receiver.c Fri Mar 31 00:23:03 2000
+++ rsync-latest/receiver.c Thu Sep 28 21:47:10 2000
@@ -35,6 +35,7 @@
extern char *compare_dest;
extern int make_backups;
extern char *backup_suffix;
+extern int fuzzy;
static struct delete_list {
dev_t dev;
@@ -303,8 +303,6 @@
STRUCT_STAT st;
char *fname;
char fnametmp[MAXPATHLEN];
- char *fnamecmp;
- char fnamecmpbuf[MAXPATHLEN];
struct map_struct *buf;
int i;
struct file_struct *file;
@@ -362,28 +360,24 @@
if (verbose > 2)
rprintf(FINFO,"recv_files(%s)\n",fname);
- fnamecmp = fname;
-
/* open the file */
- fd1 = do_open(fnamecmp, O_RDONLY, 0);
+ fd1 = do_open(fname, O_RDONLY, 0);
- if ((fd1 == -1) && (compare_dest != NULL)) {
- /* try the file at compare_dest instead */
- slprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
- compare_dest,fname);
- fnamecmp = fnamecmpbuf;
- fd1 = do_open(fnamecmp, O_RDONLY, 0);
- }
+ if (fd1 == -1 && compare_dest != NULL)
+ fd1 = open_alternate_base_comparedir(fname);
+
+ if (fd1 == -1 && fuzzy)
+ fd1 = open_alternate_base_fuzzy(fname);
if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
- rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno));
+ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
receive_data(f_in,NULL,-1,NULL,file->length);
close(fd1);
continue;
}
if (fd1 != -1 && !S_ISREG(st.st_mode)) {
- rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp);
+ rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname);
receive_data(f_in,NULL,-1,NULL,file->length);
close(fd1);
continue;
@@ -399,7 +393,7 @@
if (fd1 != -1 && st.st_size > 0) {
buf = map_file(fd1,st.st_size);
if (verbose > 2)
- rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
+ rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size);
} else {
buf = NULL;
}
diff -urN rsync-2.4.6/util.c rsync-latest/util.c
--- rsync-2.4.6/util.c Wed Sep 6 13:46:43 2000
+++ rsync-latest/util.c Thu Sep 28 20:26:15 2000
@@ -963,7 +963,6 @@
*******************************************************************/
int cmp_modtime(time_t file1, time_t file2)
{
- time_t diff;
extern int modify_window;
if (file2 > file1) {
Reply to: