[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Solving the compression dilemma when rsync-ing Debian versions



In message <3B3D8082.4241CF61@bluewin.ch> you write:
> The figures are not as good as rsync'ing uncompressed but still more
> than halving the download when compressed. I think this is about the
> range which could be gained if used on all packages.
> 
> Of course there has to be an old package with the new name in place else
> it won't have any effect.

Of course, I have a patch for this too.  You should have asked 8).

It's against an older version of rsync, so it'd want checking.

If it works, I'll resubmit to Tridge...

Cheers,
Rusty.
--
Premature optmztion is rt of all evl. --DK

diff -urN rsync-2.4.6/Makefile.in rsync-latest/Makefile.in
--- rsync-2.4.6/Makefile.in	Wed Sep  6 13:46:43 2000
+++ rsync-latest/Makefile.in	Thu Sep 28 20:16:37 2000
@@ -25,7 +25,7 @@
 ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
 	zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
 	zlib/zutil.o zlib/adler32.o 
-OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o
+OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o
 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o
 DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
 OBJS=$(OBJS1) $(OBJS2) $(DAEMON_OBJ) $(LIBOBJ) $(ZLIBOBJ)
diff -urN rsync-2.4.6/alternate.c rsync-latest/alternate.c
--- rsync-2.4.6/alternate.c	Thu Jan  1 10:00:00 1970
+++ rsync-latest/alternate.c	Thu Sep 28 20:55:20 2000
@@ -0,0 +1,117 @@
+#include "rsync.h"
+
+extern char *compare_dest;
+extern int verbose;
+
+/* Alternate methods for opening files, if local doesn't exist */
+/* Sanity check that we are about to open regular file */
+int do_open_regular(char *fname)
+{
+	STRUCT_STAT st;
+
+	if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
+		return do_open(fname, O_RDONLY, 0);
+
+	return -1;
+}
+
+static void split_names(char *fname, char **dirname, char **basename)
+{
+	char *slash;
+
+	slash = strrchr(fname, '/');
+	if (slash) {
+		*dirname = fname;
+		*slash = '\0';
+		*basename = slash+1;
+	} else {
+		*basename = fname;
+		*dirname = ".";
+	}
+}
+
+static unsigned int measure_name(const char *name,
+				 const char *basename,
+				 const char *ext)
+{
+	int namelen = strlen(name);
+	int extlen = strlen(ext);
+	unsigned int score = 0;
+
+	/* Extensions must match */
+	if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
+		return 0;
+
+	/* Now score depends on similarity of prefix */
+	for (; *name==*basename && *name; name++, basename++)
+		score++;
+	return score;
+}
+
+int open_alternate_base_fuzzy(const char *fname)
+{
+	DIR *d;
+	struct dirent *di;
+	char *basename, *dirname;
+	char mangled_name[MAXPATHLEN];
+	char bestname[MAXPATHLEN];
+	unsigned int bestscore = 0;
+	const char *ext;
+
+	/* FIXME: can we assume fname fits here? */
+	strcpy(mangled_name, fname);
+
+	split_names(mangled_name, &dirname, &basename);
+	d = opendir(dirname);
+	if (!d) {
+		rprintf(FERROR,"recv_generator opendir(%s): %s\n",
+			dirname,strerror(errno));
+		return -1;
+	}
+
+	/* Get final extension, eg. .gz; never full basename though. */
+	ext = strrchr(basename + 1, '.');
+	if (!ext)
+		ext = basename + strlen(basename); /* ext = "" */
+
+	while ((di = readdir(d)) != NULL) {
+		const char *dname = d_name(di);
+		unsigned int score;
+
+		if (strcmp(dname,".")==0 ||
+		    strcmp(dname,"..")==0)
+			continue;
+		
+		score = measure_name(dname, basename, ext);
+		if (verbose > 4)
+			rprintf(FINFO,"fuzzy score for %s = %u\n",
+				dname, score);
+		if (score > bestscore) {
+			strcpy(bestname, dname); 
+			bestscore = score;
+		}
+	}
+	closedir(d);
+
+	/* Found a candidate. */
+	if (bestscore != 0) {
+		char fuzzyname[MAXPATHLEN];
+
+		slprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
+		if (verbose > 2)
+			rprintf(FINFO,"fuzzy match %s->%s\n",
+				fname, fuzzyname);
+		return do_open_regular(fuzzyname);
+	}
+	return -1;
+}
+
+int open_alternate_base_comparedir(const char *fname)
+{
+	char fnamebuf[MAXPATHLEN];
+	/* try the file at compare_dest instead */
+	slprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
+
+	/* FIXME: now follows symlinks... */
+	return do_open_regular(fnamebuf);
+}
diff -urN rsync-2.4.6/generator.c rsync-latest/generator.c
--- rsync-2.4.6/generator.c	Wed Sep  6 13:46:43 2000
+++ rsync-latest/generator.c	Thu Sep 28 21:27:45 2000
@@ -37,11 +37,12 @@
 extern int always_checksum;
 extern int modify_window;
 extern char *compare_dest;
+extern int fuzzy;
 
 
 /* choose whether to skip a particular file */
 static int skip_file(char *fname,
-		     struct file_struct *file, STRUCT_STAT *st)
+		     struct file_struct *file, const STRUCT_STAT *st)
 {
 	if (st->st_size != file->length) {
 		return 0;
@@ -175,6 +176,155 @@
 	return s;
 }
 
+static void generate_directory(struct file_struct *file,
+			       char *fname, 
+			       int statret, 
+			       const STRUCT_STAT *st)
+{
+	if (dry_run) return;
+	if (statret == 0 && !S_ISDIR(st->st_mode)) {
+		if (robust_unlink(fname) != 0) {
+			rprintf(FERROR,"unlink %s : %s\n",fname,strerror(errno));
+			return;
+		}
+		statret = -1;
+	}
+	if (statret != 0 &&
+	    do_mkdir(fname,file->mode) != 0 &&
+	    errno != EEXIST) {
+		if (!(relative_paths && errno==ENOENT && 
+		      create_directory_path(fname)==0 && 
+		      do_mkdir(fname,file->mode)==0)) {
+			rprintf(FERROR,"mkdir %s : %s (2)\n",
+				fname,strerror(errno));
+		}
+	}
+	if (set_perms(fname,file,NULL,0) && verbose) 
+		rprintf(FINFO,"%s/\n",fname);
+}
+
+static void generate_symlink(struct file_struct *file,
+			     char *fname, 
+			     int statret, 
+			     STRUCT_STAT *st)
+{
+#if SUPPORT_LINKS
+	char lnk[MAXPATHLEN];
+	int l;
+	extern int safe_symlinks;
+
+	if (safe_symlinks && unsafe_symlink(file->link, fname)) {
+		if (verbose) {
+			rprintf(FINFO,"ignoring unsafe symlink %s -> %s\n",
+				fname,file->link);
+		}
+		return;
+	}
+	if (statret == 0) {
+		l = readlink(fname,lnk,MAXPATHLEN-1);
+		if (l > 0) {
+			lnk[l] = 0;
+			if (strcmp(lnk,file->link) == 0) {
+				set_perms(fname,file,st,1);
+				return;
+			}
+		}
+		delete_file(fname);
+	}
+	if (do_symlink(file->link,fname) != 0) {
+		rprintf(FERROR,"symlink %s -> %s : %s\n",
+			fname,file->link,strerror(errno));
+	} else {
+		set_perms(fname,file,NULL,0);
+		if (verbose) {
+			rprintf(FINFO,"%s -> %s\n",
+				fname,file->link);
+		}
+	}
+#endif
+}
+
+#ifdef HAVE_MKNOD
+static void generate_device(struct file_struct *file,
+			    char *fname, 
+			    int statret, 
+			    STRUCT_STAT *st)
+{
+	if (statret != 0 || 
+	    st->st_mode != file->mode ||
+	    st->st_rdev != file->rdev) {	
+		delete_file(fname);
+		if (verbose > 2)
+			rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
+				fname,(int)file->mode,(int)file->rdev);
+		if (do_mknod(fname,file->mode,file->rdev) != 0) {
+			rprintf(FERROR,"mknod %s : %s\n",fname,strerror(errno));
+		} else {
+			set_perms(fname,file,NULL,0);
+			if (verbose)
+				rprintf(FINFO,"%s\n",fname);
+		}
+	} else {
+		set_perms(fname,file,st,1);
+	}
+}
+#endif /*HAVE_MKNOD*/
+
+/* Returns -1 for can't open (null file), -2 for skip */
+static int open_base_file(struct file_struct *file,
+			  char *fname, 
+			  int statret, 
+			  STRUCT_STAT *st)
+{
+	int fd = -1;
+
+	if (statret == 0) {
+		if (S_ISREG(st->st_mode)) {
+			if (update_only
+			    && cmp_modtime(st->st_mtime, file->modtime) > 0) {
+				if (verbose > 1)
+					rprintf(FINFO,"%s is newer\n",fname);
+				return -2;
+			}
+			if (skip_file(fname, file, st)) {
+				set_perms(fname, file, st, 1);
+				return -2;
+			}
+		 	fd = do_open(fname, O_RDONLY, 0);
+			if (fd == -1) {
+				rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno));
+				return -1;
+			} else
+				return fd;
+		} else {
+			/* Try to use symlink contents */
+			if (S_ISLNK(st->st_mode)) {
+				fd = do_open_regular(fname);
+				/* Don't delete yet; receiver will need it */
+			} else {
+				if (delete_file(fname) != 0) {
+					if (fd != -1)
+						close(fd);
+					return -2;
+				}
+			}
+		}
+	}
+
+	if (fd == -1 && compare_dest != NULL)
+		fd = open_alternate_base_comparedir(fname);
+
+	if (fd == -1 && fuzzy)
+		fd = open_alternate_base_fuzzy(fname);
+
+	/* Update stat to understand size */
+	if (fd != -1) {
+		if (do_fstat(fd, st) != 0)
+			rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
+	}
+
+	return fd;
+}
 
 void recv_generator(char *fname,struct file_list *flist,int i,int f_out)
 {  
@@ -184,12 +334,10 @@
 	struct sum_struct *s;
 	int statret;
 	struct file_struct *file = flist->files[i];
-	char *fnamecmp;
-	char fnamecmpbuf[MAXPATHLEN];
-	extern char *compare_dest;
 	extern int list_only;
 	extern int preserve_perms;
 	extern int only_existing;
+	int stat_errno;
 
 	if (list_only) return;
 
@@ -197,8 +345,9 @@
 		rprintf(FINFO,"recv_generator(%s,%d)\n",fname,i);
 
 	statret = link_stat(fname,&st);
+	stat_errno = errno;
 
-	if (only_existing && statret == -1 && errno == ENOENT) {
+	if (only_existing && statret == -1 && stat_errno == ENOENT) {
 		/* we only want to update existing files */
 		if (verbose > 1) rprintf(FINFO,"not creating %s\n",fname);
 		return;
@@ -214,84 +363,18 @@
 	}
 
 	if (S_ISDIR(file->mode)) {
-		if (dry_run) return;
-		if (statret == 0 && !S_ISDIR(st.st_mode)) {
-			if (robust_unlink(fname) != 0) {
-				rprintf(FERROR,"unlink %s : %s\n",fname,strerror(errno));
-				return;
-			}
-			statret = -1;
-		}
-		if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) {
-			if (!(relative_paths && errno==ENOENT && 
-			      create_directory_path(fname)==0 && 
-			      do_mkdir(fname,file->mode)==0)) {
-				rprintf(FERROR,"mkdir %s : %s (2)\n",
-					fname,strerror(errno));
-			}
-		}
-		if (set_perms(fname,file,NULL,0) && verbose) 
-			rprintf(FINFO,"%s/\n",fname);
+		generate_directory(file, fname, statret, &st);
 		return;
 	}
 
 	if (preserve_links && S_ISLNK(file->mode)) {
-#if SUPPORT_LINKS
-		char lnk[MAXPATHLEN];
-		int l;
-		extern int safe_symlinks;
-
-		if (safe_symlinks && unsafe_symlink(file->link, fname)) {
-			if (verbose) {
-				rprintf(FINFO,"ignoring unsafe symlink %s -> %s\n",
-					fname,file->link);
-			}
-			return;
-		}
-		if (statret == 0) {
-			l = readlink(fname,lnk,MAXPATHLEN-1);
-			if (l > 0) {
-				lnk[l] = 0;
-				if (strcmp(lnk,file->link) == 0) {
-					set_perms(fname,file,&st,1);
-					return;
-				}
-			}
-			delete_file(fname);
-		}
-		if (do_symlink(file->link,fname) != 0) {
-			rprintf(FERROR,"symlink %s -> %s : %s\n",
-				fname,file->link,strerror(errno));
-		} else {
-			set_perms(fname,file,NULL,0);
-			if (verbose) {
-				rprintf(FINFO,"%s -> %s\n",
-					fname,file->link);
-			}
-		}
-#endif
+		generate_symlink(file, fname, statret, &st);
 		return;
 	}
 
 #ifdef HAVE_MKNOD
 	if (am_root && preserve_devices && IS_DEVICE(file->mode)) {
-		if (statret != 0 || 
-		    st.st_mode != file->mode ||
-		    st.st_rdev != file->rdev) {	
-			delete_file(fname);
-			if (verbose > 2)
-				rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
-					fname,(int)file->mode,(int)file->rdev);
-			if (do_mknod(fname,file->mode,file->rdev) != 0) {
-				rprintf(FERROR,"mknod %s : %s\n",fname,strerror(errno));
-			} else {
-				set_perms(fname,file,NULL,0);
-				if (verbose)
-					rprintf(FINFO,"%s\n",fname);
-			}
-		} else {
-			set_perms(fname,file,&st,1);
-		}
+		generate_device(file, fname, statret, &st);
 		return;
 	}
 #endif
@@ -307,74 +390,28 @@
 		return;
 	}
 
-	fnamecmp = fname;
-
-	if ((statret == -1) && (compare_dest != NULL)) {
-		/* try the file at compare_dest instead */
-		int saveerrno = errno;
-		slprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
-		statret = link_stat(fnamecmpbuf,&st);
-		if (!S_ISREG(st.st_mode))
-			statret = -1;
-		if (statret == -1)
-			errno = saveerrno;
-		else
-			fnamecmp = fnamecmpbuf;
-	}
-
-	if (statret == -1) {
-		if (errno == ENOENT) {
-			write_int(f_out,i);
-			if (!dry_run) send_sums(NULL,f_out);
-		} else {
-			if (verbose > 1)
-				rprintf(FERROR,"recv_generator failed to open %s\n",fname);
-		}
-		return;
-	}
-
-	if (!S_ISREG(st.st_mode)) {
-		if (delete_file(fname) != 0) {
-			return;
-		}
-
-		/* now pretend the file didn't exist */
-		write_int(f_out,i);
-		if (!dry_run) send_sums(NULL,f_out);    
-		return;
-	}
-
-	if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
+	/* Failed to stat for some other reason. */
+	if (statret == -1 && stat_errno != ENOENT) {
 		if (verbose > 1)
-			rprintf(FINFO,"%s is newer\n",fname);
+			rprintf(FERROR,"recv_generator failed to open %s\n",
+				fname);
 		return;
 	}
 
-	if (skip_file(fname, file, &st)) {
-		if (fnamecmp == fname)
-			set_perms(fname,file,&st,1);
+	fd = open_base_file(file, fname, statret, &st);
+	if (fd == -2)
 		return;
-	}
 
-	if (dry_run) {
-		write_int(f_out,i);
-		return;
+	if ((whole_file || dry_run) && fd != -1) {
+		close(fd);
+		fd = -1;
 	}
 
-	if (whole_file) {
-		write_int(f_out,i);
-		send_sums(NULL,f_out);    
-		return;
-	}
-
-	/* open the file */  
-	fd = do_open(fnamecmp, O_RDONLY, 0);
-
 	if (fd == -1) {
-		rprintf(FERROR,"failed to open %s, continuing : %s\n",fnamecmp,strerror(errno));
-		/* pretend the file didn't exist */
+		/* the file didn't exist, or we can pretend it doesn't */
 		write_int(f_out,i);
-		send_sums(NULL,f_out);
+		if (!dry_run)
+			send_sums(NULL,f_out);
 		return;
 	}
 
@@ -385,7 +422,7 @@
 	}
 
 	if (verbose > 3)
-		rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
+		rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size);
 
 	s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
 
diff -urN rsync-2.4.6/options.c rsync-latest/options.c
--- rsync-2.4.6/options.c	Wed Sep  6 13:46:43 2000
+++ rsync-latest/options.c	Thu Sep 28 15:42:22 2000
@@ -72,6 +72,7 @@
 #else
 int modify_window=0;
 #endif
+int fuzzy=0;
 int blocking_io=0;
 
 char *backup_suffix = BACKUP_SUFFIX;
@@ -172,6 +173,7 @@
   rprintf(F,"     --log-format=FORMAT     log file transfers using specified format\n");  
   rprintf(F,"     --password-file=FILE    get password from FILE\n");
   rprintf(F,"     --bwlimit=KBPS          limit I/O bandwidth, KBytes per second\n");
+  rprintf(F,"     --fuzzy	          use similar file as basis if it does't exist\n");
   rprintf(F," -h, --help                  show this help screen\n");
 
   rprintf(F,"\n");
@@ -188,7 +190,7 @@
       OPT_LOG_FORMAT, OPT_PASSWORD_FILE, OPT_SIZE_ONLY, OPT_ADDRESS,
       OPT_DELETE_AFTER, OPT_EXISTING, OPT_MAX_DELETE, OPT_BACKUP_DIR, 
       OPT_IGNORE_ERRORS, OPT_BWLIMIT, OPT_BLOCKING_IO,
-      OPT_MODIFY_WINDOW};
+      OPT_MODIFY_WINDOW, OPT_FUZZY};
 
 static char *short_options = "oblLWHpguDCtcahvqrRIxnSe:B:T:zP";
 
@@ -255,6 +257,7 @@
   {"address",     1,     0,    OPT_ADDRESS},
   {"max-delete",  1,     0,    OPT_MAX_DELETE},
   {"backup-dir",  1,     0,    OPT_BACKUP_DIR},
+  {"fuzzy",	  0,     0,    OPT_FUZZY},
   {0,0,0,0}};
 
 
@@ -596,6 +599,10 @@
 			backup_dir = optarg;
 			break;
 
+		case OPT_FUZZY:
+			fuzzy = 1;
+			break;
+
 		default:
 			slprintf(err_buf,sizeof(err_buf),"unrecognised option\n");
 			return 0;
@@ -767,7 +774,9 @@
 		args[ac++] = "--compare-dest";
 		args[ac++] = compare_dest;
 	}
-
+	
+	if (fuzzy && am_sender)
+		args[ac++] = "--fuzzy";
 
 	*argc = ac;
 }
diff -urN rsync-2.4.6/proto.h rsync-latest/proto.h
--- rsync-2.4.6/proto.h	Wed Sep  6 13:46:43 2000
+++ rsync-latest/proto.h	Thu Sep 28 21:32:14 2000
@@ -212,3 +212,6 @@
 int cmp_modtime(time_t file1, time_t file2);
 int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
 int sys_gettimeofday(struct timeval *tv);
+int do_open_regular(char *fname);
+int open_alternate_base_fuzzy(const char *fname);
+int open_alternate_base_comparedir(const char *fname);
diff -urN rsync-2.4.6/receiver.c rsync-latest/receiver.c
--- rsync-2.4.6/receiver.c	Fri Mar 31 00:23:03 2000
+++ rsync-latest/receiver.c	Thu Sep 28 21:47:10 2000
@@ -35,6 +35,7 @@
 extern char *compare_dest;
 extern int make_backups;
 extern char *backup_suffix;
+extern int fuzzy;
 
 static struct delete_list {
 	dev_t dev;
@@ -303,8 +303,6 @@
 	STRUCT_STAT st;
 	char *fname;
 	char fnametmp[MAXPATHLEN];
-	char *fnamecmp;
-	char fnamecmpbuf[MAXPATHLEN];
 	struct map_struct *buf;
 	int i;
 	struct file_struct *file;
@@ -362,28 +360,24 @@
 		if (verbose > 2)
 			rprintf(FINFO,"recv_files(%s)\n",fname);
 
-		fnamecmp = fname;
-
 		/* open the file */  
-		fd1 = do_open(fnamecmp, O_RDONLY, 0);
+		fd1 = do_open(fname, O_RDONLY, 0);
 
-		if ((fd1 == -1) && (compare_dest != NULL)) {
-			/* try the file at compare_dest instead */
-			slprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
-						compare_dest,fname);
-			fnamecmp = fnamecmpbuf;
-			fd1 = do_open(fnamecmp, O_RDONLY, 0);
-		}
+		if (fd1 == -1 && compare_dest != NULL)
+			fd1 = open_alternate_base_comparedir(fname);
+
+		if (fd1 == -1 && fuzzy)
+			fd1 = open_alternate_base_fuzzy(fname);
 
 		if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
-			rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno));
+			rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
 			receive_data(f_in,NULL,-1,NULL,file->length);
 			close(fd1);
 			continue;
 		}
 
 		if (fd1 != -1 && !S_ISREG(st.st_mode)) {
-			rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp);
+			rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname);
 			receive_data(f_in,NULL,-1,NULL,file->length);
 			close(fd1);
 			continue;
@@ -399,7 +393,7 @@
 		if (fd1 != -1 && st.st_size > 0) {
 			buf = map_file(fd1,st.st_size);
 			if (verbose > 2)
-				rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
+				rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size);
 		} else {
 			buf = NULL;
 		}
diff -urN rsync-2.4.6/util.c rsync-latest/util.c
--- rsync-2.4.6/util.c	Wed Sep  6 13:46:43 2000
+++ rsync-latest/util.c	Thu Sep 28 20:26:15 2000
@@ -963,7 +963,6 @@
  *******************************************************************/
 int cmp_modtime(time_t file1, time_t file2)
 {
-	time_t diff;
 	extern int modify_window;
 
 	if (file2 > file1) {



Reply to: