[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: perl code for decoding tar files...



joey@finlandia.infodrom.north.de (Martin Schulze)  wrote on 09.07.97 in <[🔎] m0wm0zV-000VXIC@finlandia.Infodrom.North.DE>:

> joost witteveen writes:
>
> > > > I seem to remember someone commenting that they had perl code for
> > > > decoding tar files laying around.  If so, could I get a copy, so I may
> > > > attempt to ameliorate dpkg-source's inability to cope with 100+
> > > > character filenames?
> > >
> > > Please remember that the problem also exists in dpkg-deb with
> > > the tar extractor.
>
> > BTW, that dpkg-dev problem, couldn't that be because dpkg-dev uses
> > dpkg-source? (Just guessing).
>
> No, the dpkg-deb problem is somewhat different.  While generating
> a .deb file dpkg-deb uses "tar c --exclude DEBIAN f - ".  If GNU
> tar encounters a long filename, it uses two records in the
> resulting tar file, one starting/ending with something like
> /@/LongLink/@/.

This is only for symbolic link *targets* longer than 100 chars. POSIX tar  
can't handle those at all. It should be a fairly uncommon problem, though.

As to the "normal" 100 char limit, which POSIX tar handles just fine, I  
believe I had such a source, even posted it here. Now where did I put it?

Ah, in /usr/local/bin. Here it is, a perl program to compare tar files to  
the file system. No guarantees, but I believe it does work; feel free to  
use the source for whatever you want. Includes /usr/include/tar.h as a  
trailing comment, which (I believe) is under the LGPL.

MfG Kai

#! /usr/bin/perl -w
#
# Compare a tar archive against the file system.
#
# Reads a tar archive from STDIN and, for every entry, examines the object
# of the same name below "/":
#   - permission bits, owner name and group name (all entries)
#   - size and contents                          (regular files)
#   - identity with the link target              (hard links)
#   - link target                                (symbolic links)
#   - device numbers                             (character/block devices)
# One line is printed per difference, "<name>: ok" for every file entry that
# matches, and a summary line at the end.  A damaged archive (short read,
# read error or bad header checksum) aborts the run.

use strict;

$| = 1;		# the report goes out unbuffered

binmode STDIN;

# Statistics.  $entries/$matches count every archive member, the "f"
# variants only file entries (typeflag le '1': regular files and hard
# links); $nofile counts file entries that are missing on disk.
my ($entries, $matches, $fentries, $fmatches, $nofile) = (0, 0, 0, 0, 0);

# Number of consecutive all-NUL blocks seen.  Only tracked; the loop below
# keeps reading until end of input rather than stopping at the end marker.
my $zeroes = 0;

# Print a message on the report stream (STDOUT) and abort the program.
sub fatal {
	print @_;
	die;
}

while (1) {
	my $hdr = '';
	my $n = read(STDIN, $hdr, 512);
	fatal("read error: $!\n") if !defined $n;
	last if $n == 0;
	fatal("premature eof: read $n, not 512\n") if $n != 512;

	# A block whose bytes sum to zero is padding / end-of-archive marker.
	if (0 == unpack('%32C*', $hdr)) {
		$zeroes++;
		next;
	}
	$zeroes = 0;

	# Split the header into its fields (layout as in <tar.h>).
	my ($t_name, $t_mode, $t_uid, $t_gid, $t_size, $t_mtime, $t_chksum,
	    $t_typeflag, $t_linkname, $t_magic, $t_version, $t_uname, $t_gname,
	    $t_devmajor, $t_devminor, $t_prefix) =
	    unpack('A100A8A8A8A12A12A8a1A100A6A2A32A32A8A155', $hdr);

	my $is_file = $t_typeflag le '1';
	$fentries++ if $is_file;

	# Numeric fields are stored as octal text.
	$t_mode = oct($t_mode);
	$t_uid = oct($t_uid);
	$t_gid = oct($t_gid);
	$t_size = oct($t_size);
	$t_mtime = oct($t_mtime);
	$t_chksum = oct($t_chksum);
	$t_devmajor = oct($t_devmajor);
	$t_devminor = oct($t_devminor);

	# Full name is prefix/name when a prefix is present.  Test the length,
	# not the truth value, so that a prefix of "0" is not dropped.
	my $fn = length($t_prefix) ? $t_prefix . "/" . $t_name : $t_name;
	$fn = "/" . $fn;	# entries are checked relative to the root directory

	# Header checksum: sum of all 512 bytes with the 8 checksum bytes
	# themselves counted as blanks.
	my $ck = unpack('%32C*', substr($hdr, 0, 148) . ' 'x8 .
				substr($hdr, 148+8, 512-148-8));
	fatal("bad checksum\n") if $ck != $t_chksum;

	# Member data, followed by padding up to the next 512-byte boundary.
	my $data = '';
	$n = read(STDIN, $data, $t_size);
	fatal("read error: $!\n") if !defined $n;
	fatal("premature eof: read $n, not $t_size\n") if $n != $t_size;

	my $fluff = (512 - $t_size % 512) % 512;
	my $fluffdata = '';
	$n = read(STDIN, $fluffdata, $fluff);
	fatal("read error: $!\n") if !defined $n;
	fatal("premature eof: read $n, not $fluff\n") if $n != $fluff;

	$entries++;
	my $ok = 1;

	my ($f_dev, $f_ino, $f_mode, $f_nlink, $f_uid, $f_gid, $f_rdev,
	    $f_size) = lstat($fn);
	if (!defined $f_mode) {
		printf "%s: missing\n", $fn;
		$nofile++ if $is_file;
		next;
	}
	# Remember whether the object on disk is a plain file; "_" is the
	# stat buffer filled by the lstat() just above.
	my $f_is_reg = -f _;

	# Compare permission bits only: st_mode also carries the file type,
	# which the archive's mode field need not contain.
	my $t_perm = $t_mode & 07777;
	my $f_perm = $f_mode & 07777;
	if ($t_perm != $f_perm) {
		printf "%s: mode is %o, not %o\n", $fn, $f_perm, $t_perm;
		$ok = 0;
	}

	# Owner and group are compared by name; '?' stands for an id that has
	# no name on this system.
	my $f_user = getpwuid($f_uid);
	$f_user = '?' if !defined($f_user);
	if ($t_uname ne $f_user) {
		printf "%s: uid is %d (%s), not %d (%s)\n",
			$fn, $f_uid, $f_user, $t_uid, $t_uname;
		$ok = 0;
	}
	my $f_group = getgrgid($f_gid);
	$f_group = '?' if !defined($f_group);
	if ($t_gname ne $f_group) {
		printf "%s: gid is %d (%s), not %d (%s)\n",
			$fn, $f_gid, $f_group, $t_gid, $t_gname;
		$ok = 0;
	}

	if ($is_file) {
		if ($t_typeflag eq '1') {
			# Hard link: the entry has no data of its own (size is
			# 0), so check that name and link target are the same
			# object instead of comparing size and contents.
			my $target = "/" . $t_linkname;
			my ($l_dev, $l_ino) = lstat($target);
			if (!defined $l_ino) {
				printf "%s: link target %s missing\n",
					$fn, $target;
				$ok = 0;
			}
			elsif ($l_dev != $f_dev || $l_ino != $f_ino) {
				printf "%s: should be a hard link to %s\n",
					$fn, $target;
				$ok = 0;
			}
		}
		elsif (!$f_is_reg) {
			# Do not open FIFOs, devices etc.: that could block.
			printf "%s: not a regular file\n", $fn;
			$ok = 0;
		}
		elsif ($t_size == $f_size) {
			my $f_data = '';
			my $got;
			if (open(my $fh, '<', $fn)) {
				binmode $fh;
				$got = read($fh, $f_data, $f_size);
				close($fh);
			}
			if (defined $got && $got == $f_size) {
				if ($data ne $f_data) {
					printf "%s: contents differ\n", $fn;
					$ok = 0;
				}
			}
			else {
				printf "%s: can't read\n", $fn;
				$ok = 0;
			}
		}
		else {
			printf "%s: size is %d, not %d\n", $fn, $f_size, $t_size;
			$ok = 0;
		}
		if ($ok) {
			$fmatches++;
			printf "%s: ok\n", $fn;
		}
	}
	if ($t_typeflag eq '2') {
		my $f_link = readlink($fn);
		if (defined $f_link) {
			if ($f_link ne $t_linkname) {
				printf "%s: points to %s, not %s\n",
					$fn, $f_link, $t_linkname;
				$ok = 0;
			}
		}
		else {
			printf "%s: should be a symlink to %s\n",
				$fn, $t_linkname;
			$ok = 0;
		}
	}
	if ($t_typeflag eq '3' || $t_typeflag eq '4') {
		# NOTE(review): treats st_rdev as major*256 + minor; confirm
		# this encoding holds on the target platform.
		if ($f_rdev != $t_devmajor*256 + $t_devminor) {
			printf "%s: dev=(%d,%d), not (%d,%d)\n",
				$fn, $f_rdev/256, $f_rdev%256,
				$t_devmajor, $t_devminor;
			$ok = 0;
		}
	}
	$matches++ if $ok;
}

printf "\n\n Entries = %d(%d), Matches = %d(%d), Bad = %d(%d), Files = %d\n",
	$entries, $fentries, $matches, $fmatches,
	$entries-$matches, $fentries-$fmatches, $fentries-$nofile;

#	/* Extended tar format from POSIX.1.
#	   Copyright (C) 1992 Free Software Foundation, Inc.
#	   Written by David J. MacKenzie.
#	
#	This file is part of the GNU C Library.
#	
#	The GNU C Library is free software; you can redistribute it and/or
#	modify it under the terms of the GNU Library General Public License as
#	published by the Free Software Foundation; either version 2 of the
#	License, or (at your option) any later version.
#	
#	The GNU C Library is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#	Library General Public License for more details.
#	
#	You should have received a copy of the GNU Library General Public
#	License along with the GNU C Library; see the file COPYING.LIB.  If
#	not, write to the, 1992 Free Software Foundation, Inc., 675 Mass Ave,
#	Cambridge, MA 02139, USA.  */
#	
#	#ifndef	_TAR_H
#	
#	#define	_TAR_H	1
#	
#	/* A tar archive consists of 512-byte blocks.
#	   Each file in the archive has a header block followed by 0+ data blocks.
#	   Two blocks of NUL bytes indicate the end of the archive.  */
#	
#	/* The fields of header blocks:
#	   All strings are stored as ISO 646 (approximately ASCII) strings.
#	
#	   Fields are numeric unless otherwise noted below; numbers are ISO 646
#	   representations of octal numbers, with leading zeros as needed.
#	
#	   linkname is only valid when typeflag==LNKTYPE.  It doesn't use prefix;
#	   files that are links to pathnames >100 chars long can not be stored
#	   in a tar archive.
#	
#	   If typeflag=={LNKTYPE,SYMTYPE,DIRTYPE} then size must be 0.
#	
#	   devmajor and devminor are only valid for typeflag=={BLKTYPE,CHRTYPE}.
#	
#	   chksum contains the sum of all 512 bytes in the header block,
#	   treating each byte as an 8-bit unsigned value and treating the
#	   8 bytes of chksum as blank characters.
#	
#	   uname and gname are used in preference to uid and gid, if those
#	   names exist locally.
#	
#	   Field Name	Byte Offset	Length in Bytes	Field Type
#	   name		0		100		NUL-terminated if NUL fits
#	   mode		100		8
#	   uid		108		8
#	   gid		116		8
#	   size		124		12
#	   mtime	136		12
#	   chksum	148		8
#	   typeflag	156		1		see below
#	   linkname	157		100		NUL-terminated if NUL fits
#	   magic	257		6		must be TMAGIC (NUL term.)
#	   version	263		2		must be TVERSION
#	   uname	265		32		NUL-terminated
#	   gname	297		32		NUL-terminated
#	   devmajor	329		8
#	   devminor	337		8
#	   prefix	345		155		NUL-terminated if NUL fits
#	
#	   If the first character of prefix is '\0', the file name is name;
#	   otherwise, it is prefix/name.  Files whose pathnames don't fit in that
#	   length can not be stored in a tar archive.  */
#	
#	/* The bits in mode: */
#	#define TSUID	04000
#	#define TSGID	02000
#	#define TSVTX	01000
#	#define TUREAD	00400
#	#define TUWRITE	00200
#	#define TUEXEC	00100
#	#define TGREAD	00040
#	#define TGWRITE	00020
#	#define TGEXEC	00010
#	#define TOREAD	00004
#	#define TOWRITE	00002
#	#define TOEXEC	00001
#	
#	/* The values for typeflag:
#	   Values 'A'-'Z' are reserved for custom implementations.
#	   All other values are reserved for future POSIX.1 revisions.  */
#	
#	#define REGTYPE		'0'	/* Regular file (preferred code).  */
#	#define AREGTYPE	'\0'	/* Regular file (alternate code).  */
#	#define LNKTYPE		'1'	/* Hard link.  */
#	#define SYMTYPE		'2'	/* Symbolic link (hard if not supported).  */
#	#define CHRTYPE		'3'	/* Character special.  */
#	#define BLKTYPE		'4'	/* Block special.  */
#	#define DIRTYPE		'5'	/* Directory.  */
#	#define FIFOTYPE	'6'	/* Named pipe.  */
#	#define CONTTYPE	'7'	/* Contiguous file */
#	 /* (regular file if not supported).  */
#	
#	/* Contents of magic field and its length.  */
#	#define TMAGIC	"ustar"
#	#define TMAGLEN	6
#	
#	/* Contents of the version field and its length.  */
#	#define TVERSION	"00"
#	#define TVERSLEN	2
#	
#	#endif /* tar.h */


--
TO UNSUBSCRIBE FROM THIS MAILING LIST: e-mail the word "unsubscribe" to
debian-devel-request@lists.debian.org . 
Trouble?  e-mail to templin@bucknell.edu .


Reply to: