Re: perl code for decoding tar files...
joey@finlandia.infodrom.north.de (Martin Schulze) wrote on 09.07.97 in <[🔎] m0wm0zV-000VXIC@finlandia.Infodrom.North.DE>:
> joost witteveen writes:
>
> > > > I seem to remember someone commenting that they had perl code for
> > > > decoding tar files lying around. If so, could I get a copy, so I may
> > > > attempt to ameliorate dpkg-source's inability to cope with 100+
> > > > character filenames?
> > >
> > > Please remember that the problem also exists in dpkg-deb with
> > > the tar extractor.
>
> > BTW, that dpkg-dev problem, couldn't that be because dpkg-dev uses
> > dpkg-source? (Just guessing).
>
> No, the dpkg-deb problem is somewhat different. While generating
> a .deb file dpkg-deb uses "tar c --exclude DEBIAN f - ". If GNU
> tar encounters a long filename, it uses two records in the
> resulting tar file, one starting/ending with something like
> /@/LongLink/@/.
This is only for symbolic link *targets* longer than 100 chars. POSIX tar
can't handle those at all. It should be a fairly uncommon problem, though.
As to the "normal" 100 char limit, which POSIX tar handles just fine, I
believe I had such a source, even posted it here. Now where did I put it?
Ah, in /usr/local/bin. Here it is, a perl program to compare tar files to
the file system. No guarantees, but I believe it does work; feel free to
use the source for whatever you want. Includes /usr/include/tar.h as a
trailing comment, which (I believe) is under the LGPL.
MfG Kai
#! /usr/bin/perl -w
use strict;
# Compare the members of a tar archive, read from STDIN, with the file system.
#
# Usage:  perl THIS_SCRIPT [ROOT] < archive.tar
#
# Each member name is looked up below ROOT (default "/").  For every member
# the permission bits, owner and group are compared; regular files are also
# compared byte for byte, hard links and symlinks are checked against their
# targets, and device nodes against their major/minor numbers.  One line is
# printed per discrepancy (or "ok" for a matching file), followed by a
# summary.  A malformed archive aborts the run via die (non-zero exit).

$| = 1;             # unbuffered STDOUT: each report line appears immediately
binmode STDIN;      # the archive is binary data

my $BLOCK_SIZE = 512;       # tar archives consist of 512-byte blocks
my $PERM_MASK  = 07777;     # permission, set-id and sticky bits of a mode

# Header layout as listed in the tar.h comment at the end of this file.
# String fields use Z (cut at the first NUL), numeric fields use A (strip
# trailing blanks/NULs).  mtime (x12) and magic+version (x8) are skipped
# because nothing below looks at them.  Both devmajor and devminor need
# their own A8, otherwise prefix is never extracted.
my $HEADER_TEMPLATE = join '', qw(
    Z100 A8 A8 A8 A12 x12 A8 a1 Z100 x8 Z32 Z32 A8 A8 Z155
);

# Directory against which member names are resolved; always ends in "/".
my $root = @ARGV ? shift @ARGV : '/';
$root .= '/' unless $root =~ m{/\z};

# Read exactly $len bytes from STDIN and return them; die on error or EOF.
sub read_exactly {
    my ($len) = @_;
    my $buf = '';
    my $n = read(STDIN, $buf, $len);
    die "read error on archive: $!\n" if !defined $n;
    die "premature eof: read $n, not $len\n" if $n != $len;
    return $buf;
}

# Return the first $size bytes of the file at $path, or undef if the file
# cannot be opened or fewer than $size bytes could be read.
sub read_file {
    my ($path, $size) = @_;
    open(my $fh, '<', $path) or return;
    binmode $fh;
    my $buf = '';
    my $n = read($fh, $buf, $size);
    close $fh;
    return unless defined $n && $n == $size;
    return $buf;
}

# Counters for the summary: all members / file members (regular files and
# hard links), how many of each matched, and how many file members were
# missing on disk.
my ($entries, $matches, $fentries, $fmatches, $nofile) = (0, 0, 0, 0, 0);

ENTRY:
while (1) {
    my $hdr = '';
    my $n = read(STDIN, $hdr, $BLOCK_SIZE);
    die "read error on archive: $!\n" if !defined $n;
    last ENTRY if $n == 0;                      # clean end of input
    die "premature eof: read $n, not $BLOCK_SIZE\n" if $n != $BLOCK_SIZE;

    # All-zero blocks (end-of-archive markers, padding) carry no member.
    next ENTRY if unpack('%32C*', $hdr) == 0;

    my ($t_name, $t_mode, $t_uid, $t_gid, $t_size, $t_chksum, $t_typeflag,
        $t_linkname, $t_uname, $t_gname, $t_devmajor, $t_devminor,
        $t_prefix) = unpack($HEADER_TEMPLATE, $hdr);

    # Numeric header fields are stored as octal text.
    for my $field ($t_mode, $t_uid, $t_gid, $t_size, $t_chksum,
                   $t_devmajor, $t_devminor) {
        $field = oct($field);
    }

    # The checksum is the byte sum of the header with the 8 chksum bytes
    # (offset 148) treated as blanks.
    my $blanked = $hdr;
    substr($blanked, 148, 8, ' ' x 8);
    die "bad checksum\n" if unpack('%32C*', $blanked) != $t_chksum;

    # '0', NUL and '7' are regular files; '1' is a hard link.
    my $is_regular  = $t_typeflag eq '0'
                   || $t_typeflag eq "\0"
                   || $t_typeflag eq '7';
    my $is_hardlink = $t_typeflag eq '1';
    my $is_file     = $is_regular || $is_hardlink;
    $fentries++ if $is_file;

    # A non-empty prefix is joined to name with "/".
    my $member = $t_prefix ne '' ? "$t_prefix/$t_name" : $t_name;
    my $fn     = $root . $member;

    # Member data, followed by padding up to the next block boundary.
    my $data  = read_exactly($t_size);
    my $fluff = ($BLOCK_SIZE - $t_size % $BLOCK_SIZE) % $BLOCK_SIZE;
    read_exactly($fluff);

    $entries++;
    my $ok = 1;

    # dev, ino, mode, uid, gid, rdev, size; all undef if lstat fails.
    my ($f_dev, $f_ino, $f_mode, $f_uid, $f_gid, $f_rdev, $f_size) =
        (lstat($fn))[0, 1, 2, 4, 5, 6, 7];
    if (!defined $f_mode) {
        print "$fn: missing\n";
        $nofile++ if $is_file;
        next ENTRY;
    }

    # lstat's mode carries file-type bits that the archive may not store,
    # so only the permission bits are compared.
    if (($t_mode & $PERM_MASK) != ($f_mode & $PERM_MASK)) {
        printf "%s: mode is %o, not %o\n",
            $fn, $f_mode & $PERM_MASK, $t_mode & $PERM_MASK;
        $ok = 0;
    }

    # Owner: compare by name when the archive records one, else by uid.
    my $f_user = getpwuid($f_uid);
    $f_user = '?' if !defined $f_user;
    if ($t_uname ne '' ? $t_uname ne $f_user : $t_uid != $f_uid) {
        printf "%s: uid is %d (%s), not %d (%s)\n",
            $fn, $f_uid, $f_user, $t_uid, $t_uname;
        $ok = 0;
    }

    # Group: same rule as for the owner.
    my $f_group = getgrgid($f_gid);
    $f_group = '?' if !defined $f_group;
    if ($t_gname ne '' ? $t_gname ne $f_group : $t_gid != $f_gid) {
        printf "%s: gid is %d (%s), not %d (%s)\n",
            $fn, $f_gid, $f_group, $t_gid, $t_gname;
        $ok = 0;
    }

    if ($is_regular) {
        if ($t_size != $f_size) {
            printf "%s: size is %d, not %d\n", $fn, $f_size, $t_size;
            $ok = 0;
        }
        else {
            my $f_data = read_file($fn, $f_size);
            if (!defined $f_data) {
                print "$fn: can't read\n";
                $ok = 0;
            }
            elsif ($data ne $f_data) {
                print "$fn: contents differ\n";
                $ok = 0;
            }
        }
    }

    if ($is_hardlink) {
        # A hard link member has size 0 and no data of its own; it matches
        # when it is the same inode as the member named by linkname
        # (linkname does not use prefix).
        my ($l_dev, $l_ino) = (lstat($root . $t_linkname))[0, 1];
        if (!defined $l_ino || $l_dev != $f_dev || $l_ino != $f_ino) {
            printf "%s: should be a hard link to %s\n", $fn, $t_linkname;
            $ok = 0;
        }
    }

    if ($is_file && $ok) {
        $fmatches++;
        print "$fn: ok\n";
    }

    if ($t_typeflag eq '2') {
        my $f_link = readlink($fn);
        if (!defined $f_link) {
            printf "%s: should be a symlink to %s\n", $fn, $t_linkname;
            $ok = 0;
        }
        elsif ($f_link ne $t_linkname) {
            printf "%s: points to %s, not %s\n", $fn, $f_link, $t_linkname;
            $ok = 0;
        }
    }

    if ($t_typeflag eq '3' || $t_typeflag eq '4') {
        # NOTE(review): assumes rdev is encoded as major*256 + minor;
        # confirm for the target platform.
        if ($f_rdev != $t_devmajor * 256 + $t_devminor) {
            printf "%s: dev=(%d,%d), not (%d,%d)\n",
                $fn, $f_rdev / 256, $f_rdev % 256, $t_devmajor, $t_devminor;
            $ok = 0;
        }
    }

    $matches++ if $ok;
}

printf "\n\n Entries = %d(%d), Matches = %d(%d), Bad = %d(%d), Files = %d\n",
    $entries, $fentries, $matches, $fmatches,
    $entries - $matches, $fentries - $fmatches, $fentries - $nofile;
# /* Extended tar format from POSIX.1.
# Copyright (C) 1992 Free Software Foundation, Inc.
# Written by David J. MacKenzie.
#
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with the GNU C Library; see the file COPYING.LIB. If
# not, write to the, 1992 Free Software Foundation, Inc., 675 Mass Ave,
# Cambridge, MA 02139, USA. */
#
# #ifndef _TAR_H
#
# #define _TAR_H 1
#
# /* A tar archive consists of 512-byte blocks.
# Each file in the archive has a header block followed by 0+ data blocks.
# Two blocks of NUL bytes indicate the end of the archive. */
#
# /* The fields of header blocks:
# All strings are stored as ISO 646 (approximately ASCII) strings.
#
# Fields are numeric unless otherwise noted below; numbers are ISO 646
# representations of octal numbers, with leading zeros as needed.
#
# linkname is only valid when typeflag==LNKTYPE. It doesn't use prefix;
# files that are links to pathnames >100 chars long can not be stored
# in a tar archive.
#
# If typeflag=={LNKTYPE,SYMTYPE,DIRTYPE} then size must be 0.
#
# devmajor and devminor are only valid for typeflag=={BLKTYPE,CHRTYPE}.
#
# chksum contains the sum of all 512 bytes in the header block,
# treating each byte as an 8-bit unsigned value and treating the
# 8 bytes of chksum as blank characters.
#
# uname and gname are used in preference to uid and gid, if those
# names exist locally.
#
# Field Name Byte Offset Length in Bytes Field Type
# name 0 100 NUL-terminated if NUL fits
# mode 100 8
# uid 108 8
# gid 116 8
# size 124 12
# mtime 136 12
# chksum 148 8
# typeflag 156 1 see below
# linkname 157 100 NUL-terminated if NUL fits
# magic 257 6 must be TMAGIC (NUL term.)
# version 263 2 must be TVERSION
# uname 265 32 NUL-terminated
# gname 297 32 NUL-terminated
# devmajor 329 8
# devminor 337 8
# prefix 345 155 NUL-terminated if NUL fits
#
# If the first character of prefix is '\0', the file name is name;
# otherwise, it is prefix/name. Files whose pathnames don't fit in that
# length can not be stored in a tar archive. */
#
# /* The bits in mode: */
# #define TSUID 04000
# #define TSGID 02000
# #define TSVTX 01000
# #define TUREAD 00400
# #define TUWRITE 00200
# #define TUEXEC 00100
# #define TGREAD 00040
# #define TGWRITE 00020
# #define TGEXEC 00010
# #define TOREAD 00004
# #define TOWRITE 00002
# #define TOEXEC 00001
#
# /* The values for typeflag:
# Values 'A'-'Z' are reserved for custom implementations.
# All other values are reserved for future POSIX.1 revisions. */
#
# #define REGTYPE '0' /* Regular file (preferred code). */
# #define AREGTYPE '\0' /* Regular file (alternate code). */
# #define LNKTYPE '1' /* Hard link. */
# #define SYMTYPE '2' /* Symbolic link (hard if not supported). */
# #define CHRTYPE '3' /* Character special. */
# #define BLKTYPE '4' /* Block special. */
# #define DIRTYPE '5' /* Directory. */
# #define FIFOTYPE '6' /* Named pipe. */
# #define CONTTYPE '7' /* Contiguous file */
# /* (regular file if not supported). */
#
# /* Contents of magic field and its length. */
# #define TMAGIC "ustar"
# #define TMAGLEN 6
#
# /* Contents of the version field and its length. */
# #define TVERSION "00"
# #define TVERSLEN 2
#
# #endif /* tar.h */
--
TO UNSUBSCRIBE FROM THIS MAILING LIST: e-mail the word "unsubscribe" to
debian-devel-request@lists.debian.org .
Trouble? e-mail to templin@bucknell.edu .
Reply to: