Greetings,
As recently discussed on IRC, I wrote a script to detect missing tags
on our packages repository.
It is run from the toplevel directory of the packages repository.
When run, it looks on the public UDD mirror for packages that are
missing a tag. For each package that misses a tag it downloads the
debian directory from a mirror (httpredir.debian.org/debian), searches
the commit history (starting with the most recent commits) and tags
the first commit with a matching debian directory.
It doesn't handle native packages, as they don't have the debian
directory separated from the rest of the package. It does, however,
warn about such packages if they miss a tag.
The script is written in python3. To run it you need the following
packages installed:
* python3 (obviously...)
* python3-psycopg2 (For UDD access)
* python3-dulwich (For writing and reading git repositories)
* python3-apt (For reading the package cache)
* python3-requests (For retrieving the debian directories from the
mirror)
On most systems all of them, except psycopg2 and dulwich, will already
be installed.
As most affected packages haven't been changed since the transition,
the most recent commit has received most of the tags, but I think this
shouldn't be a problem, as uploading the package from that commit
should reproduce the package in the repository.
If no one objects to the above quirk, I will push the generated tags
soon.
The script is attached. I won't add it to pkg-haskell-tools, as it is
kind of hacky around the edges and is missing a proper command line
interface and other things you expect from a proper script.
Regards
Sven
#!/usr/bin/python3
import sys
import os
import tempfile
import tarfile
import io
import psycopg2
from dulwich.repo import Repo
from dulwich.objects import Blob, Tree
import apt_pkg
import requests
# Connection parameters for the public UDD mirror; main() passes them to
# psycopg2.connect() as database, user, password and host respectively.
UDD = "udd"
UDD_USER = "public-udd-mirror"
UDD_PASSWORD = "public-udd-mirror"
UDD_HOST = "public-udd-mirror.xvm.mit.edu"
# Base URL of the Debian mirror; get_url() prepends it to the path of a
# source file as listed in the apt source records.
DEBIAN_MIRROR = "http://httpredir.debian.org/debian/"
class Package:
    """A source package, its archive version and whether it is tagged.

    Equality and hashing consider only ``name``: two instances with the
    same name but different versions or tag states are equal, so a set
    holds at most one entry per package name.
    """

    def __init__(self, name, version, has_tag):
        """Store the package name, its version string and the tag flag."""
        self.name = name
        self.version = version
        self.has_tag = has_tag

    def __repr__(self):
        return "Package({}, {}, {})".format(self.name, self.version,
                                            self.has_tag)

    def __eq__(self, other):
        """Compare by name with anything that exposes a ``name`` attribute."""
        try:
            return self.name == other.name
        except AttributeError:
            # Not package-like: hand the decision back to Python so the
            # other operand's reflected __eq__ can be tried; the final
            # fallback is still "not equal".
            return NotImplemented

    def __hash__(self):
        # Must agree with __eq__, hence name only.
        return hash(self.name)
def main():
    """Find the untagged packages of a release and tag them.

    The release comes from the first command line argument and defaults
    to "sid".  If the argument equals a ``role`` listed in
    ``public.releases`` it is replaced by that row's ``release`` value.
    The function changes into the ``p`` directory and opens the git
    repository in its parent, so it has to be started from the top level
    of the packages repository.
    """
    try:
        release = sys.argv[1]
    except IndexError:
        release = "sid"

    # Acquire the repository *before* entering the try block: if chdir or
    # Repo() fails there is nothing to close, and the original error must
    # not be masked by a NameError on an unbound `repo` in the cleanup.
    os.chdir('p')
    repo = Repo("..")
    try:
        conn = psycopg2.connect(database=UDD, user=UDD_USER,
                                password=UDD_PASSWORD, host=UDD_HOST)
        try:
            # `with conn` only wraps a transaction; it does not close the
            # connection, which is why the explicit close() below exists.
            with conn:
                with conn.cursor() as cur:
                    cur.execute("SELECT role, release FROM public.releases "
                                "WHERE role != '';")
                    # Translate a role given on the command line into the
                    # release value stored in the same row.
                    for role, release_name in cur:
                        if release == role:
                            release = release_name
                            break
                print("Searching untagged packages...")
                untagged = set(find_untagged(release, conn, repo))
        finally:
            conn.close()
        tag_all(untagged, repo)
    finally:
        repo.close()
def find_untagged(release, conn, repo):
    """Yield a Package for each directory in the cwd that has no tag yet.

    Every directory name in the current working directory is treated as
    a source package name and passed to check_package() together with
    *release*, *conn* and *repo*.  Names for which check_package() raises
    RuntimeError are skipped, as are packages that already have a tag.
    """
    for candidate_name in os.listdir("."):
        if not os.path.isdir(candidate_name):
            continue
        try:
            candidate = check_package(candidate_name, release, conn, repo)
        except RuntimeError:
            # The package could not be looked up; move on to the next one.
            continue
        if candidate.has_tag:
            continue
        yield candidate
def check_package(pkg, release, conn, repo):
    """Look up *pkg* in UDD and report whether its version is tagged.

    Queries ``public.all_sources`` through *conn* for the version of
    source package *pkg* in *release* and checks whether *repo* has the
    ref ``refs/tags/<pkg>_v<version>`` (with ":" in the version replaced
    by "_").

    Returns a Package carrying the name, the version and the tag state.
    Raises RuntimeError, after printing a message to stderr, when the
    query yields no row.
    """
    with conn.cursor() as cursor:
        cursor.execute("SELECT version FROM public.all_sources WHERE "
                       "source = %s AND release = %s;", (pkg, release))
        try:
            # fetchone() gives None for an empty result; subscripting
            # None is what raises the TypeError handled below.
            version = cursor.fetchone()[0]
        except TypeError:
            message = pkg + (" not found in udd (maybe it is not in the"
                             " archive yet?)")
            print(message, file=sys.stderr)
            raise RuntimeError("Package not in database")
    tag_name = pkg + "_v" + version.replace(":", "_")
    ref_name = b'refs/tags/' + tag_name.encode()
    is_tagged = ref_name in repo.refs
    return Package(pkg, version, is_tagged)
def tag_all(packages, repo):
    """Create the missing tag for each package in *packages*.

    First fetches the archive's debian directory of every package via
    get_archive_debian_dirs(), then walks the history of *repo* and, for
    each package, tags the first visited commit whose ``p/<name>/debian``
    tree matches the downloaded directory.  The tag is written as the
    ref ``refs/tags/<name>_v<version>`` with ":" in the version replaced
    by "_".

    *packages* must be a set; it is modified in place: tagged packages
    are removed, so whatever is left afterwards could not be matched.
    Those names are printed at the end.
    """
    print("Retrieving debian directories of untagged packages...")
    get_archive_debian_dirs(packages)
    print("Searching for right commits for tags")
    # The walker is used with its default ordering, which starts at the
    # most recent commit, so a package gets tagged at the newest commit
    # whose debian directory matches the archive.
    for entry in repo.get_walker():
        commit = entry.commit
        tree = repo[commit.tree]
        # Collected separately because `packages` must not change size
        # while it is being iterated.
        tagged_now = set()
        for package in packages:
            # Paths inside git trees are always '/'-separated, no matter
            # what the host's os.sep is.
            git_dir = b'/'.join((b'p', package.name.encode(), b'debian'))
            try:
                debian_tree_id = tree.lookup_path(repo.get_object,
                                                  git_dir)[1]
                debian_tree = repo[debian_tree_id]
            except KeyError:
                # If the package doesn't exist in the commit, we
                # won't tag it here.
                continue
            fs_path = os.path.join('../missing-tags', package.name, 'debian')
            if git_fs_dir_eq(debian_tree, fs_path, repo):
                tag = package.name + '_v' + \
                    package.version.replace(":", "_")
                print("Tagging " + commit.id.decode() + " with " +
                      tag)
                repo[b'refs/tags/' + tag.encode()] = commit.id
                tagged_now.add(package)
        packages -= tagged_now
        if not packages:
            # Everything is tagged; no need to walk older history.
            break
    if packages:
        print("Could not find commits for these packages:")
        for package in packages:
            print(package.name)
def git_fs_dir_eq(git_tree, fs_path, repo):
    """Return True if *git_tree* and the directory *fs_path* are equal.

    Both sides must contain exactly the same entry names.  Every blob in
    the tree must correspond to a regular file with identical content,
    and every subtree is compared recursively against the directory of
    the same name.  Objects are loaded from *repo* by their sha.

    A missing *fs_path* never compares equal.  Because the name sets
    must match, a directory on the filesystem that has no counterpart in
    the tree (for example an empty directory) makes the result False.
    """
    if not os.path.isdir(fs_path):
        return False

    entries = git_tree.items()
    # Compare the listings first: it is cheap, and it catches files that
    # exist only on the filesystem, which a walk over the git entries
    # alone would never notice.
    git_names = {entry.path.decode() for entry in entries}
    if git_names != set(os.listdir(fs_path)):
        return False

    for entry in entries:
        node = repo[entry.sha]
        fs_entry_path = os.path.join(fs_path, entry.path.decode())
        if isinstance(node, Blob):
            if not os.path.isfile(fs_entry_path):
                return False
            # Size check first to avoid reading files that cannot match.
            if os.path.getsize(fs_entry_path) != node.raw_length():
                return False
            with open(fs_entry_path, 'rb') as file:
                if file.read() != node.as_raw_string():
                    return False
        else:
            assert isinstance(node, Tree)
            if not git_fs_dir_eq(node, fs_entry_path, repo):
                return False
    return True
def get_archive_debian_dirs(packages):
    """Download and unpack the archive's debian directory per package.

    For each package the URL is obtained from get_url(); packages for
    which get_url() raises RuntimeError are skipped.  The downloaded
    tarball is unpacked into ``../missing-tags/<name>``.  A package whose
    target directory already exists is not downloaded again, so the
    directory doubles as a cache between runs and is never cleaned up
    here.

    Raises requests.HTTPError if the mirror answers with an error status.
    """
    apt_pkg.init()
    for package in packages:
        try:
            url = get_url(package)
        except RuntimeError:
            continue
        unpack_dir = os.path.join('../missing-tags/', package.name)
        if os.path.isdir(unpack_dir):
            continue
        response = requests.get(url)
        # Fail with a clear HTTP error instead of handing an error page
        # to tarfile.
        response.raise_for_status()
        with io.BytesIO(response.content) as tarstream, \
                tarfile.open(fileobj=tarstream) as tar:
            # NOTE(review): extractall() trusts the member paths of a
            # tarball fetched over plain HTTP; consider an extraction
            # filter or validating member names before unpacking.
            tar.extractall(unpack_dir)
def get_url(package):
    """Return the mirror URL of the 'diff' file of *package*.

    Steps through the local apt source records for ``package.name``
    until the record with ``package.version`` is found, then returns
    DEBIAN_MIRROR joined with the path of that record's file of type
    'diff'.

    Raises RuntimeError, after printing a warning to stderr, when the
    requested version is not in the source records or when the record
    has no 'diff' file (which the warning reports as a native package).
    """
    source_recs = apt_pkg.SourceRecords()
    while True:
        # lookup() advances to the next record for this name and returns
        # a false value once there are none left.  Without this check a
        # version missing from the local records would never end the
        # search cleanly.
        if not source_recs.lookup(package.name):
            print("WARNING: {} {} not found in the apt source records. IT"
                  " WILL NOT BE CHECKED!".format(package.name,
                                                 package.version),
                  file=sys.stderr)
            raise RuntimeError(package.name + " not in apt source records")
        if source_recs.version == package.version:
            break
    for file in source_recs.files:
        # Each entry is indexed positionally: [2] is the path relative
        # to the mirror root, [3] is the file type.
        if file[3] == 'diff':
            return DEBIAN_MIRROR + file[2]
    print("WARNING: {} seems to be native. IT WILL NOT BE"
          " CHECKED!".format(package.name), file=sys.stderr)
    raise RuntimeError(package.name + " has no diff file")
# Run the tagging only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Attachment:
pgpLlz2VvBdQg.pgp
Description: Digitale Signatur von OpenPGP