Greetings, As recently discussed on IRC, I wrote a script to detect missing tags in our packages repository. It is run from the top-level directory of the packages repository. When run, it queries the public UDD mirror for packages that are missing a tag. For each package that is missing a tag, it downloads the debian directory from a mirror (httpredir.debian.org/debian), searches the history (starting with the most recent commits) for the first commit with a matching debian directory, and tags that commit. It doesn't handle native packages, as they don't have the debian directory separated from the rest of the package. It does, however, warn about such packages if they are missing a tag. The script is written in python3. To run it you need the following packages installed: * python3 (obviously...) * python3-psycopg2 (For UDD access) * python3-dulwich (For writing and reading git repositories) * python3-apt (For reading the package cache) * python3-requests (For retrieving the debian directories from the mirror) On most systems all of them, except psycopg2 and dulwich, will already be installed. As most affected packages haven't been changed since the transition, the most recent commit has received most of the tags, but I think this shouldn't be a problem, as uploading the package from that commit should reproduce the package in the repository. If no one objects to the above quirk, I will push the generated tags soon. The script is attached. I won't add it to pkg-haskell-tools, as it is kind of hacky around the edges and is missing a proper command line interface and other things you expect from a proper script. Regards Sven
#!/usr/bin/python3
"""Tag commits of source packages that are missing their release tag.

The script changes into the ``p`` subdirectory of the current working
directory and opens the git repository one level above it.  For every
directory in ``p`` it asks UDD for the version of the source package in
the requested release (first command line argument, default ``sid``) and
checks whether the tag ``<name>_v<version>`` exists.  For untagged
packages the ``diff`` file listed in the apt source records is downloaded
from DEBIAN_MIRROR, unpacked below MISSING_TAGS_DIR, and the first commit
yielded by the repository walker whose ``p/<name>/debian`` tree matches
the unpacked ``debian`` directory is tagged.
"""

import sys
import os
import shutil
import tempfile
import tarfile
import io

import psycopg2
from dulwich.repo import Repo
from dulwich.objects import Blob, Tree
import apt_pkg
import requests

UDD = "udd"
UDD_USER = "public-udd-mirror"
UDD_PASSWORD = "public-udd-mirror"
UDD_HOST = "public-udd-mirror.xvm.mit.edu"

DEBIAN_MIRROR = "http://httpredir.debian.org/debian/"

# Download cache, relative to the ``p`` directory main() changes into.
MISSING_TAGS_DIR = '../missing-tags'

# Seconds to wait for the mirror before giving up on a download.
DOWNLOAD_TIMEOUT = 60


class Package:
    """A source package with its archive version and tag state.

    Equality and hashing consider only ``name``, so a set of packages
    holds at most one entry per source package name.
    """

    def __init__(self, name, version, has_tag):
        self.name = name
        self.version = version
        self.has_tag = has_tag

    def __repr__(self):
        return "Package({}, {}, {})".format(self.name, self.version,
                                            self.has_tag)

    def __eq__(self, other):
        try:
            return self.name == other.name
        except AttributeError:
            return False

    def __hash__(self):
        return hash(self.name)


def _tag_name(name, version):
    """Return the tag name used for *version* of source package *name*.

    Colons in the version are replaced by underscores.
    """
    return name + "_v" + version.replace(":", "_")


def _resolve_release(release, conn):
    """Translate a release role into the release name stored in UDD.

    If *release* equals the ``role`` column of a row in
    ``public.releases``, that row's ``release`` value is returned;
    otherwise *release* is returned unchanged.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT role, release FROM public.releases "
                    "WHERE role != '';")
        for role, name in cur:
            if role == release:
                return name
    return release


def main():
    """Find the untagged packages of a release and tag matching commits.

    The release is taken from ``sys.argv[1]`` and defaults to ``sid``.
    """
    try:
        release = sys.argv[1]
    except IndexError:
        release = "sid"

    # Both calls happen before the ``try`` so that a failure here is
    # reported as such instead of as a NameError from the cleanup code.
    os.chdir('p')
    repo = Repo("..")
    try:
        conn = psycopg2.connect(database=UDD, user=UDD_USER,
                                password=UDD_PASSWORD, host=UDD_HOST)
        try:
            release = _resolve_release(release, conn)
            print("Searching untagged packages...")
            untagged = set(find_untagged(release, conn, repo))
        finally:
            # Using the connection as a context manager would only end
            # the transaction, so close it explicitly.
            conn.close()
        tag_all(untagged, repo)
    finally:
        repo.close()


def find_untagged(release, conn, repo):
    """Yield a Package for every directory in the cwd that lacks its tag.

    Directories whose name is not found in UDD for *release* are skipped
    (check_package already printed a message for them).
    """
    for name in os.listdir("."):
        if not os.path.isdir(name):
            continue
        try:
            package = check_package(name, release, conn, repo)
        except RuntimeError:
            continue
        if not package.has_tag:
            yield package


def check_package(pkg, release, conn, repo):
    """Look up *pkg* in UDD and report whether its tag exists in *repo*.

    Returns a Package carrying the version found in
    ``public.all_sources`` for *release*.

    Raises RuntimeError if the query returns no row.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT version FROM public.all_sources WHERE "
                    "source = %s AND release = %s;", (pkg, release))
        row = cur.fetchone()
    if row is None:
        print(pkg + " not found in udd (maybe it is not in the"
              " archive yet?)", file=sys.stderr)
        raise RuntimeError("Package not in database")
    version = row[0]
    tagpath = b'refs/tags/' + _tag_name(pkg, version).encode()
    return Package(pkg, version, tagpath in repo.refs)


def tag_all(packages, repo):
    """Tag, for each package, the first walked commit matching the archive.

    The debian directories are fetched with get_archive_debian_dirs and
    compared against ``p/<name>/debian`` in every commit yielded by
    ``repo.get_walker()``.  Packages for which no commit matches are
    listed on stdout.  *packages* itself is not modified.
    """
    remaining = set(packages)
    print("Retrieving debian directories of untagged packages...")
    get_archive_debian_dirs(remaining)
    print("Searching for right commits for tags")

    # These values do not depend on the commit, so build them only once.
    targets = {
        package: (
            os.path.join(b'p', package.name.encode(), b'debian'),
            os.path.join(MISSING_TAGS_DIR, package.name, 'debian'),
            _tag_name(package.name, package.version),
        )
        for package in remaining
    }

    # The first matching commit in the walker's order gets the tag; a
    # package is dropped from the search as soon as it has been tagged.
    # NOTE(review): the default walker order is expected to be newest
    # first, so this tags the most recent matching commit -- confirm.
    for entry in repo.get_walker():
        commit = entry.commit
        tree = repo[commit.tree]
        tagged_now = set()
        for package in remaining:
            git_dir, fs_path, tag = targets[package]
            try:
                debian_tree_id = tree.lookup_path(repo.get_object,
                                                  git_dir)[1]
                debian_tree = repo[debian_tree_id]
            except KeyError:
                # If the package doesn't exist in the commit, we
                # won't tag it here.
                continue
            if git_fs_dir_eq(debian_tree, fs_path, repo):
                print("Tagging " + commit.id.decode() + " with " + tag)
                repo[b'refs/tags/' + tag.encode()] = commit.id
                tagged_now.add(package)
        remaining -= tagged_now
        if not remaining:
            break

    if remaining:
        print("Could not find commits for these packages:")
        for package in remaining:
            print(package.name)


def git_fs_dir_eq(git_tree, fs_path, repo):
    """Return True if every entry of *git_tree* matches *fs_path* on disk.

    Blobs must exist as regular files with identical content; subtrees
    are compared recursively.

    NOTE(review): the comparison is one-directional.  Files that exist
    below *fs_path* but not in *git_tree* are not detected.
    """
    for entry in git_tree.items():
        node = repo[entry.sha]
        fs_entry_path = os.path.join(fs_path, entry.path.decode())
        if isinstance(node, Blob):
            if not os.path.isfile(fs_entry_path):
                return False
            # Cheap size check first to avoid reading files that
            # cannot match anyway.
            if os.path.getsize(fs_entry_path) != node.raw_length():
                return False
            with open(fs_entry_path, 'rb') as fs_file:
                if fs_file.read() != node.as_raw_string():
                    return False
        else:
            assert isinstance(node, Tree)
            if not git_fs_dir_eq(node, fs_entry_path, repo):
                return False
    return True


def get_archive_debian_dirs(packages):
    """Download and unpack the debian directory of each package.

    Each archive is unpacked into ``MISSING_TAGS_DIR/<name>``.  A
    directory that already exists is treated as a cache hit and not
    downloaded again.  Packages for which get_url raises RuntimeError
    are skipped.  The unpacked directories are left on disk.

    Raises requests.HTTPError if the mirror answers with an error
    status.
    """
    apt_pkg.init()
    for package in packages:
        try:
            url = get_url(package)
        except RuntimeError:
            continue
        unpack_dir = os.path.join(MISSING_TAGS_DIR, package.name)
        if os.path.isdir(unpack_dir):
            continue
        response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
        # Fail here rather than handing an error page to tarfile.
        response.raise_for_status()
        try:
            # NOTE(review): the archive comes in over plain HTTP and is
            # extracted without a tarfile extraction filter; consider
            # filter='data' once Python >= 3.12 can be required.
            with io.BytesIO(response.content) as tarstream, \
                    tarfile.open(fileobj=tarstream) as tar:
                tar.extractall(unpack_dir)
        except BaseException:
            # A partially unpacked directory would be mistaken for a
            # complete download on the next run, so remove it.
            shutil.rmtree(unpack_dir, ignore_errors=True)
            raise


def get_url(package):
    """Return the mirror URL of the ``diff`` file of *package*.

    The apt source records are searched for a record of
    ``package.name`` whose version equals ``package.version``.

    Raises RuntimeError if no such record exists in the local apt cache
    or if the record lists no file of type ``diff`` (which is reported
    as a native package).
    """
    source_recs = apt_pkg.SourceRecords()
    # Each lookup() call advances to the next record for the name.  It
    # is expected to return a false value once the records are
    # exhausted; without checking that, a version missing from the
    # local cache would never terminate the search cleanly.
    while source_recs.lookup(package.name):
        if source_recs.version == package.version:
            break
    else:
        print("WARNING: {} {} not found in the apt source records. IT"
              " WILL NOT BE CHECKED!".format(package.name,
                                             package.version),
              file=sys.stderr)
        raise RuntimeError(package.name + " has no source record for "
                           + package.version)

    for record_file in source_recs.files:
        # Index 3 holds the file type, index 2 the path on the mirror.
        if record_file[3] == 'diff':
            return DEBIAN_MIRROR + record_file[2]
    print("WARNING: {} seems to be native. IT WILL NOT BE"
          " CHECKED!".format(package.name), file=sys.stderr)
    raise RuntimeError(package.name + " has no diff file")


if __name__ == '__main__':
    main()
Attachment:
pgpLlz2VvBdQg.pgp
Description: Digitale Signatur von OpenPGP