[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

missing-tag script



Greetings,

As recently discussed on IRC, I wrote a script to detect missing tags
on our packages repository.

It is run from the toplevel directory of the packages repository.
When run, it looks on the public UDD mirror for packages that are
missing a tag. For each package that misses a tag, it downloads the
debian directory from a mirror (httpredir.debian.org/debian), searches
the history (starting with the most recent commits) for the first
commit with a matching debian directory, and tags that commit.

It doesn't handle native packages, as they don't have the debian
directory separated from the rest of the package. It does, however,
warn about such packages if they miss a tag.

The script is written in python3. To run it you need the following
packages installed:
 * python3 (obviously...)
 * python3-psycopg2 (For UDD access)
 * python3-dulwich (For writing and reading git repositories)
 * python3-apt (For reading the package cache)
 * python3-requests (For retrieving the debian directories from the
                     mirror)

On most systems all of them, except psycopg2 and dulwich, will already
be installed.

As most affected packages haven't been changed since the transition,
the most recent commit has received most of the tags, but I think this
shouldn't be a problem, as uploading the package from that commit
should reproduce the package in the repository.

If no one objects to the above quirk, I will push the generated tags
soon.

The script is attached. I won't add it to pkg-haskell-tools, as it is
kind of hacky around the edges and is missing a proper command line
interface and other things you expect from a proper script.

Regards
Sven
#!/usr/bin/python3

import sys
import os
import tempfile
import tarfile
import io

import psycopg2
from dulwich.repo import Repo
from dulwich.objects import Blob, Tree
import apt_pkg
import requests

# Connection parameters for the UDD mirror queried by main(); they are passed
# unchanged to psycopg2.connect() as database, user, password and host.
UDD = "udd"
UDD_USER = "public-udd-mirror"
UDD_PASSWORD = "public-udd-mirror"
UDD_HOST = "public-udd-mirror.xvm.mit.edu"

# Base URL that get_url() prepends to the path of a source package file.
DEBIAN_MIRROR = "http://httpredir.debian.org/debian/"

class Package:
    """A source package with its archive version and its tag status.

    Identity is defined by the name alone: two instances with the same
    name compare equal and hash alike, whatever their version or tag
    flag.
    """

    def __init__(self, name, version, has_tag):
        self.name, self.version, self.has_tag = name, version, has_tag

    def __repr__(self):
        fields = (self.name, self.version, self.has_tag)
        return "Package({}, {}, {})".format(*fields)

    def __eq__(self, other):
        # Objects without a ``name`` attribute are never equal to a Package.
        try:
            same_name = self.name == other.name
        except AttributeError:
            same_name = False
        return same_name

    def __hash__(self):
        # Must agree with __eq__, so only the name is hashed.
        return hash(self.name)

def main():
    """Find every package that lacks a tag and tag a matching commit.

    The optional first command line argument names the release to check
    (default: "sid").  If it equals the ``role`` column of a row in UDD's
    ``public.releases`` table, it is replaced by that row's ``release``
    value; otherwise it is used as given.

    The function changes the working directory to ``p`` and opens the git
    repository one level above it, so it has to be started from the top
    level of the packages repository.

    Raises:
        OSError: if the ``p`` directory does not exist.  The repository is
            only closed once it has actually been opened, so this error is
            no longer masked by a failure in the cleanup code.
    """
    try:
        release = sys.argv[1]
    except IndexError:
        release = "sid"

    # Acquire the repository *before* entering the try block: cleanup must
    # only run for a repository that was successfully opened.
    os.chdir('p')
    repo = Repo("..")
    try:
        conn = psycopg2.connect(database=UDD, user=UDD_USER,
                                password=UDD_PASSWORD, host=UDD_HOST)
        try:
            # "with conn" only commits or rolls back the transaction; the
            # connection itself is closed explicitly in the finally below.
            with conn:
                with conn.cursor() as cur:
                    cur.execute("SELECT role, release FROM public.releases "
                                "WHERE role != '';")
                    for role, resolved in cur.fetchall():
                        if release == role:
                            release = resolved
                            break
                print("Searching untagged packages...")
                untagged = set(find_untagged(release, conn, repo))
        finally:
            conn.close()
        tag_all(untagged, repo)
    finally:
        repo.close()

def find_untagged(release, conn, repo):
    """Yield a Package for each directory in the cwd that has no tag.

    Every subdirectory of the current working directory is treated as a
    package name and handed to check_package().  Entries that are not
    directories, packages for which check_package() raises RuntimeError,
    and packages that already carry a tag are skipped.
    """
    for entry in os.listdir("."):
        if not os.path.isdir(entry):
            continue
        try:
            candidate = check_package(entry, release, conn, repo)
        except RuntimeError:
            # Nothing can be checked for this package; move on.
            continue
        if candidate.has_tag:
            continue
        yield candidate

def check_package(pkg, release, conn, repo):
    """Look up the version of *pkg* in *release* and check for its tag.

    The version is read from the ``public.all_sources`` table through
    *conn*.  The tag that is looked for in *repo* is named
    ``<pkg>_v<version>`` with every ":" in the version replaced by "_".

    Returns:
        A Package carrying the name, the version and whether the tag
        exists in ``repo.refs``.

    Raises:
        RuntimeError: if the query returns no row for the package; a
            message is printed to stderr first.
    """
    query = ("SELECT version FROM public.all_sources WHERE "
             "source = %s AND release = %s;")
    with conn.cursor() as cursor:
        cursor.execute(query, (pkg, release))
        row = cursor.fetchone()
        if row is None:
            print(pkg + " not found in udd (maybe it is not in the"
                  " archive yet?)", file=sys.stderr)
            raise RuntimeError("Package not in database")
        version = row[0]
    tag_name = pkg + "_v" + version.replace(":", "_")
    ref = b'refs/tags/' + tag_name.encode()
    return Package(pkg, version, ref in repo.refs)

def tag_all(packages, repo):
    """Tag, for each package, the first commit matching the archive.

    The debian directories of *packages* are fetched with
    get_archive_debian_dirs().  The history returned by
    ``repo.get_walker()`` is then visited in the order the walker yields it
    (expected to be newest first -- confirm against the dulwich
    documentation), and the first visited commit whose
    ``p/<name>/debian`` tree matches the downloaded directory receives the
    tag ``<name>_v<version>`` (":" in the version replaced by "_").

    Args:
        packages: mutable set of Package objects that lack a tag.  Every
            package that gets tagged is removed from this set.
        repo: repository object used to walk commits, read trees and
            write the tag refs.

    Packages left over at the end are listed on stdout.
    """
    print("Retrieving debian directories of untagged packages...")
    get_archive_debian_dirs(packages)
    print("Searching for right commits for tags")

    # Everything that only depends on the package is computed once here
    # instead of once per commit.
    candidates = {}
    for package in packages:
        fs_path = os.path.join('../missing-tags', package.name, 'debian')
        if not os.path.isdir(fs_path):
            # Nothing was unpacked for this package, so no commit can
            # match; leaving it out lets the walk stop as soon as all
            # checkable packages are tagged.  It is still reported below.
            continue
        # Paths inside a git tree always use "/" regardless of platform.
        git_dir = b'/'.join((b'p', package.name.encode(), b'debian'))
        tag = package.name + '_v' + package.version.replace(":", "_")
        candidates[package] = (git_dir, fs_path, tag)

    for entry in repo.get_walker():
        if not candidates:
            break
        commit = entry.commit
        tree = repo[commit.tree]
        tagged_now = set()
        for package, (git_dir, fs_path, tag) in candidates.items():
            try:
                debian_tree_id = tree.lookup_path(repo.get_object,
                                                  git_dir)[1]
                debian_tree = repo[debian_tree_id]
            except KeyError:
                # If the package doesn't exist in the commit, we
                # won't tag it here.
                continue
            if git_fs_dir_eq(debian_tree, fs_path, repo):
                print("Tagging " + commit.id.decode() + " with " +
                      tag)
                repo[b'refs/tags/' + tag.encode()] = commit.id
                tagged_now.add(package)
        # Removal happens after the inner loop so that the dict is not
        # modified while it is being iterated.
        for package in tagged_now:
            del candidates[package]
        packages -= tagged_now

    if packages:
        print("Could not find commits for these packages:")
        for package in packages:
            print(package.name)

def _contains_files(path):
    """Return True if any non-directory entry exists anywhere below *path*."""
    return any(files for _root, _dirs, files in os.walk(path))


def git_fs_dir_eq(git_tree, fs_path, repo):
    """Return True if *git_tree* and the directory *fs_path* are equal.

    Every blob in the tree must exist below *fs_path* as a file with
    identical content, every subtree must match the corresponding
    subdirectory, and *fs_path* must not hold files that the tree lacks.
    Directories on disk that contain no files at all are ignored, because
    git cannot record empty directories.

    Args:
        git_tree: tree object whose ``items()`` yield entries with
            ``path`` and ``sha`` attributes.
        fs_path: path of the directory to compare against.
        repo: object store used to resolve ``entry.sha`` to a Blob or Tree.
    """
    if not os.path.isdir(fs_path):
        return False

    tracked = set()
    for entry in git_tree.items():
        name = entry.path.decode()
        tracked.add(name)
        node = repo[entry.sha]
        fs_entry = os.path.join(fs_path, name)
        if isinstance(node, Blob):
            if not os.path.isfile(fs_entry):
                return False
            # Cheap size comparison first, to avoid reading files that
            # cannot be equal anyway.
            if os.path.getsize(fs_entry) != node.raw_length():
                return False
            with open(fs_entry, 'rb') as file:
                if file.read() != node.as_raw_string():
                    return False
        else:
            assert isinstance(node, Tree)
            if not git_fs_dir_eq(node, fs_entry, repo):
                return False

    # The loop above only proves that the tree is contained in the
    # directory; anything extra on disk means the two are not equal.
    for name in os.listdir(fs_path):
        if name in tracked:
            continue
        extra = os.path.join(fs_path, name)
        if not os.path.isdir(extra) or _contains_files(extra):
            return False
    return True

def get_archive_debian_dirs(packages):
    """Download and unpack the archive's debian directory for *packages*.

    For every package the URL returned by get_url() is fetched and the
    tarball is unpacked into ``../missing-tags/<name>``.  A package whose
    target directory already exists is not downloaded again.  Packages for
    which get_url() raises RuntimeError are skipped.

    Raises:
        requests.HTTPError: if the mirror answers with an error status.
        tarfile.TarError: if the downloaded file is not a readable tar
            archive.
    """
    import shutil

    apt_pkg.init()
    for package in packages:
        try:
            url = get_url(package)
        except RuntimeError:
            continue
        unpack_dir = os.path.join('../missing-tags/', package.name)
        if os.path.isdir(unpack_dir):
            # Already unpacked by an earlier run.
            continue
        response = requests.get(url)
        # Without this check an error page would be handed to tarfile.
        response.raise_for_status()
        try:
            with io.BytesIO(response.content) as tarstream, \
                 tarfile.open(fileobj=tarstream) as tar:
                # The archive comes from the network, so use the "data"
                # extraction filter where the running Python provides it.
                if hasattr(tarfile, 'data_filter'):
                    tar.extractall(unpack_dir, filter='data')
                else:
                    tar.extractall(unpack_dir)
        except BaseException:
            # A half-unpacked directory would be mistaken for a complete
            # one on the next run (see the isdir check above).
            shutil.rmtree(unpack_dir, ignore_errors=True)
            raise

def get_url(package):
    """Return the mirror URL of the 'diff' file of *package*.

    The apt source records are searched for the record of ``package.name``
    whose version equals ``package.version``.  Within that record the first
    file whose entry has ``'diff'`` at index 3 is selected, and index 2 of
    the entry is appended to DEBIAN_MIRROR (presumably the file type and
    its path below the mirror root -- confirm against python-apt).

    Raises:
        RuntimeError: if the source records hold no record with the wanted
            version, or if the record has no 'diff' file (the package
            seems to be native).  A warning is printed to stderr in both
            cases.
    """
    source_recs = apt_pkg.SourceRecords()
    while True:
        # Each lookup() advances to the next record with this name; a
        # falsy result means the records are exhausted.  Ignoring it made
        # the search never finish cleanly for versions that apt does not
        # know about.
        if not source_recs.lookup(package.name):
            print("WARNING: {} {} not found in the apt source"
                  " records.".format(package.name, package.version),
                  file=sys.stderr)
            raise RuntimeError(package.name + " " + str(package.version) +
                               " not found in apt source records")
        if source_recs.version == package.version:
            break
    for entry in source_recs.files:
        if entry[3] == 'diff':
            return DEBIAN_MIRROR + entry[2]
    print("WARNING: {} seems to be native. IT WILL NOT BE"
          " CHECKED!".format(package.name), file=sys.stderr)
    raise RuntimeError(package.name + " has no diff file")

# Start the tagging run only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()

Attachment: pgpLlz2VvBdQg.pgp
Description: Digitale Signatur von OpenPGP


Reply to: