[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#773294: tracker.debian.org: add support for the vcswatch service



Dear Paul,

Here are three patches that should allow tracker.debian.org to support the vcswatch service.

As an example, you can have a look at my test tracker:

https://distro-tracker.pimeys.fr/pkg/kholidays
https://distro-tracker.pimeys.fr/pkg/puppet-module-puppetlabs-rsync

And one working well:
https://distro-tracker.pimeys.fr/pkg/python-aiosmtpd

Please feel free to review and comment on the patches.

It lacks tests for the task, I'll work on that by the end of the week in a
fourth patch.

-- 
PEB
From 2e3cd8d09faaba73b76b2e911a1d26b83bb6a52b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Elliott=20B=C3=A9cue?= <becue@crans.org>
Date: Tue, 21 Nov 2017 23:45:07 +0100
Subject: [PATCH 1/3] Adds compression utilities for future use with caches

---
 distro_tracker/core/tests/tests_utils.py | 32 ++++++++++++++++++++
 distro_tracker/core/utils/compression.py | 50 ++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 distro_tracker/core/utils/compression.py

diff --git a/distro_tracker/core/tests/tests_utils.py b/distro_tracker/core/tests/tests_utils.py
index fa78030..f3bfd66 100644
--- a/distro_tracker/core/tests/tests_utils.py
+++ b/distro_tracker/core/tests/tests_utils.py
@@ -38,6 +38,7 @@ from distro_tracker.core.utils import now
 from distro_tracker.core.utils import SpaceDelimitedTextField
 from distro_tracker.core.utils import PrettyPrintList
 from distro_tracker.core.utils import verify_signature
+from distro_tracker.core.utils.compression import uncompress_content
 from distro_tracker.core.utils.packages import AptCache
 from distro_tracker.core.utils.packages import extract_vcs_information
 from distro_tracker.core.utils.packages import extract_dsc_file_name
@@ -1539,3 +1540,34 @@ class UtilsTests(TestCase):
     def test_now(self):
         """Ensure distro_tracker.core.utils.now() exists"""
         self.assertIsInstance(now(), datetime.datetime)
+
+
+class CompressionTests(TestCase):
+    def setUp(self):
+        # Set up a cache directory to use in the tests
+        _handler, self.temporary_bzip2_file = tempfile.mkstemp(suffix='.bz2')
+        os.write(_handler, b'BZh91AY&SY\x03X\xf5w\x00\x00\x01\x15\x80`\x00\x00@\x06\x04\x90\x80 \x001\x06LA\x03L"\xe0\x8bb\xa3\x9e.\xe4\x8ap\xa1 \x06\xb1\xea\xee')
+        os.close(_handler)
+        _handler, self.temporary_gzip_file = tempfile.mkstemp(suffix='.gz')
+        os.write(_handler, b"\x1f\x8b\x08\x08\xca\xaa\x14Z\x00\x03helloworld\x00\xf3H\xcd\xc9\xc9W(\xcf/\xcaIQ\x04\x00\x95\x19\x85\x1b\x0c\x00\x00\x00")
+        os.close(_handler)
+        _handler, self.temporary_plain_file = tempfile.mkstemp()
+        os.write(_handler, b"Hello world!")
+        os.close(_handler)
+
+    def tearDown(self):
+        os.unlink(self.temporary_bzip2_file)
+        os.unlink(self.temporary_gzip_file)
+        os.unlink(self.temporary_plain_file)
+
+    def test_bzip2_file(self):
+        output = uncompress_content(self.temporary_bzip2_file)
+        self.assertEqual(output, "Hello world!")
+
+    def test_gzip_file(self):
+        output = uncompress_content(self.temporary_gzip_file)
+        self.assertEqual(output, "Hello world!")
+
+    def test_no_compression_file(self):
+        output = uncompress_content(self.temporary_plain_file)
+        self.assertEqual(output, "Hello world!")
diff --git a/distro_tracker/core/utils/compression.py b/distro_tracker/core/utils/compression.py
new file mode 100644
index 0000000..00f6b90
--- /dev/null
+++ b/distro_tracker/core/utils/compression.py
@@ -0,0 +1,50 @@
+# Copyright 2013 The Distro Tracker Developers
+# See the COPYRIGHT file at the top-level directory of this distribution and
+# at https://deb.li/DTAuthors
+#
+# This file is part of Distro Tracker. It is subject to the license terms
+# in the LICENSE file found in the top-level directory of this
+# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
+# including this file, may be copied, modified, propagated, or distributed
+# except according to the terms contained in the LICENSE file.
+"""
+Utilities for handling compression
+"""
+
+
+def guess_compression_method(filepath):
+    """Given filepath, inspects the file to determine a compression algorithm
+    if relevant."""
+    compressed_magic_bits_map = {
+        b"\x1f\x8b\x08": "gzip",
+        b"\x42\x5a\x68": "bz2",
+    }
+
+    max_magic_bits_len = max(len(key) for key in compressed_magic_bits_map)
+
+    with open(filepath, 'rb') as content_file:
+        begin = content_file.read(max_magic_bits_len)
+    for magic_bits, filetype in compressed_magic_bits_map.items():
+        if begin.startswith(magic_bits):
+            return filetype
+
+    return "plain"
+
+
+def uncompress_content(filepath):
+    """If the content is compressed, uncompress it."""
+
+    compression_method = guess_compression_method(filepath)
+
+    if compression_method == "gzip":
+        import gzip
+        with gzip.open(filepath, 'rb') as content_file:
+            return content_file.read()
+    if compression_method == "bz2":
+        import bz2
+        with bz2.BZ2File(filepath, 'rb') as content_file:
+            return content_file.read()
+
+    # No compression? Return as plain.
+    with open(filepath, 'rb') as content_file:
+        return content_file.read()
-- 
2.11.0

From 67a1aa921229f497d97d0c9790831904a195e42d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Elliott=20B=C3=A9cue?= <becue@crans.org>
Date: Tue, 21 Nov 2017 23:45:46 +0100
Subject: [PATCH 2/3] Implements compression support in HttpCache utilities

---
 distro_tracker/core/utils/http.py | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/distro_tracker/core/utils/http.py b/distro_tracker/core/utils/http.py
index ae46ea2..85c0f8c 100644
--- a/distro_tracker/core/utils/http.py
+++ b/distro_tracker/core/utils/http.py
@@ -22,6 +22,8 @@ import json
 from requests.structures import CaseInsensitiveDict
 import requests
 
+from .compression import uncompress_content
+
 
 def parse_cache_control_header(header):
     """
@@ -88,15 +90,21 @@ class HttpCache(object):
         # If there is no cache freshness date consider the item expired
         return True
 
-    def get_content(self, url):
-        """
-        Returns the content of the cached response for the given URL.
+    def get_content(self, url, compression=False):
+        """Returns the content of the cached response for the given URL.
+
+        If the file is compressed, then uncompress it, else, consider it
+        as plain file.
 
         :rtype: :class:`bytes`
+
         """
         if url in self:
-            with open(self._content_cache_file_path(url), 'rb') as content_file:
-                return content_file.read()
+            if compression:
+                return uncompress_content(self._content_cache_file_path(url))
+            else:
+                with open(self._content_cache_file_path(url), 'rb') as content_file:
+                    return content_file.read()
 
     def get_headers(self, url):
         """
@@ -170,9 +178,8 @@ class HttpCache(object):
         return md5(url.encode('utf-8')).hexdigest()
 
 
-def get_resource_content(url, cache=None):
-    """
-    A helper function which returns the content of the resource found at the
+def get_resource_content(url, cache=None, compression=False):
+    """A helper function which returns the content of the resource found at the
     given URL.
 
     If the resource is already cached in the ``cache`` object and the cached
@@ -188,9 +195,16 @@ def get_resource_content(url, cache=None):
         ``DISTRO_TRACKER_CACHE_DIRECTORY`` cache directory
         is used.
     :type cache: :class:`HttpCache` or an object with an equivalent interface
+    :param compression: The compression of the file accessed via
+        `url`. If False, then no compression. Otherwise, uses the
+        appropriate compression lib to read the file. Currently, only
+        gzip and bz2 are supported. Other compressions might come when
+        useful.
+    :type compression: bool
 
     :returns: The bytes representation of the resource found at the given url
     :rtype: bytes
+
     """
     if cache is None:
         cache_directory_path = settings.DISTRO_TRACKER_CACHE_DIRECTORY
@@ -199,6 +213,6 @@ def get_resource_content(url, cache=None):
     try:
         if cache.is_expired(url):
             cache.update(url)
-        return cache.get_content(url)
+        return cache.get_content(url, compression)
     except:
         pass
-- 
2.11.0

From 1357f6349024f064cdea5f19b81289076cfcab90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Elliott=20B=C3=A9cue?= <becue@crans.org>
Date: Tue, 21 Nov 2017 23:47:49 +0100
Subject: [PATCH 3/3] Implements VCSWatch in the tracker.

 * Closes bug #773294
---
 distro_tracker/core/panels.py                      |   8 +
 .../core/templates/core/panels/general.html        |   2 +-
 .../templates/debian/vcswatch-action-item.html     |   9 +
 distro_tracker/vendor/debian/tracker_tasks.py      | 309 +++++++++++++++++++++
 4 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 distro_tracker/vendor/debian/templates/debian/vcswatch-action-item.html

diff --git a/distro_tracker/core/panels.py b/distro_tracker/core/panels.py
index 1cdd4ff..4e13156 100644
--- a/distro_tracker/core/panels.py
+++ b/distro_tracker/core/panels.py
@@ -235,6 +235,12 @@ class GeneralInformationPanel(BasePanel):
             # There is no general info for the package
             return
 
+        try:
+            vcswatch = PackageExtractedInfo.objects.get(
+                package=self.package, key='vcswatch').value
+        except PackageExtractedInfo.DoesNotExist:
+            vcswatch = {}
+
         general = info.value
         # Add source package URL
         url, implemented = vendor.call('get_package_information_site_url', **{
@@ -247,6 +253,8 @@ class GeneralInformationPanel(BasePanel):
         if 'vcs' in general and 'type' in general['vcs']:
             shorthand = general['vcs']['type']
             general['vcs']['full_name'] = get_vcs_name(shorthand)
+            if vcswatch.get('url', None) is not None:
+                general['vcs']['watch'] = vcswatch['url']
         # Add mailing list archive URLs
         self._add_archive_urls(general)
         # Add developer information links and any other vendor-specific extras
diff --git a/distro_tracker/core/templates/core/panels/general.html b/distro_tracker/core/templates/core/panels/general.html
index e55c564..5049c56 100644
--- a/distro_tracker/core/templates/core/panels/general.html
+++ b/distro_tracker/core/templates/core/panels/general.html
@@ -95,7 +95,7 @@
 	<a href="{{ ctx.vcs.url }}">{{ vcs }}</a>
 	{% endif %}
 	{% if ctx.vcs.browser %}
-	(<a href="{{ ctx.vcs.browser }}">Browse</a>)
+	(<a href="{{ ctx.vcs.browser }}">Browse</a>{% if ctx.vcs.watch %}, <a href="{{ ctx.vcs.watch }}">QA</a>{% endif %})
 	{% endif %}
 	{% endwith %}
     </li>
diff --git a/distro_tracker/vendor/debian/templates/debian/vcswatch-action-item.html b/distro_tracker/vendor/debian/templates/debian/vcswatch-action-item.html
new file mode 100644
index 0000000..553f054
--- /dev/null
+++ b/distro_tracker/vendor/debian/templates/debian/vcswatch-action-item.html
@@ -0,0 +1,9 @@
+{% with description=item.extra_data.description %}
+{% with error=item.extra_data.error %}
+
+<a href="{{item.extra_data.vcswatch_url}}">VCSwatch</a> reports
+that {{description}}<br/><br/>
+{% if error %}
+<span>{{error}}</span>
+{% endif %}
+{% endwith %}{% endwith %}
diff --git a/distro_tracker/vendor/debian/tracker_tasks.py b/distro_tracker/vendor/debian/tracker_tasks.py
index 82c2572..f7ba0ae 100644
--- a/distro_tracker/vendor/debian/tracker_tasks.py
+++ b/distro_tracker/vendor/debian/tracker_tasks.py
@@ -2439,3 +2439,312 @@ class MultiArchHintsTask(BaseTask):
 
             ActionItem.objects.delete_obsolete_items([self.action_item_type],
                                                      packages.keys())
+
+
+class UpdateVcsWatchTask(BaseTask):
+    """
+    Updates packages' vcswatch stats.
+    """
+    ACTION_ITEM_TYPE_NAME = 'vcswatch-warnings-and-errors'
+    ITEM_DESCRIPTION = 'This package <a href="{url}">{report}</a>'
+    ITEM_FULL_DESCRIPTION_TEMPLATE = 'debian/vcswatch-action-item.html'
+    VCSWATCH_URL = 'https://qa.debian.org/cgi-bin/vcswatch?package=%(package)s'
+
+    VCSWATCH_STATUS_DESCS = {
+        u"NEW": {
+            "brief": "has a new version in the VCS.",
+            "long": (
+                "this package has a new version ready in the VCS. "
+                "You should consider uploading into the archive."
+            ),
+            "severity": ActionItem.SEVERITY_NORMAL,
+        },
+        u"COMMITS": {
+            "brief": "has {commits} new commits in its VCS.",
+            "long": (
+                "this package seems to have new commits in its "
+                "VCS. You should consider updating the debian/changelog "
+                "and to upload this new version into the archive."
+            ),
+            "severity": ActionItem.SEVERITY_NORMAL,
+        },
+        u"OLD": {
+            "brief": "VCS is NOT up to date!",
+            "long": (
+                "the current version of the package is NOT in its "
+                "VCS. You should upload your changes immediately."
+            ),
+            "severity": ActionItem.SEVERITY_HIGH,
+        },
+        u"UNREL": {
+            "brief": "VCS has unreleased changelog!",
+            "long": (
+                "this package has been uploaded into the archive but "
+                "the debian/changelog into the VCS is still UNRELEASED. "
+                "You should consider updating the VCS."
+            ),
+            "severity": ActionItem.SEVERITY_HIGH,
+        },
+        u"ERROR": {
+            "brief": "VCS has an error!",
+            "long": (
+                "there is an error with this package's VCS, or the "
+                "debian/changelog file inside it. Either you should "
+                "create the VCS or you should fix whatever issue there is."
+            ),
+            "severity": ActionItem.SEVERITY_HIGH,
+        },
+        u"DEFAULT": {
+            "brief": "\"Huh, this is weird.\"",
+            "long": (
+                "you shouldn't see this report. Please report this bug "
+                "to the tracker's maintainers with the current URL of "
+                "the page you're seeing."
+            ),
+            "severity": ActionItem.SEVERITY_HIGH,
+        },
+    }
+
+    def __init__(self, force_update=False, *args, **kwargs):
+        super(UpdateVcsWatchTask, self).__init__(*args, **kwargs)
+        self.force_update = force_update
+        self.vcswatch_action_item_type = ActionItemType.objects.create_or_update(
+            type_name=self.ACTION_ITEM_TYPE_NAME,
+            full_description_template=self.ITEM_FULL_DESCRIPTION_TEMPLATE)
+
+    def set_parameters(self, parameters):
+        if 'force_update' in parameters:
+            self.force_update = parameters['force_update']
+
+    @staticmethod
+    def get_data_checksum(data):
+        json_dump = json.dumps(data, sort_keys=True)
+        if json_dump is not six.binary_type:
+            json_dump = json_dump.encode('UTF-8')
+        return hashlib.md5(json_dump).hexdigest()
+
+    def get_vcswatch_data(self):
+        url = 'https://qa.debian.org/data/vcswatch/vcswatch.json.gz'
+        data = json.loads(get_resource_content(url, compression=True).decode('utf-8'))
+
+        __out = {}
+        # Indexing by package name saves a lot of list searches later.
+        for entry in data:
+            __out[entry[u'package']] = entry
+
+        return __out
+
+    def update_packages_item(self, packages, vcswatch_datas):
+        """Generates the lists of :class:`ActionItem` to be added,
+        deleted or updated regarding the status of their packages.
+
+        Categories of statuses are:
+        {u'COMMITS', u'ERROR', u'NEW', u'OK', u'OLD', u'UNREL'}
+
+        """
+
+        __todo = {
+            'drop': {
+                'action_items': [],
+                'package_infos': []
+            },
+            'update': {
+                'action_items': [],
+                'package_infos': []
+            },
+            'add': {
+                'action_items': [],
+                'package_infos': []
+            },
+        }
+
+        # Fetches all PackageExtractedInfo for packages having a vcswatch
+        # key. As the pair (package, key) is unique, there is a bijection
+        # between these data, and we fetch them classifying them by package
+        # name.
+        package_infos = {
+            package_info.package.name: package_info
+            for package_info in PackageExtractedInfo.objects.select_related(
+                'package'
+            ).filter(key='vcswatch').only('package__name', 'value')
+        }
+
+        # Fetches all ActionItems for packages concerned by a vcswatch action.
+        action_items = {
+            action_item.package.name: action_item
+            for action_item in ActionItem.objects.select_related(
+                'package'
+            ).filter(item_type=self.vcswatch_action_item_type)
+        }
+
+        for package in packages:
+            # Get the vcswatch_data from the whole vcswatch_datas
+            vcswatch_data = vcswatch_datas[package.name]
+            package_status = vcswatch_data[u'status']
+
+            # Get the old action item for this warning, if it exists.
+            action_item = action_items.get(package.name, None)
+            package_info = package_infos.get(package.name, None)
+
+            if package_status == u"OK":
+                # Everything is fine, let's purge the action item and the
+                # package extracted info!
+                if action_item:
+                    __todo['drop']['action_items'].append(action_item)
+
+                if package_info:
+                    __todo['drop']['package_infos'].append(package_info)
+
+                # Nothing more to do!
+                continue
+
+            # If we are here, then something is not OK. Let's check if we
+            # already had some intel regarding the current package status.
+            if action_item is None:
+                action_item = ActionItem(
+                    package=package,
+                    item_type=self.vcswatch_action_item_type)
+                __todo['add']['action_items'].append(action_item)
+            else:
+                __todo['update']['action_items'].append(action_item)
+
+            # Same thing with PackageExtractedInfo
+            if package_info is None:
+                package_info = PackageExtractedInfo(
+                    package=package,
+                    key='vcswatch',
+                )
+                __todo['add']['package_infos'].append(package_info)
+            else:
+                __todo['update']['package_infos'].append(package_info)
+
+
+            # Computes the watch URL
+            vcswatch_url = self.VCSWATCH_URL % {'package': package.name}
+
+            if action_item.extra_data:
+                extra_data = action_item.extra_data
+            else:
+                extra_data = {}
+
+            # Fetches the long description and severity from
+            # the VCSWATCH_STATUS_DESCS dict.
+            description = self.VCSWATCH_STATUS_DESCS.get(
+                package_status,
+                self.VCSWATCH_STATUS_DESCS[u"DEFAULT"],
+            )['long']
+            action_item.severity = self.VCSWATCH_STATUS_DESCS.get(
+                package_status,
+                self.VCSWATCH_STATUS_DESCS[u"DEFAULT"],
+            )['severity']
+
+            # The new data
+            new_extra_data = {
+                'status': package_status,
+                'description': description,
+                'error': vcswatch_data[u"error"],
+                'vcswatch_url': vcswatch_url,
+                'commits': vcswatch_data[u"commits"],
+            }
+
+            # Now we have to determine if anything requires an update.
+            # If not, let's avoid abusing the database resources.
+            new_extra_data_checksum = self.get_data_checksum(new_extra_data)
+
+            extra_data_match = all([
+                new_extra_data[key] == extra_data.get(key, None)
+                for key in new_extra_data
+            ])
+            package_info_match = (
+                package_info.value.get('checksum', None) == new_extra_data_checksum and
+                package_info.value.get('url', None) == vcswatch_url
+            )
+
+            # If everything is fine and we are not forcing the update
+            # then we proceed to the next package.
+            if extra_data_match and package_info_match and not self.force_update:
+                # Remove from the todolist
+                __todo['update']['action_items'].remove(action_item)
+                __todo['update']['package_infos'].remove(package_info)
+                continue
+
+            # If we're here, there is something to create or to
+            # update.
+            action_item.extra_data = new_extra_data
+            package_info.value = {
+                'checksum': new_extra_data_checksum,
+                'url': vcswatch_url,
+            }
+
+            # Report for short description of the :class:`ActionItem`
+            report = self.VCSWATCH_STATUS_DESCS.get(
+                package_status,
+                self.VCSWATCH_STATUS_DESCS[u"DEFAULT"],
+            )['brief']
+
+            # If COMMITS, then string format the report.
+            if package_status == u'COMMITS':
+                report = report.format(commits=vcswatch_data[u"commits"])
+
+            action_item.short_description = self.ITEM_DESCRIPTION.format(
+                url=vcswatch_url,
+                report=report,
+            )
+
+        return __todo
+
+    def execute(self):
+        # Get the actual vcswatch json file from qa.debian.org
+        vcs_data = self.get_vcswatch_data()
+
+        # Nothing? Return.
+        if not vcs_data:
+            return
+
+        # Only fetch the packages that are in the json dict.
+        packages = PackageName.objects.filter(name__in=vcs_data.keys())
+
+        # Faster than fetching the action items one by one in a loop
+        # when handling each package.
+        packages.prefetch_related('action_items')
+
+        # Determine whether something is to be kept or dropped.
+        todo = self.update_packages_item(packages, vcs_data)
+
+        with transaction.atomic():
+            # Delete the :class:`ActionItem` and the
+            # :class:`PackageExtractedInfo` that are obsolete.
+            ActionItem.objects.delete_obsolete_items(
+                [self.vcswatch_action_item_type],
+                vcs_data.keys())
+            PackageExtractedInfo.objects.filter(
+                key='vcswatch').exclude(
+                package__name__in=vcs_data.keys()).delete()
+
+            # Then delete the :class:`ActionItem` and the
+            # :class:`PackageExtractedInfo` that are to be deleted.
+            ActionItem.objects.filter(
+                item_type__type_name=self.vcswatch_action_item_type.type_name,
+                id__in=[
+                    action_item.id
+                    for action_item in todo['drop']['action_items']
+                ]
+            ).delete()
+            PackageExtractedInfo.objects.filter(
+                key='vcswatch',
+                id__in=[
+                    package_info.id
+                    for package_info in todo['drop']['package_infos']
+                ]
+            ).delete()
+
+            # Then bulk_create the :class:`ActionItem` to add and the
+            # :class:`PackageExtractedInfo`
+            ActionItem.objects.bulk_create(todo['add']['action_items'])
+            PackageExtractedInfo.objects.bulk_create(todo['add']['package_infos'])
+
+            # Update existing entries
+            for action_item in todo['update']['action_items']:
+                action_item.save()
+            for package_info in todo['update']['package_infos']:
+                package_info.save()
-- 
2.11.0

Attachment: signature.asc
Description: PGP signature


Reply to: