[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#853189: tracker.debian.org: Ecnoding issue / Code injection through Maintainer field (and probably others)



Niels Thykier wrote:
>  * tracker.d.o does *not* import excuses.yaml but update_excuses.html
>    (as far as I am informed at least)

True.

Here is a patch for tracker to parse YAML instead of HTML.

Cheers,
Christophe
>From 04692b5c65124b930a94f668cd2b409269d186c5 Mon Sep 17 00:00:00 2001
From: Christophe Siraut <tobald@debian.org>
Date: Wed, 1 Feb 2017 17:05:05 +0100
Subject: [PATCH] Use excuses.yaml instead of parsing HTML. Closes: #853189

---
 .../vendor/debian/tests-data/update_excuses-1.html |  11 ---
 .../vendor/debian/tests-data/update_excuses-1.yaml |  11 +++
 .../vendor/debian/tests-data/update_excuses-2.html |  11 ---
 .../vendor/debian/tests-data/update_excuses-2.yaml |  12 +++
 distro_tracker/vendor/debian/tests.py              |  11 +--
 distro_tracker/vendor/debian/tracker_tasks.py      | 106 ++++++---------------
 6 files changed, 56 insertions(+), 106 deletions(-)
 delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.html
 create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
 delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.html
 create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml

diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html b/distro_tracker/vendor/debian/tests-data/update_excuses-1.html
deleted file mode 100644
index c23541e..0000000
--- a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd";>
-<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>
-<p>Generated: 2013.08.12 10:03:22 +0000</p>
-<ul>
-<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0)
-<ul>
-<li>Maintainer: Some Maintainer
-<li>20 days old (needed 10 days)
-<li>Not considered
-</ul>
-</ul></body></html>
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
new file mode 100644
index 0000000..bb0d86e
--- /dev/null
+++ b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
@@ -0,0 +1,11 @@
+generated-date: 2017-02-01 06:47:18.195464
+sources:
+- excuses:
+  - 20 days old (needed 10 days)
+  hints:
+  is-candidate:
+  item-name: dummy-package
+  new-version: 2.0.0
+  old-version: 1.0.0
+  reason: []
+  source: dummy-package
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html b/distro_tracker/vendor/debian/tests-data/update_excuses-2.html
deleted file mode 100644
index 4666c7b..0000000
--- a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd";>
-<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>
-<p>Generated: 2013.08.12 10:03:22 +0000</p>
-<ul>
-<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0)
-<ul>
-<li>Maintainer: Some Maintainer
-<li>10 days old (needed 10 days)
-<li>Not considered
-</ul>
-</ul></body></html>
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml
new file mode 100644
index 0000000..f3e74be
--- /dev/null
+++ b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml
@@ -0,0 +1,12 @@
+generated-date: 2017-02-01 06:47:18.195464
+sources:
+- excuses:
+  - 10 days old (needed 10 days)
+  hints:
+  is-candidate:
+  item-name: dummy-package
+  new-version: 2.0.0
+  old-version: 1.0.0
+  reason: []
+  source: dummy-package
+
diff --git a/distro_tracker/vendor/debian/tests.py b/distro_tracker/vendor/debian/tests.py
index b67271e..5a56566 100644
--- a/distro_tracker/vendor/debian/tests.py
+++ b/distro_tracker/vendor/debian/tests.py
@@ -1760,15 +1760,14 @@ class UpdateExcusesTaskActionItemTest(TestCase):
 
     def set_update_excuses_content(self, content):
         """
-        Sets the stub content of the update_excuses.html that the task will
+        Sets the stub content of the update_excuses.yaml that the task will
         have access to.
         """
-        self.task._get_update_excuses_content.return_value = iter(
-            content.splitlines())
+        self.task._get_update_excuses_content.return_value = content
 
     def set_update_excuses_content_from_file(self, file_name):
         """
-        Sets the stub content of the update_excuses.html that the task will
+        Sets the stub content of the update_excuses.yaml that the task will
         have access to based on the content of the test file with the given
         name.
         """
@@ -1786,7 +1785,7 @@ class UpdateExcusesTaskActionItemTest(TestCase):
         Tests that an action item is created when a package has not moved to
         testing after the allocated period.
         """
-        self.set_update_excuses_content_from_file('update_excuses-1.html')
+        self.set_update_excuses_content_from_file('update_excuses-1.yaml')
         # Sanity check: no action items currently
         self.assertEqual(0, ActionItem.objects.count())
         expected_data = {
@@ -1834,7 +1833,7 @@ class UpdateExcusesTaskActionItemTest(TestCase):
             package=self.package_name,
             item_type=self.get_action_item_type(),
             short_description="Desc")
-        self.set_update_excuses_content_from_file('update_excuses-2.html')
+        self.set_update_excuses_content_from_file('update_excuses-2.yaml')
 
         self.run_task()
 
diff --git a/distro_tracker/vendor/debian/tracker_tasks.py b/distro_tracker/vendor/debian/tracker_tasks.py
index 0a7b185..efbe152 100644
--- a/distro_tracker/vendor/debian/tracker_tasks.py
+++ b/distro_tracker/vendor/debian/tracker_tasks.py
@@ -830,86 +830,36 @@ class UpdateExcusesTask(BaseTask):
             return True
         return False
 
-    def _extract_problems_in_excuses_item(self, subline, package, problematic):
-        if 'days old (needed' in subline:
-            words = subline.split()
-            age, limit = words[0], words[4]
-            if age != limit:
-                # It is problematic only when the age is strictly
-                # greater than the limit.
-                problematic[package] = {
-                    'age': age,
-                    'limit': limit,
-                }
+    def _extract_problematic(self, item):
+        if not 'policy_info' in item or not 'age' in item['policy_info']:
+            return
+        package = item['item-name']
+        age = item['policy_info']['age']['current-age']
+        limit = item['policy_info']['age']['age-requirement']
+        if age > limit:
+            return (package, {'age': age, 'limit': limit})
 
-    def _get_excuses_and_problems(self, content_lines):
+    def _get_excuses_and_problems(self, content):
         """
-        Gets the excuses for each package from the given iterator of lines
-        representing the excuses html file.
-        Also finds a list of packages which have not migrated to testing even
-        after the necessary time has passed.
+        Gets the excuses for each package.
+        Also finds a list of packages which have not migrated to testing
+        agter the necessary time has passed.
 
-        :returns: A two-tuple where the first element is a dict mapping
-            package names to a list of excuses. The second element is a dict
-            mapping package names to a problem information. Problem information
-            is a dict with the keys ``age`` and ``limit``.
+        :returns: A two-tuple  where the first element is a dict mapping
+        package names to a list of excuses. The second element is a dict
+        mapping packages names to a problem information. Problem information
+        is a dict with the keys ``age`` and ``limit``.
         """
-        try:
-            # Skip all HTML before the first list
-            while '<ul>' not in next(content_lines):
-                pass
-        except StopIteration:
+        if not 'sources' in content:
             logger.warning("Invalid format of excuses file")
             return
 
-        top_level_list = True
-        package = ""
-        package_excuses = {}
-        problematic = {}
-        excuses = []
-        for line in content_lines:
-            if isinstance(line, six.binary_type):
-                line = line.decode('utf-8')
-            if '</ul>' in line:
-                # The inner list is closed -- all excuses for the package are
-                # processed and we're back to the top-level list.
-                top_level_list = True
-                if '/' in package:
-                    continue
-                # Done with the package
-                package_excuses[package] = deepcopy(excuses)
-                continue
-
-            if '<ul>' in line:
-                # Entering the list of excuses
-                top_level_list = False
-                continue
-
-            if top_level_list:
-                # The entry in the top level list outside of an inner list is
-                # a <li> item giving the name of the package for which the
-                # excuses follow.
-                words = re.split("[><() ]", line)
-                package = words[6]
-                excuses = []
-                top_level_list = False
-                continue
-
-            line = line.strip()
-            for subline in line.split("<li>"):
-                if self._skip_excuses_item(subline):
-                    continue
-
-                # Check if there is a problem for the package.
-                self._extract_problems_in_excuses_item(subline, package,
-                                                       problematic)
-
-                # Extract the rest of the excuses
-                # If it contains a link to an anchor convert it to a link to a
-                # package page.
-                excuses.append(self._adapt_excuse_links(subline))
-
-        return package_excuses, problematic
+        items = content['sources']
+        linked = lambda e: map(self._adapt_excuse_links, e)
+        excuses = [(e['item-name'], linked(e['excuses'])) for e in items]
+        problems = map(self._extract_problematic, items)
+        problematic = [p for p in problems if p]
+        return dict(excuses), dict(problematic)
 
     def _create_action_item(self, package, extra_data):
         """
@@ -944,16 +894,16 @@ class UpdateExcusesTask(BaseTask):
 
     def _get_update_excuses_content(self):
         """
-        Function returning the content of the update_excuses.html file as an
-        terable of lines.
-        Returns ``None`` if the content in the cache is up to date.
+        Function returning the content of excuses from debian-release
+        :returns: a dict of excuses or ``None`` if the content in the
+        cache is up to date.
         """
-        url = 'https://release.debian.org/britney/update_excuses.html'
+        url = 'https://release.debian.org/britney/excuses.yaml'
         response, updated = self.cache.update(url, force=self.force_update)
         if not updated:
             return
 
-        return response.iter_lines(decode_unicode=True)
+        return yaml.load(response.text)
 
     def execute(self):
         content_lines = self._get_update_excuses_content()
-- 
2.11.0


Reply to: