Re: Bug#853189: tracker.debian.org: Ecnoding issue / Code injection through Maintainer field (and probably others)
- To: 853189@bugs.debian.org, debian-release@lists.debian.org
- Subject: Re: Bug#853189: tracker.debian.org: Ecnoding issue / Code injection through Maintainer field (and probably others)
- From: Christophe Siraut <d@tobald.eu.org>
- Date: Wed, 1 Feb 2017 17:13:53 +0100
- Message-id: <[🔎] 20170201161353.7gmfcvxk5fxhkzz6@localhost>
- In-reply-to: <52dfed03-cc42-d6d7-d7fd-e115e88809fc@thykier.net>
- References: <148578742401.8158.7519569264622149962.reportbug@portux.lan.naturalnet.de> <20170130154854.ebjhrqpztsnsrwt4@mapreri.org> <52dfed03-cc42-d6d7-d7fd-e115e88809fc@thykier.net>
Niels Thykier wrote:
> * tracker.d.o does *not* import excuses.yaml but update_excuses.html
> (as far as I am informed at least)
True.
Here is a patch for tracker to parse YAML instead of HTML.
Cheers,
Christophe
>From 04692b5c65124b930a94f668cd2b409269d186c5 Mon Sep 17 00:00:00 2001
From: Christophe Siraut <tobald@debian.org>
Date: Wed, 1 Feb 2017 17:05:05 +0100
Subject: [PATCH] Use excuses.yaml instead of parsing HTML. Closes: #853189
---
.../vendor/debian/tests-data/update_excuses-1.html | 11 ---
.../vendor/debian/tests-data/update_excuses-1.yaml | 11 +++
.../vendor/debian/tests-data/update_excuses-2.html | 11 ---
.../vendor/debian/tests-data/update_excuses-2.yaml | 12 +++
distro_tracker/vendor/debian/tests.py | 11 +--
distro_tracker/vendor/debian/tracker_tasks.py | 106 ++++++---------------
6 files changed, 56 insertions(+), 106 deletions(-)
delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.html
create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.html
create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html b/distro_tracker/vendor/debian/tests-data/update_excuses-1.html
deleted file mode 100644
index c23541e..0000000
--- a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
-<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>
-<p>Generated: 2013.08.12 10:03:22 +0000</p>
-<ul>
-<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0)
-<ul>
-<li>Maintainer: Some Maintainer
-<li>20 days old (needed 10 days)
-<li>Not considered
-</ul>
-</ul></body></html>
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
new file mode 100644
index 0000000..bb0d86e
--- /dev/null
+++ b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml
@@ -0,0 +1,11 @@
+generated-date: 2017-02-01 06:47:18.195464
+sources:
+- excuses:
+ - 20 days old (needed 10 days)
+ hints:
+ is-candidate:
+ item-name: dummy-package
+ new-version: 2.0.0
+ old-version: 1.0.0
+ reason: []
+ source: dummy-package
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html b/distro_tracker/vendor/debian/tests-data/update_excuses-2.html
deleted file mode 100644
index 4666c7b..0000000
--- a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
-<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>
-<p>Generated: 2013.08.12 10:03:22 +0000</p>
-<ul>
-<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0)
-<ul>
-<li>Maintainer: Some Maintainer
-<li>10 days old (needed 10 days)
-<li>Not considered
-</ul>
-</ul></body></html>
diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml
new file mode 100644
index 0000000..f3e74be
--- /dev/null
+++ b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml
@@ -0,0 +1,12 @@
+generated-date: 2017-02-01 06:47:18.195464
+sources:
+- excuses:
+ - 10 days old (needed 10 days)
+ hints:
+ is-candidate:
+ item-name: dummy-package
+ new-version: 2.0.0
+ old-version: 1.0.0
+ reason: []
+ source: dummy-package
+
diff --git a/distro_tracker/vendor/debian/tests.py b/distro_tracker/vendor/debian/tests.py
index b67271e..5a56566 100644
--- a/distro_tracker/vendor/debian/tests.py
+++ b/distro_tracker/vendor/debian/tests.py
@@ -1760,15 +1760,14 @@ class UpdateExcusesTaskActionItemTest(TestCase):
def set_update_excuses_content(self, content):
"""
- Sets the stub content of the update_excuses.html that the task will
+ Sets the stub content of the update_excuses.yaml that the task will
have access to.
"""
- self.task._get_update_excuses_content.return_value = iter(
- content.splitlines())
+ self.task._get_update_excuses_content.return_value = content
def set_update_excuses_content_from_file(self, file_name):
"""
- Sets the stub content of the update_excuses.html that the task will
+ Sets the stub content of the update_excuses.yaml that the task will
have access to based on the content of the test file with the given
name.
"""
@@ -1786,7 +1785,7 @@ class UpdateExcusesTaskActionItemTest(TestCase):
Tests that an action item is created when a package has not moved to
testing after the allocated period.
"""
- self.set_update_excuses_content_from_file('update_excuses-1.html')
+ self.set_update_excuses_content_from_file('update_excuses-1.yaml')
# Sanity check: no action items currently
self.assertEqual(0, ActionItem.objects.count())
expected_data = {
@@ -1834,7 +1833,7 @@ class UpdateExcusesTaskActionItemTest(TestCase):
package=self.package_name,
item_type=self.get_action_item_type(),
short_description="Desc")
- self.set_update_excuses_content_from_file('update_excuses-2.html')
+ self.set_update_excuses_content_from_file('update_excuses-2.yaml')
self.run_task()
diff --git a/distro_tracker/vendor/debian/tracker_tasks.py b/distro_tracker/vendor/debian/tracker_tasks.py
index 0a7b185..efbe152 100644
--- a/distro_tracker/vendor/debian/tracker_tasks.py
+++ b/distro_tracker/vendor/debian/tracker_tasks.py
@@ -830,86 +830,36 @@ class UpdateExcusesTask(BaseTask):
return True
return False
- def _extract_problems_in_excuses_item(self, subline, package, problematic):
- if 'days old (needed' in subline:
- words = subline.split()
- age, limit = words[0], words[4]
- if age != limit:
- # It is problematic only when the age is strictly
- # greater than the limit.
- problematic[package] = {
- 'age': age,
- 'limit': limit,
- }
+ def _extract_problematic(self, item):
+ if not 'policy_info' in item or not 'age' in item['policy_info']:
+ return
+ package = item['item-name']
+ age = item['policy_info']['age']['current-age']
+ limit = item['policy_info']['age']['age-requirement']
+ if age > limit:
+ return (package, {'age': age, 'limit': limit})
- def _get_excuses_and_problems(self, content_lines):
+ def _get_excuses_and_problems(self, content):
"""
- Gets the excuses for each package from the given iterator of lines
- representing the excuses html file.
- Also finds a list of packages which have not migrated to testing even
- after the necessary time has passed.
+ Gets the excuses for each package.
+ Also finds a list of packages which have not migrated to testing
+ agter the necessary time has passed.
- :returns: A two-tuple where the first element is a dict mapping
- package names to a list of excuses. The second element is a dict
- mapping package names to a problem information. Problem information
- is a dict with the keys ``age`` and ``limit``.
+ :returns: A two-tuple where the first element is a dict mapping
+ package names to a list of excuses. The second element is a dict
+ mapping packages names to a problem information. Problem information
+ is a dict with the keys ``age`` and ``limit``.
"""
- try:
- # Skip all HTML before the first list
- while '<ul>' not in next(content_lines):
- pass
- except StopIteration:
+ if not 'sources' in content:
logger.warning("Invalid format of excuses file")
return
- top_level_list = True
- package = ""
- package_excuses = {}
- problematic = {}
- excuses = []
- for line in content_lines:
- if isinstance(line, six.binary_type):
- line = line.decode('utf-8')
- if '</ul>' in line:
- # The inner list is closed -- all excuses for the package are
- # processed and we're back to the top-level list.
- top_level_list = True
- if '/' in package:
- continue
- # Done with the package
- package_excuses[package] = deepcopy(excuses)
- continue
-
- if '<ul>' in line:
- # Entering the list of excuses
- top_level_list = False
- continue
-
- if top_level_list:
- # The entry in the top level list outside of an inner list is
- # a <li> item giving the name of the package for which the
- # excuses follow.
- words = re.split("[><() ]", line)
- package = words[6]
- excuses = []
- top_level_list = False
- continue
-
- line = line.strip()
- for subline in line.split("<li>"):
- if self._skip_excuses_item(subline):
- continue
-
- # Check if there is a problem for the package.
- self._extract_problems_in_excuses_item(subline, package,
- problematic)
-
- # Extract the rest of the excuses
- # If it contains a link to an anchor convert it to a link to a
- # package page.
- excuses.append(self._adapt_excuse_links(subline))
-
- return package_excuses, problematic
+ items = content['sources']
+ linked = lambda e: map(self._adapt_excuse_links, e)
+ excuses = [(e['item-name'], linked(e['excuses'])) for e in items]
+ problems = map(self._extract_problematic, items)
+ problematic = [p for p in problems if p]
+ return dict(excuses), dict(problematic)
def _create_action_item(self, package, extra_data):
"""
@@ -944,16 +894,16 @@ class UpdateExcusesTask(BaseTask):
def _get_update_excuses_content(self):
"""
- Function returning the content of the update_excuses.html file as an
- terable of lines.
- Returns ``None`` if the content in the cache is up to date.
+ Function returning the content of excuses from debian-release
+ :returns: a dict of excuses or ``None`` if the content in the
+ cache is up to date.
"""
- url = 'https://release.debian.org/britney/update_excuses.html'
+ url = 'https://release.debian.org/britney/excuses.yaml'
response, updated = self.cache.update(url, force=self.force_update)
if not updated:
return
- return response.iter_lines(decode_unicode=True)
+ return yaml.load(response.text)
def execute(self):
content_lines = self._get_update_excuses_content()
--
2.11.0
Reply to: