[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#924139: www.debian.org: migrate from python to python3



Hi Cyril,

On Sat, Mar 09, 2019 at 09:48:53PM +0100, Cyril Brulebois wrote:
... 
> There are a couple of scripts that rely on Python at the moment:
... 
> It might be possible to get rid of the first one after the CVS→Git
> migration, but others might need to get ported to Python 3 at some
> point, as Python 2 is on the way out.
> 
> The debian.org metapackage for the website will need to get updated
> accordingly during/after the migration.

I tried to make the migration over to Python3 for the files that have
been identified by 2to3 as requiring rework.

As far as possible I've tested the scripts. But someone with more
knowledge will have to take a look at the possible corner cases.

The following files I could test successfully.
 english/security/oval/generate.py
 english/security/oval/oval/definition/generator.py
 english/security/oval/oval/parser/dsa.py

These two scripts I'm unable to test.
 english/mirror/timestamps/archive_mirror_check.py
 english/mirror/timestamps/mirror_check.py

I added two patches with the resulting changes.

Regards
Carsten
>From eefa86aa6edf33982e8c5a599cec07480050bbc8 Mon Sep 17 00:00:00 2001
From: Carsten Schoenert <c.schoenert@t-online.de>
Date: Sat, 16 Mar 2019 15:03:29 +0100
Subject: [PATCH 1/2] Oval: modify scripts to use Python3

Move over to Python3 syntax so the script generate.py can be used with
the Python3 interpreter.
---
 english/security/oval/generate.py             |  7 ++-
 .../oval/oval/definition/generator.py         | 59 +++++++++++--------
 english/security/oval/oval/parser/dsa.py      |  3 +-
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/english/security/oval/generate.py b/english/security/oval/generate.py
index 830ddb48007..f966f2213b4 100644
--- a/english/security/oval/generate.py
+++ b/english/security/oval/generate.py
@@ -1,8 +1,9 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 # Extracts the data from the security tracker and creates OVAL queries to
 # be used with the OVAL query interpreter (see http://oval.mitre.org)
 
+# (c) 2019 Carsten Schoenert
 # (c) 2016 Sebastien Delafond <sdelafond@gmail.com>
 # (c) 2015 Nicholas Luedtke
 # Licensed under the GNU General Public License version 2.                                                                                     
@@ -41,8 +42,8 @@ DEBIAN_VERSION = {
 def usage (prog = "parse-wml-oval.py"):
     """Print information about script flags and options"""
 
-    print """usage: %s [vh] [-d <directory>]\t-d\twhich directory use for
-    dsa definition search\t-v\tverbose mode\t-h\tthis help""" % prog
+    print("""usage: {} [vh] [-d <directory>]\t-d\twhich directory use for"""
+          """dsa definition search\t-v\tverbose mode\t-h\tthis help""".format(prog))
 def printdsas(ovals):
     """ Generate and print OVAL Definitions for collected DSA information """
 
diff --git a/english/security/oval/oval/definition/generator.py b/english/security/oval/oval/definition/generator.py
index 087d9000472..4996559cc4f 100644
--- a/english/security/oval/oval/definition/generator.py
+++ b/english/security/oval/oval/definition/generator.py
@@ -3,6 +3,7 @@
 # OVAL definitions of Debian Security Advisories.
 # Use various optimizations to minimize result XML
 #
+# (c) 2019 Carsten Schoenert
 # (c) 2016 Sebastien Delafond <sdelafond@gmail.com>
 # (c) 2015 Nicholas Luedtke
 # (c) 2007 Pavel Vinogradov            
@@ -16,8 +17,22 @@ from lxml import etree
 from oval.definition.differ import differ
 import re
 
-# from http://boodebr.org/main/python/all-about-python-and-unicode#UNI_XML
-RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + u'|' + u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % (unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff), unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff), unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff)) 
+# We add some Unicode characters for High Private Use Surrogates aka "�" we
+# need to ignore.
+# Have a look at https://unicodemap.org/range/76/High_Private_Use_Surrogates/
+# Based on http://boodebr.org/main/python/all-about-python-and-unicode#UNI_XML
+RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + u'|' + u'([{0}-{1}][^{2}-{3}])|([^{4}-{5}][{6}-{7}])|([{8}-{9}]$)|(^[{10}-{11}])'.format(chr(0xd800),
+                                                                                                                                                                        chr(0xdbff),
+                                                                                                                                                                        chr(0xdc00),
+                                                                                                                                                                        chr(0xdfff),
+                                                                                                                                                                        chr(0xd800),
+                                                                                                                                                                        chr(0xdbff),
+                                                                                                                                                                        chr(0xdc00),
+                                                                                                                                                                        chr(0xdfff),
+                                                                                                                                                                        chr(0xd800),
+                                                                                                                                                                        chr(0xdbff),
+                                                                                                                                                                        chr(0xdc00),
+                                                                                                                                                                        chr(0xdfff))
 regex = re.compile(RE_XML_ILLEGAL)
 nsmap = {
     None       : "http://oval.mitre.org/XMLSchema/oval-definitions-5",
@@ -101,7 +116,7 @@ def __getNewId (type):
 def __createOVALDpkginfoObject (name):
   """ Generate OVAL dpkginfo_object definition """
   
-  if not testsHash["obj"].has_key(name):
+  if name not in testsHash["obj"]:
     objectId = __getNewId ("object");
     object = __createXMLElement("dpkginfo_object",
       attrs={"id":objectId, 
@@ -118,7 +133,7 @@ def __createOVALTextfilecontentObject (pattern, path = "/etc", filename = "debia
   """ Generate OVAL textfilecontent_object definition """
   name = path + filename + pattern
   
-  if not testsHash["obj"].has_key(name):
+  if name not in testsHash["obj"]:
     objectId = __getNewId ("object");
     object = __createXMLElement("textfilecontent_object",
       attrs={"id":objectId, 
@@ -137,7 +152,7 @@ def __createOVALUnameObject ():
   """ Generate OVAL textfilecontent_object definition """
   name = "uname_object"
   
-  if not testsHash["obj"].has_key(name):
+  if name not in testsHash["obj"]:
     objectId = __getNewId ("object");
     object = __createXMLElement("uname_object",
       attrs={"id":objectId, 
@@ -155,7 +170,7 @@ def __createOVALState (value, operation = "less than"):
     Use state hash for optimization of resulted XML
   """
   #TODO: Add arch state generation
-  if not testsHash["dpkgSte"].has_key(operation) or not testsHash["dpkgSte"][operation].has_key(value):
+  if operation not in testsHash["dpkgSte"] or value not in testsHash["dpkgSte"][operation]:
     stateId = __getNewId ("state")
 
     state = __createXMLElement("dpkginfo_state", 
@@ -183,7 +198,7 @@ def __createOVALUnameState (field, value, operation = "equals"):
     pass
 
   #TODO: Add arch state generation
-  if not testsHash["unameSte"].has_key(operation) or not testsHash["unameSte"][operation].has_key(value):
+  if operation not in testsHash["unameSte"] or value not in testsHash["unameSte"][operation]:
     stateId = __getNewId ("state")
 
     state = __createXMLElement("uname_state", 
@@ -204,7 +219,7 @@ def __createOVALTextfilecontentState (value, operation = "equals"):
     Use state hash for optimization of resulted XML
   """
   #TODO: Add arch state generation
-  if not testsHash["fileSte"].has_key(operation) or not testsHash["fileSte"][operation].has_key(value):
+  if operation not in testsHash["fileSte"] or value not in testsHash["fileSte"][operation]:
     stateId = __getNewId ("state")
 
     state = __createXMLElement("textfilecontent_state", 
@@ -240,7 +255,7 @@ def __createDPKGTest(name, version):
 def __createTest(testType, value):
   """ Generate OVAL test for release or architecture cases"""
   
-  if not testsHash[testType].has_key(value):
+  if value not in testsHash[testType]:
     comment = None
       
     ref = __getNewId("test")
@@ -321,7 +336,7 @@ def createPlatformDefinition (release, data, cve):
   archCriteria = __createXMLElement ("criteria", attrs = {"comment" : "Architecture section", "operator" : "OR"})
 
   # Handle architecture independed section
-  if data.has_key ("all"):
+  if "all" in data:
     archIndepCriteria = __createXMLElement ("criteria", attrs={"comment" : "Architecture independent section", "operator" : "AND"})
     
     archIndepCriteria.append ( __createXMLElement ("criterion", attrs = {"test_ref" : __createTest("arch", "all"), "comment" : "all architecture"}))
@@ -346,8 +361,8 @@ def createPlatformDefinition (release, data, cve):
     else:
       dsaData = diff.getDiffer()
     
-    diff.Clean()  
-    for (key, value) in dsaData.iteritems():
+    diff.Clean()
+    for key, value in dsaData.items():
       if key != "all":
         diff.compareElement(key, value)
     
@@ -405,28 +420,28 @@ def createDefinition (cve, oval):
     cve -- CVE dentificator
     oval -- CVE parsed data
   """
-  if not oval.has_key("release"):
+  if "release" not in oval:
     logging.log(logging.WARNING, "CVE %s: Release definition not well formatted. Ignoring this CVE." % cve)
     raise CVEFormatException
     
-  if not oval.has_key("packages"):
+  if "packages" not in oval:
     logging.log(logging.WARNING, "CVE %s: Package information missed. Ignoring this CVE." % cve)
     oval["packages"] = ""
     return None
 
-  if not oval.has_key("title"):
+  if "title" not in oval:
     logging.log(logging.WARNING, "CVE %s: title information missed." % cve)
     oval["title"] = ""
 
-  if not oval.has_key("description"):
+  if "description" not in oval:
     logging.log(logging.WARNING, "CVE %s: Description information missed." % cve)
     oval["description"] = ""
 
-  if not oval.has_key("moreinfo"):
+  if "moreinfo" not in oval:
     logging.log(logging.WARNING, "CVE %s: Moreinfo information missed." % cve)
     oval["moreinfo"] = ""
 
-  if not oval.has_key("secrefs"):
+  if "secrefs" not in oval:
     logging.log(logging.WARNING, "CVE %s: Secrefs information missed." % cve)
     oval["secrefs"] = ""
 
@@ -457,7 +472,7 @@ def createDefinition (cve, oval):
   #TODO: move this info to other place
   metadata.append ( __createXMLElement ("description", oval["description"]))
   debianMetadata = __createXMLElement ("debian")
-  if oval.has_key("date"):
+  if "date" in oval:
     debianMetadata.append ( __createXMLElement ("date", oval["date"]) )
   debianMetadata.append ( __createXMLElement ("moreinfo", oval["moreinfo"]))
   metadata.append (debianMetadata)
@@ -496,9 +511,7 @@ def createOVALDefinitions (ovals):
   
   definitions = etree.SubElement (root, "definitions")
   
-  keyids = ovals.keys()
-  keyids.sort()
-  for cve in keyids:
+  for cve in sorted(ovals.keys()):
     try:
       # filter for CVEs
       if cve.find("CVE-") < 0:
@@ -515,4 +528,4 @@ def createOVALDefinitions (ovals):
 
 def printOVALDefinitions (root):
   if len(root.find("definitions")):
-    print etree.tostring(root, encoding='utf-8', pretty_print=True, xml_declaration=True)
+    print(etree.tostring(root, encoding='utf-8', pretty_print=True, xml_declaration=True))
diff --git a/english/security/oval/oval/parser/dsa.py b/english/security/oval/oval/parser/dsa.py
index 6ce2d9816f2..d47305124f3 100644
--- a/english/security/oval/oval/parser/dsa.py
+++ b/english/security/oval/oval/parser/dsa.py
@@ -9,6 +9,7 @@
 #    <isvulnerable>
 #    <fixed>
 #
+# (c) 2019 Carsten Schoenert
 # (c) 2016 Sebastien Delafond <sdelafond@gmail.com>
 # (c) 2015 Nicholas Luedtke
 # (c) 2007 Pavel Vinogradov     
@@ -52,7 +53,7 @@ def parseFile (path):
     result = datepatern.search (line)
     if result:
       date = result.groups()[0]
-      normDate = lambda (date): "-".join([(len(p) > 1 and p or "0"+p) for p in date.split("-")])
+      normDate = lambda date: "-".join([(len(p) > 1 and p or "0"+p) for p in date.split("-")])
       data["date"] = normDate(date)
     
     descrpatern = re.compile (r'(CVE-\d+-\d+)')
-- 
2.20.1

>From f3cc5d3a0691cc98c0a537dde8ed5a4984e15114 Mon Sep 17 00:00:00 2001
From: Carsten Schoenert <c.schoenert@t-online.de>
Date: Sat, 16 Mar 2019 15:38:02 +0100
Subject: [PATCH 2/2] Python scripts: modify to use Python3 syntax

Two more scripts that need some modifications to be usable with Python3.
---
 .../mirror/timestamps/archive_mirror_check.py | 60 +++++++++----------
 english/mirror/timestamps/mirror_check.py     | 22 +++----
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/english/mirror/timestamps/archive_mirror_check.py b/english/mirror/timestamps/archive_mirror_check.py
index 64c757e88c9..7461cbc7686 100755
--- a/english/mirror/timestamps/archive_mirror_check.py
+++ b/english/mirror/timestamps/archive_mirror_check.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-import sys, urllib, htmllib, httplib, formatter, urlparse, re
+#!/usr/bin/python3
+import sys, urllib.request, urllib.parse, urllib.error, htmllib, http.client, formatter, re
 import string, time, socket, signal
 
 # Checks mirror sites to see if they are up to date
@@ -26,7 +26,7 @@ cont_line = re.compile('\s')
 TIMEOUT = 25
 def handler(signum, frame):
 	# print 'Signal handler called with signal', signum
-	raise IOError, "gave up on site (" + repr(TIMEOUT) + " second limit)"
+	raise IOError('gave up on site (' + repr(TIMEOUT) + ' second limit)')
 
 itemre = re.compile(r'(.*?): (.*)\n')
 def process_line(line, site):
@@ -34,7 +34,7 @@ def process_line(line, site):
 		return
 	res = itemre.match(line)
 	if not res:
-		print "  bad line found: " + line[:-1]
+		print('  bad line found: {}'.format(line[:-1]))
 	else:
 		site[string.lower(res.group(1))] = res.group(2)
 	line = ''
@@ -46,42 +46,42 @@ def check_site(hostname, loc):
 	try:
 		hostaddress = socket.gethostbyname(hostname)
 	except socket.error:
-		print 'Could not resolve host ' + hostname
+		print('Could not resolve host {}'.format(hostname))
 		if badfd:
 			badfd.write('Could not resolve host ' + hostname + '\n')
 		return 1
-	print mirror + ' (' + hostaddress + ')'
+	print('{} ( {} )'.format(mirror, hostaddress))
 	sys.stdout.flush()
 	try:
 		signal.signal(signal.SIGALRM, handler)
 		signal.alarm(TIMEOUT)
-		parts = urlparse.urlparse(mirror)
-		h = httplib.HTTP(parts[1])
+		parts = urllib.parse.urlparse(mirror)
+		h = http.client.HTTP(parts[1])
 		h.putrequest('HEAD', parts[2])
 		h.putheader('Host', parts[1])
 		h.endheaders()
 		errcode, errmsg, headers = h.getreply()
 		signal.alarm(0)
-	except (IOError, socket.error), arg:
+	except (IOError, socket.error) as arg:
 		if badfd:
 			badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 			badfd.write('  Error accessing site: ' + str(arg.args[0]) + '\n')
-		print '  Error accessing site: ' + str(arg.args[0])
+		print('  Error accessing site: {}'.format(str(arg.args[0])))
 		return 1
 	if errcode != 200:
 		if badfd:
 			badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 			badfd.write('  Error: site returned Error Code ' + str(errcode) + '\n')
-		print '  Error: site returned Error Code ' + str(errcode)
+		print('  Error: site returned Error Code {}'.format(str(errcode)))
 		return 1
 	# site must be good so actually download it
 	try:
-		current = urllib.urlopen(mirror)
-	except (IOError, socket.error), arg:
+		current = urllib.request.urlopen(mirror)
+	except (IOError, socket.error) as arg:
 		if badfd:
 			badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 			badfd.write('  Error accessing (supposedly good) site: ' + str(arg.args[0]) + '\n')
-		print '  Error (supposedly good) accessing site: ' + str(arg.args[0])
+		print('  Error (supposedly good) accessing site: {}'.format(str(arg.args[0])))
 		return 1
 	parse = htmllib.HTMLParser(formatter.NullFormatter())
 	parse.feed(current.read())
@@ -96,18 +96,18 @@ def check_site(hostname, loc):
 		if badfd:
 			badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 			badfd.write('  Error with page: no useful links' + '\n')
-		print '  Error with page: no useful links'
+		print('  Error with page: no useful links')
 		return 1
 	urls = {}
 	for url in links:
-		fullurl = urlparse.urljoin(mirror, url)
+		fullurl = urllib.parse.urljoin(mirror, url)
 		try:
-			current = urllib.urlopen(fullurl)
-		except IOError, args:
+			current = urllib.request.urlopen(fullurl)
+		except IOError as args:
 			if badfd:
 				badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 				badfd.write(' Error: ' + str(args) + '\n')
-			print "  Error: " + str(args)
+			print('  Error: {}'.format(str(args)))
 			return 1
 		# Fri Apr 20 17:43:33 UTC 2001
 		# %a  %b  %d %X       %Z  %Y
@@ -128,20 +128,20 @@ def check_site(hostname, loc):
 		except:
 			if badfd:
 				badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
-				badfd.write("  Error with file " + url + '\n')
-				badfd.write("  Data = " + data + '\n')
-			print "  Error with file " + url
-			print "  Data = " + data
+				badfd.write('  Error with file ' + url + '\n')
+				badfd.write('  Data = ' + data + '\n')
+			print('  Error with file {}'.format(url))
+			print('  Data = {}'.format(data))
 			return 1
 		if (epochtime - oodtime) < 0:
 			urls[epochtime] = out + ' (OUT OF DATE)'
 			siteprob = 1
 		else:
 			urls[epochtime] = out
-	tmp = urls.keys()
+	tmp = list(urls.keys())
 	tmp.sort()
 	for times in tmp:
-		print urls[times]
+		print(urls[times])
 	if siteprob and badfd:
 		badfd.write(mirror + ' (' + hostaddress + ')' + '\n')
 		for times in tmp:
@@ -157,7 +157,7 @@ def check_site(hostname, loc):
 ignored = re.compile('(ftp.us.debian.org|mirror.aarnet.edu.au|ibiblio.org|ftp.wa.au.debian.org|ftp.nz.debian.org)')
 def ignored_site(site, currentsite):
 	if ignored.match(site) and not currentsite:
-		print "Ignoring site " + site
+		print('Ignoring site {}'.format(site))
 		return 1
 	return 0
 
@@ -172,16 +172,16 @@ def check(currentsite = ''):
 		if (newline == '\n' or newline == '') and sitenotempty:
 			process_line(line, site)
 			if not ignored_site(site['site'], currentsite):
-				if (site.has_key('site') and site.has_key('archive-http')) and (not currentsite or site['site'] == currentsite):
+				if ('site' in site and 'archive-http' in site) and (not currentsite or site['site'] == currentsite):
 					siteprob = check_site(site['site'], site['archive-http'])
 #					if site.has_key('maintainer'):
-#						print '  Maintainer: ' + site['maintainer']
+#						print('  Maintainer: {}'.format(site['maintainer']))
 #						if badfd and siteprob:
 #							badfd.write('  Maintainer: ' + site['maintainer'] + '\n')
-				if site.has_key('site') and site.has_key('nonus-http') and (not currentsite or site['site'] == currentsite):
+				if 'site' in site and 'nonus-http' in site and (not currentsite or site['site'] == currentsite):
 					siteprob = check_site(site['site'], site['nonus-http'])
 #					if site.has_key('maintainer'):
-#						print '  Maintainer: ' + site['maintainer']
+#						print('  Maintainer: {}'.format(site['maintainer']))
 #						if badfd and siteprob:
 #							badfd.write('  Maintainer: ' + site['maintainer'] + '\n')
 			if site['site'] == currentsite:
@@ -208,4 +208,4 @@ if len(sys.argv) > 1:
 	sys.exit(1)
 else:
 	check()
-print 'Done'
+print('Done')
diff --git a/english/mirror/timestamps/mirror_check.py b/english/mirror/timestamps/mirror_check.py
index ecba794c5a0..2cf104e4ef6 100755
--- a/english/mirror/timestamps/mirror_check.py
+++ b/english/mirror/timestamps/mirror_check.py
@@ -1,5 +1,5 @@
-#!/usr/bin/python
-import sys, urllib, htmllib, httplib, formatter, urlparse, re, string, time
+#!/usr/bin/python3
+import sys, urllib.request, urllib.parse, urllib.error, htmllib, http.client, formatter, re, string, time
 
 # given a list of addresses, e.g. 'www.debian.org, www.uk.debian.org',
 # the timestamp files are retrieved and printed out.
@@ -11,22 +11,22 @@ sites.pop(0)
 mailto = re.compile('mailto')
 for site in sites:
 	mirror = 'http://' + site + '/mirror/timestamps/'
-	print mirror
+	print(mirror)
 	try:
-		parts = urlparse.urlparse(mirror)
-		h = httplib.HTTP(parts[1])
+		parts = urllib.parse.urlparse(mirror)
+		h = http.client.HTTP(parts[1])
 		h.putrequest('HEAD', parts[2])
 		h.putheader('Host', parts[1])
 		h.endheaders()
 		errcode, errmsg, headers = h.getreply()
 	except IOError:
-		print '  Problem accessing site'
+		print('  Problem accessing site')
 		continue
 	if errcode != 200:
-		print '  Site returned Error Code ' + str(errcode)
+		print('  Site returned Error Code {}'.format(str(errcode)))
 		continue
 	# site must be good so actually download it
-	current = urllib.urlopen(mirror)
+	current = urllib.request.urlopen(mirror)
 	parse = htmllib.HTMLParser(formatter.NullFormatter())
 	parse.feed(current.read())
 	parse.close()
@@ -41,8 +41,8 @@ for site in sites:
 		# urls.append(url)
 		if mailto.match(url):
 			continue
-		fullurl = urlparse.urljoin(mirror, url)
-		current = urllib.urlopen(fullurl)
+		fullurl = urllib.parse.urljoin(mirror, url)
+		current = urllib.request.urlopen(fullurl)
 		# Fri Apr 20 17:43:33 UTC 2001
 		# %a  %b  %d %X       %Z  %Y
 		data = current.readline()[:-1]
@@ -54,7 +54,7 @@ for site in sites:
 		# same amount, it shouldn't matter.
 		epochtime = time.mktime((int(year), months[mon], int(dom), int(hr), int(min), int(sec), daysofweek[dow], 0, 0))
 		urls[epochtime] = out
-	tmp = urls.keys()
+	tmp = list(urls.keys())
 	tmp.sort()
 	for times in tmp:
-		print urls[times]
+		print(urls[times])
-- 
2.20.1


Reply to: