[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Список непереведённых описаний пакетов



В Wed, 3 May 2017 09:19:24 +0300
Sergey Alyoshin <alyoshin.s@gmail.com> пишет:

> On Wed, May 3, 2017 at 12:23 AM, Sergey Alyoshin
> <alyoshin.s@gmail.com> wrote:
> > Странно, но с | tee похоже не работает.  
> 
> Дело в буферизации stdout:
> 
> PYTHONUNBUFFERED=1  ./ddtp-popcon.py -v | tee log

А может, лучше на Python 3?! ;)
во вложении...
#!/usr/bin/python3

# Text passed to argparse as the program description (shown by --help).
desc = "Checking untranslated packages description for language"

# Base URL of the DDTP web interface; ddt.cgi queries are built on top of it.
ddtp_addr = "https://ddtp2.debian.net"
# Popularity-contest listing that is read line by line for package names.
popcon_addr = "http://popcon.debian.org/by_vote"

import argparse
import re
import urllib.parse
import urllib.request

# Defaults used when the corresponding command-line option is not given.
check_lang = 'ru'
total_packages_num = 1000

# Command-line interface: -n/--number, -l/--language, -v/--verbose.
arg = argparse.ArgumentParser(description=desc)
arg.add_argument('-n', '--number',
                 # Let argparse do the conversion so that a non-numeric value
                 # produces a usage error instead of a ValueError traceback.
                 type=int,
                 help='check number of package')
arg.add_argument('-l', '--language',
                 help='check for language')
arg.add_argument('-v', '--verbose',
                 help='be verbose',
                 action='store_true')

args = arg.parse_args()

# Options that were not supplied stay None and leave the defaults untouched.
if args.language is not None:
    check_lang = args.language

if args.number is not None:
    total_packages_num = args.number

# Description: <a href="ddt.cgi?desc_id=237239">237239</a><br>
# This Description is active<br>
desc_re = re.compile(r'(Description: <a href="ddt\.cgi\?desc_id=)([0-9]+?)"')


def compile_not_trans_re(lang):
    """Return the pattern matching the "not yet translated" line for *lang*.

    Group 2 of a match is the href of the link that follows the message.
    The language code is escaped so that it is always matched literally.
    """
    # This Description is not yet translated to ru <a href="ddt.cgi?desc_id=52130&getuntrans=ru">
    return re.compile('(.*Description is not yet translated to '
                      + re.escape(lang) + ') <a href="(.*?)">')


def decode_line(raw_line):
    """Decode one raw response line; undecodable bytes are replaced."""
    return str(raw_line, 'utf-8', errors='replace')


def popcon_package(raw_line):
    """Return the package name from one popcon line, or None to skip it.

    The package name is the second whitespace-separated field.  Comment
    lines (starting with '#'), blank lines and lines with fewer than two
    fields yield None instead of raising IndexError.
    """
    fields = decode_line(raw_line).split()
    if len(fields) < 2 or fields[0].startswith('#'):
        return None
    return fields[1]


def package_url(base_url, pkg):
    """Return the ddt.cgi URL that looks up package *pkg*.

    The name is percent-encoded: a literal '+' (as in "libstdc++6") would
    otherwise be interpreted as a space in the query string.
    """
    return base_url + '/ddt.cgi?package=' + urllib.parse.quote(pkg, safe='')


def first_desc_id(lines):
    """Return the first description id found in *lines*, or None.

    *lines* is an iterable of raw (bytes) lines; iteration stops at the
    first line that matches ``desc_re``.
    """
    for raw_line in lines:
        found = desc_re.match(decode_line(raw_line))
        if found is not None:
            return found.group(2)
    return None


def untranslated_links(lines, not_trans_re):
    """Yield the href of every line in *lines* matching *not_trans_re*."""
    for raw_line in lines:
        found = not_trans_re.match(decode_line(raw_line))
        if found is not None:
            yield found.group(2)


def report_package(pkg, position, total, base_url, not_trans_re, verbose):
    """Print the untranslated-description links for one package.

    Only the first description id listed on the package page is checked.
    Both HTTP responses are closed as soon as they have been consumed.
    """
    with urllib.request.urlopen(package_url(base_url, pkg)) as page:
        desc_id = first_desc_id(page)
    if desc_id is None:
        return

    if verbose:
        print('%s of %s %s id %s' % (position, total, pkg, desc_id), flush=True)

    with urllib.request.urlopen(base_url + '/ddt.cgi?desc_id=' + desc_id) as page:
        for href in untranslated_links(page, not_trans_re):
            print('%s id %s\t%s/%s' % (pkg, desc_id, base_url, href), flush=True)


def main():
    """Walk the popcon list and report packages lacking a translation.

    Stops after ``total_packages_num`` packages or at the end of the
    popcon listing, whichever comes first.
    """
    print('Checking for "%s" from %s by %s ...' % (check_lang, ddtp_addr, popcon_addr), flush=True)
    not_trans_re = compile_not_trans_re(check_lang)

    checked = 0
    # The context manager closes the popcon response even if a request fails.
    with urllib.request.urlopen(popcon_addr) as popcon:
        for raw_line in popcon:
            if checked >= total_packages_num:
                break

            pkg = popcon_package(raw_line)
            if pkg is None:
                continue

            checked += 1
            report_package(pkg, checked, total_packages_num, ddtp_addr,
                           not_trans_re, args.verbose)


if __name__ == '__main__':
    main()


Reply to: