mua and mail service provider statistics
hi list,
As a first-time debian-user subscriber, I feel buried alive by
incoming emails, though I hear that some monster mailing lists, the
Linux kernel mailing list for example, carry even heavier traffic.
Wondering how people manage so many emails, out of curiosity I compiled
statistics on the choice of mail client and email service, based on the 1494
messages received since I subscribed to this list. Here is what I found.
TOP20
MUA | email provider
------------------------------------|-------------------------------------
Gmail 83 25.7% | gmail.com 110 34.1% |
Mozilla 75 23.2% | debian.org 8 2.5% |
Mutt 54 16.7% | gmx.de 4 1.2% |
Unknown 20 6.2% | yahoo.com 3 0.9% |
KMail 15 4.6% | web.de 3 0.9% |
Gnus 11 3.4% | cox.net 3 0.9% |
Evolution 11 3.4% | comcast.net 3 0.9% |
Claws 11 3.4% | well-adjusted.de 2 0.6% |
Sylpheed 6 1.9% | pobox.com 2 0.6% |
Apple 3 0.9% | karall-edv.at 2 0.6% |
Zimbra 2 0.6% | googlemail.com 2 0.6% |
YahooMailRC 2 0.6% | gmx.net 2 0.6% |
YahooMailClassic 2 0.6% | free.fr 2 0.6% |
Pan 2 0.6% | daniel-gr-andersson.com 2 0.6% |
MessagingEngine.com 2 0.6% | arcor.de 2 0.6% |
Loom 2 0.6% | zoho.com 1 0.3% |
Alpine 2 0.6% | yxit.co.uk 1 0.3% |
tin 1 0.3% | yahoo.com.hk 1 0.3% |
slrn 1 0.3% | yahoo.co.in 1 0.3% |
netcat 1 0.3% | ya.ru 1 0.3% |
Total Messages: 1494
Total Users: 323
In short, Gmail prevails.
--
Chen Wei
#!/usr/bin/env python
# to count what kind of mail client folks are using in maillist
import os
import re
def guess_mua(received_from):
    '''Guess the mail client from a "Received: from" header line.

    Used as a fallback when a message carries no explicit agent header.
    Returns the label of the first known relay whose marker occurs in
    received_from, or 'Unknown' when nothing matches or when
    received_from is empty or None.
    '''
    # An ordered tuple instead of a dict: when a line mentions more than
    # one provider the winner is always the earliest entry, rather than
    # depending on dict iteration order.
    known_relays = (
        ('google.com', 'Gmail'),
        ('blackberry', 'Blackberry'),
        ('yahoo.com', 'Yahoo'),
        ('nabble.com', 'Nabble'),
    )
    if not received_from:
        return 'Unknown'
    for marker, label in known_relays:
        if marker in received_from:
            return label
    return 'Unknown'
# Gather the full path of every file found anywhere below the list's
# Maildir folder.
mailfiles = os.walk('/home/wei/.Maildir/Debian-User')
maildir_files = []
for dirpath, _subdirs, filenames in mailfiles:
    maildir_files.extend(os.path.join(dirpath, name) for name in filenames)
# Header prefixes that name the sending program.
AGENT_HEADERS = ('User-Agent:', 'X-Mailer:', 'X-Newsreader:')
# Anything that looks like user@host, with or without angle brackets.
ADDRESS_RE = re.compile(r'[^<\s]+@[^>\s]+')

# Map each sender to the mail client seen in the last message scanned
# from that sender.
mailstat = {}
for m in maildir_files:
    mua = None
    sender = None
    # Reset per message so a message without a Received header does not
    # inherit the previous message's line (or hit an undefined name).
    received_from = ''
    with open(m) as content:
        for line in content:
            if not line.strip():
                # A blank line ends the header block; never read header
                # look-alikes out of the message body.
                break
            if line.startswith('From:'):
                address = ADDRESS_RE.findall(line)
                if address:
                    sender = address[0]
                else:
                    sender = line.rstrip()[5:]  # string after 'From:'
            if line.startswith(AGENT_HEADERS):
                # First word of the header value, without any '/version'.
                agent = line.rstrip().split(":")[1].split()
                if agent:  # ignore an agent header with an empty value
                    mua = agent[0].split('/')[0]
                    if 'Thunderbird' in mua:
                        mua = 'Mozilla'
            if sender and mua:
                break  # both facts known, no need to read further
            if line.startswith('Received: from'):
                received_from = line
    if not mua:
        # No agent header: fall back to guessing from the relay host.
        mua = guess_mua(received_from)
    mailstat[sender] = mua
def sort_count(input_list):
    '''Count the items of an iterable and rank them.

    Returns a list of (count, item) tuples sorted in descending order,
    i.e. by count first and, for equal counts, by item. An empty input
    gives an empty list.
    '''
    count = {}
    for item in input_list:
        # dict.get replaces the has_key() test, which Python 3 removed.
        count[item] = count.get(item, 0) + 1
    return sorted(((tally, item) for item, tally in count.items()),
                  reverse=True)
# Collect the mail domain of every sender that has a usable address.
domains = []
for sender in mailstat:
    # sender is None when a message had no From: line, and may lack an
    # '@' when no address could be parsed; skip those explicitly rather
    # than hiding every possible error behind a blanket except.
    if sender and '@' in sender:
        domains.append(sender.split('@')[1])
# Rankings as (count, name) tuples, most frequent first.
mua_statis = sort_count(mailstat.values())
mail_provider = sort_count(domains)
# pretty print
def line_gen(item, total):
    '''Render one ranking entry as a fixed-width table cell.

    item is a (count, name) pair; total is the number the count is
    expressed as a percentage of. The cell holds the name, the count and
    the percentage with one decimal, followed by a ' | ' separator.
    '''
    tally = item[0]
    label = item[1]
    share = '{0:.1%}'.format(tally / float(total))
    return '{0:<25} {1:>3} {2:>5} | '.format(label, str(tally), share)
# Print the two rankings side by side, at most TOP_N rows.
TOP_N = 20
total_user = len(mailstat)
# Stop at the end of the longer ranking instead of assuming that both
# have TOP_N entries (indexing past the end raised IndexError).
row_count = min(TOP_N, max(len(mua_statis), len(mail_provider)))
for x in range(row_count):
    cells = []
    for ranking in (mua_statis, mail_provider):
        if x < len(ranking):
            cells.append(line_gen(ranking[x], total_user))
        else:
            # Blank cell as wide as a line_gen() cell (25 + 1 + 3 + 1 + 5
            # characters plus the separator) so the columns stay aligned.
            cells.append('{0:<35} | '.format(''))
    print(''.join(cells))
def sum_counted_list(mylist):
    '''Return the sum of the first element of every entry in mylist.

    mylist is an iterable of sequences such as the (count, name) tuples
    produced by sort_count(); an empty input gives 0.
    '''
    return sum(item[0] for item in mylist)
# Footer: number of distinct senders, set off by a blank line.
print('\nTotal Users: ' + str(total_user))
Reply to: