Re: Handling lots of mail
As a stop gap, I ran a script I have to trim down my mailboxes. Here
it is for anyone who's interested. GPL license.
This takes mbox files larger than sizeLeft and splits them so that only
about sizeLeft bytes remain (it splits at a message boundary). It then
names the old part after the date of the first message that was kept
(i.e. the date at the split point), and compresses it with bzip2.
I call the script chop.py, so usage is
chop.py [list of mbox files]
#! /usr/bin/python
import fcntl,os,re,stat,sys
import pdb
# Approximate number of bytes to leave at the end of each mailbox.
sizeLeft = 15 * 1024 * 1024

# Matches an mbox "From " separator line such as
#   From user@host Mon Jan  1 00:00:00 2001
# and captures the weekday, month, day and year.  The day may be preceded by
# more than one space because asctime-style dates pad single-digit days
# ("Jan  1"); requiring exactly one space would miss days 1-9.
pattern = re.compile(
    r"^From \S+ (?P<weekday>\w\w\w) (?P<month>\w\w\w)"
    r" +(?P<day>[0-9]{1,2}) \d\d:\d\d:\d\d (?P<year>\d{2,4})$")

# The line after a separator must look like this for the separator to be
# accepted as a message start.
patt2 = re.compile(r"^Received: from")

# Number of bytes read per chunk when copying the tail of a mailbox.
blockSize = 1024 * 1024
def handleFile(file):
    """Chop one mbox file if it is at least sizeLeft bytes long.

    Scans the last sizeLeft bytes for a "From " separator line that is
    immediately followed by a "Received: from" line, then hands the open
    file to chopFile to split at that separator.  Prints a message and
    leaves the file alone when it is too small or no such boundary is
    found.
    """
    if os.path.getsize(file) < sizeLeft:
        print("%s is not long enough to chop" % file)
        return

    mbox = open(file, "r+")
    mbox.seek(-sizeLeft, 2)  # whence 2: offset is relative to end of file
    mbox.readline()  # we probably landed mid-line; discard the remainder

    # readline()/tell() are used on purpose instead of iterating the file:
    # the byte offset of each candidate line is needed.
    while True:
        start = mbox.tell()
        header = mbox.readline()
        if not header:
            break  # hit EOF

        found = pattern.match(header)
        if found is None:
            continue

        following = mbox.readline()
        if not patt2.match(following):
            # The line just read is consumed and not re-examined.
            print("Odd. Failed to match on 2nd line. Continuing search.")
            continue

        suffix = ".%s-%s-%s" % (found.group('year'),
                                found.group('month'),
                                found.group('day'))
        # chopFile takes over the handle from here.
        chopFile(mbox, file, start, suffix)
        return

    print("Could not find message start in last %d characters of %s" % (
        sizeLeft, file))
    mbox.close()
def chopFile(fh, name, pos, decoration):
    """Lock the mailbox, then chop it at pos.

    fh         -- file object for the mailbox, open for modification
    name       -- path of the mailbox that fh refers to
    pos        -- byte offset at which to split
    decoration -- suffix added to name to form the name of the old part

    fh is always closed by the time this function returns.  If the lock
    cannot be taken, a message is printed and nothing is changed.
    """
    # flock() reports failure by raising, not through its return value.
    try:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
    except IOError:
        print("Couldn't lock %s" % name)
        fh.close()
        return
    try:
        _chopFile(fh, name, pos, decoration)
    finally:
        # off with their locks
        # The lock belongs to fh's own descriptor, so it must be released
        # through fh.  Closing fh drops the lock as well, so there is
        # nothing left to do when _chopFile has already closed it.
        if not fh.closed:
            try:
                fcntl.flock(fh.fileno(), fcntl.LOCK_UN)
            except IOError:
                print("Failed to remove lock for %s" % name)
            fh.close()
def _chopFile(fh, name, pos, decoration):
    """Chop the mailbox at pos without worrying about locking.

    Everything from pos to the end of fh is copied to name + ".short".
    fh is then truncated at pos and closed, the truncated file is renamed
    to name + decoration, the ".short" file is renamed to name, and the
    old part is compressed with bzip2.

    fh         -- file object for the mailbox, open for modification
    name       -- path of the mailbox that fh refers to
    pos        -- byte offset at which to split
    decoration -- suffix added to name to form the name of the old part
    """
    # Imported here so this function does not depend on the file's
    # top-level import list.
    import subprocess

    shortName = name + ".short"
    archiveName = name + decoration

    # copy tail
    fhshort = open(shortName, "w")
    try:
        # Give the new mailbox the old one's permission bits before any
        # mail is written to it; otherwise it would get umask defaults.
        os.chmod(shortName, stat.S_IMODE(os.fstat(fh.fileno()).st_mode))
        fh.seek(pos)
        block = fh.read(blockSize)
        while block:
            fhshort.write(block)
            block = fh.read(blockSize)
    finally:
        fhshort.close()

    # and create long start
    fh.truncate(pos)
    fh.close()

    # assign final names
    os.rename(name, archiveName)
    os.rename(shortName, name)

    # Run bzip2 directly with an argument list rather than through a shell,
    # so names with spaces or shell metacharacters are handled correctly.
    # "--" keeps a name that starts with "-" from being read as an option.
    try:
        status = subprocess.call(["bzip2", "--", archiveName])
    except OSError:
        # bzip2 could not be started; the archive is still intact.
        status = -1
    if status == 0:
        print("Chopped start of %s to %s" % (name, archiveName + ".bz2"))
    else:
        print("Chopped start of %s to %s (bzip2 failed; left uncompressed)"
              % (name, archiveName))
# executable part
# Process every mbox path given on the command line, in order.
# NOTE(review): the loop variable `file` is a module-level global here, and
# other functions in this script may read it under that name -- confirm
# before renaming it or moving this loop into a function.
for file in sys.argv[1:]:
    handleFile(file)
--
To UNSUBSCRIBE, email to debian-user-request@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org
Reply to: