[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Handling lots of mail



As a stop gap, I ran a script I have to trim down my mailboxes.  Here
it is for anyone who's interested.  GPL license.


This takes mbox files greater than sizeLeft and splits them so only
about sizeLeft bytes remain (it finds message boundaries).  It then
names the old part by the date of the last message, and compresses it.

I call the script chop.py, so usage is
chop.py [list of mbox files]


#! /usr/bin/python
import fcntl,os,re,stat,sys
import pdb


# Number of bytes (approximately) to leave at the end of each mailbox.
sizeLeft = 15*1024*1024  # Size to leave

# Matches an mbox "From " separator line and captures the date pieces used
# to name the archive.  The day of month is preceded by " +" rather than a
# single space because asctime-style dates pad days 1-9 with an extra space
# ("Jan  2"); with a single literal space those separators never matched.
pattern = re.compile(r"^From \S+ (?P<weekday>\w\w\w) (?P<month>\w\w\w)"
                     r" +(?P<day>[0-9]{1,2}) \d\d:\d\d:\d\d (?P<year>\d{2,4})$")

# The line after the separator must look like this for the separator to be
# accepted as a real message start.
patt2 = re.compile(r"^Received: from")

# Chunk size used when copying the tail of the mailbox to its new file.
blockSize = 1024*1024  # size to transfer

def handleFile(file):
    """Trim one mbox file down to roughly its last sizeLeft bytes.

    Files smaller than sizeLeft are reported and left alone.  Otherwise the
    search starts sizeLeft bytes before the end of the file and looks for
    the first "From " separator line (matching pattern) whose following
    line matches patt2.  The file is then handed to chopFile, split at the
    start of that separator, with a ".<year>-<month>-<day>" suffix taken
    from the separator line.  If no such boundary is found a message is
    printed and the file is closed unchanged.
    """
    if os.path.getsize(file) < sizeLeft:
        print("%s is not long enough to chop" % file)
        return

    fh = open(file, "r+")
    fh.seek(-sizeLeft, 2)
    fh.readline()  # discard the (probably partial) line we landed in

    while True:
        boundary = fh.tell()  # offset of the line about to be examined
        candidate = fh.readline()
        if candidate == "":
            break  # hit EOF

        envelope = pattern.match(candidate)
        if envelope is None:
            continue

        # A real message start is followed by a Received: header.
        following = fh.readline()
        if patt2.match(following) is None:
            print("Odd.  Failed to match on 2nd line.  Continuing search.")
            # note the 2nd line itself is not re-examined as a separator
            continue

        suffix = ".%s-%s-%s" % (envelope.group('year'),
                                envelope.group('month'),
                                envelope.group('day'))
        # chopFile takes over the handle and closes it.
        chopFile(fh, file, boundary, suffix)
        return

    print("Could not find message start in last %d characters of %s" % (
        sizeLeft, file))
    fh.close()

    



def chopFile(fh, name, pos, decoration):
    """Chop the file called name, open as fh, at byte offset pos.

    The part before pos becomes the archive, named name + decoration; the
    part from pos onwards becomes the new contents of name.  The work is
    done by _chopFile while an exclusive flock is held on fh.

    fh must be open for modification on entry.  It is always closed by the
    time this function returns, whether or not the chop succeeded.

    Arguments:
      fh         -- open file object for the mailbox
      name       -- path of the mailbox
      pos        -- offset at which to split
      decoration -- suffix appended to name to form the archive name
    """
    # flock() reports failure by raising, not by its return value, so the
    # error has to be caught rather than tested for.
    try:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
    except (IOError, OSError):
        print("%s %s" % ("Couldn't lock ", name))
        fh.close()
        return
    try:
        _chopFile(fh, name, pos, decoration)
    finally:
        # off with their locks
        # The lock belongs to fh's own descriptor; reopening the path would
        # give a different descriptor (and, after the rename, a different
        # file).  If _chopFile already closed fh the lock went with it.
        if not fh.closed:
            try:
                fcntl.flock(fh.fileno(), fcntl.LOCK_UN)
            except (IOError, OSError):
                print("Failed to remove lock for %s" % name)
            fh.close()


def _chopFile(fh, name, pos, decoration):
    """Chop the file without worrying about locking.

    Copies everything from offset pos to the end of fh into name + ".short",
    truncates fh at pos and closes it, renames the truncated file to
    name + decoration, renames the ".short" file to name, and finally runs
    bzip2 on the archive.

    Arguments are as for chopFile.  fh is closed once the tail has been
    copied and the file truncated; if the copy fails the exception
    propagates before the original file has been modified.
    """
    import subprocess

    shortName = name + ".short"

    # copy tail
    fhshort = open(shortName, "w")
    try:
        fh.seek(pos)
        block = fh.read(blockSize)
        while block:
            fhshort.write(block)
            block = fh.read(blockSize)
    finally:
        fhshort.close()

    # The new file will replace the mailbox, so give it the mailbox's
    # permission bits instead of whatever the umask produced.
    os.chmod(shortName, stat.S_IMODE(os.fstat(fh.fileno()).st_mode))

    # and create long start
    fh.truncate(pos)
    fh.close()

    # assign final names
    archiveName = name + decoration
    os.rename(name, archiveName)
    os.rename(shortName, name)

    # Pass the path as an argument list so spaces or shell metacharacters
    # in the file name cannot be interpreted by a shell.
    try:
        status = subprocess.call(["bzip2", archiveName])
    except OSError:
        status = -1  # bzip2 could not be started at all
    if status == 0:
        archiveName = archiveName + ".bz2"
    else:
        print("bzip2 failed for %s; archive left uncompressed" % archiveName)
    print("Chopped start of %s to %s" % (name, archiveName))


# executable part
# Runs as soon as the module is executed: each command-line argument is
# treated as the path of an mbox file and passed to handleFile in turn.
# NOTE(review): the loop variable is a module-level global called `file`.
# Before renaming it, check whether chopFile/_chopFile read a global `file`
# instead of their own `name` parameter.
for file in sys.argv[1:]:
    handleFile(file)


-- 
To UNSUBSCRIBE, email to debian-user-request@lists.debian.org 
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org



Reply to: