#!/usr/bin/python -tt
#
# Copyright 2004-2006 Nathaniel W. Turner
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

"""
USAGE
  cleanup-maildir [OPTION].. COMMAND FOLDERNAME..

DESCRIPTION
  Cleans up old messages in FOLDERNAME; the exact action taken
  depends on COMMAND.  (See next section.)
  Note that FOLDERNAME is a name such as 'Drafts', and the
  corresponding maildir path is determined using the values of
  maildir-root, folder-prefix, and folder-seperator.

COMMANDS
  archive - move old messages to subfolders based on message date
  trash   - move old message to trash folder
  delete  - permanently delete old messages

OPTIONS
  -h, --help
      Show this help.
  -q, --quiet
      Suppress normal output.
  -v, --verbose
      Output extra information for testing.
  -n, --trial-run
      Do not actually touch any files; just say what would be done.
  -a, --age=N
      Only touch messages older than N days.  Default is 14 days.
  -k, --keep-flagged-threads
      If any messages in a thread are flagged, do not touch them or
      any other messages in that thread.
      Note: the thread-detection mechanism is currently base purely on
      a message's subject.  The In-Reply-To header is not currently used.
  -r, --keep-read
      If any messages are flagged as READ, do not touch them.
  -t, --trash-folder=F
      Use F as trash folder when COMMAND is 'trash'.
      Default is 'Trash'.
  --archive-folder=F
      Use F as the base for constructing archive folders.  For example, if F is
      'Archive', messages from 2004 might be put in the folder 'Archive.2004'.
  -d, --archive-hierarchy-depth=N
      Specify number of subfolders in archive hierarchy; 1 is just
      the year, 2 is year/month (default), 3 is year/month/day.
  --maildir-root=F
      Specifies folder that contains mail folders.
      Default is "$HOME/Maildir".
  --folder-seperator=str
      Folder hierarchy seperator.  Default is '.'
  --folder-prefix=str
      Folder prefix.  Default is '.'

NOTES
  The following form is accepted for backwards compatibility, but is deprecated:
  cleanup-maildir --mode=COMMAND [OPTION].. FOLDERNAME..

EXAMPLES
  # Archive messages in 'Sent Items' folder over 30 days old
  cleanup-maildir --age=30 archive 'Sent Items'"

  # Delete messages over 2 weeks old in 'Lists/debian-devel' folder,
  # except messages that are part of a thread containing a flagged message.
  cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
"""

__version__ = "0.3.0"
# $Id$
# $URL$

import mailbox
import os.path
import os
import re
import rfc822
import stat
import string
import socket
import time
import logging
import sys
import getopt

# NOTE: this script targets Python 2 (it depends on the 'rfc822' module).
# The syntax below deliberately avoids constructs newer than the old
# interpreters the script supports (no 'except ... as', no 'with').

# Directories are created readable/writable/searchable by the owner only
# (octal 0700).
_MAILDIR_MODE = stat.S_IRWXU

# Maximum number of delivery attempts before deliver() gives up.
_MAX_DELIVERY_ATTEMPTS = 10

# Matches any run of leading reply/forward indicators ("re:", "fw:",
# "fwd:") in an already lower-cased subject.
_REPLY_PREFIX_RE = re.compile(r'^((re|fwd?):\s*)+')


def mkMaildir(path):
    """Make a Maildir structure rooted at 'path'

    Creates 'path' (including any missing parent directories) and its
    'tmp', 'new' and 'cur' subdirectories, all with owner-only permissions.

    Raises OSError if 'path' already exists or cannot be created.
    """
    # makedirs (rather than mkdir) so that hierarchical layouts, e.g. a
    # folder separator of '/', work when intermediate directories are missing.
    os.makedirs(path, _MAILDIR_MODE)
    for sub in ('tmp', 'new', 'cur'):
        os.mkdir(os.path.join(path, sub), _MAILDIR_MODE)


class MaildirWriter(object):

    """Deliver messages into a Maildir"""

    # Default destination Maildir; set per instance by __init__.
    path = None
    # Number of delivery attempts so far; makes generated file names unique
    # within this process.
    counter = 0

    def __init__(self, path=None):
        """Create a MaildirWriter that manages the Maildir at 'path'

        Arguments:
        path -- if specified, used as the default Maildir for this object

        Raises ValueError if 'path' is given but is not a directory.
        """
        if path is not None:
            if not os.path.isdir(path):
                raise ValueError('Path does not exist: %s' % path)
            self.path = path
        self.logger = logging.getLogger('MaildirWriter')

    def deliver(self, msg, path=None):
        """Deliver a message to a Maildir

        The message file is hard-linked into 'tmp', then into 'new', and
        finally into 'cur' with its original flags restored.  The source
        file itself is left in place; removing it is the caller's job.

        Arguments:
        msg -- a message object (must provide getFilePath() and getFlags())
        path -- the path of the Maildir; if None, uses default from __init__

        Raises ValueError if the destination Maildir does not exist, and
        OSError if delivery keeps failing or the final move into 'cur' fails.
        """
        # Use a local so that an explicit 'path' does not silently replace
        # the default given to __init__.
        if path is None:
            path = self.path
        if path is None or not os.path.isdir(path):
            raise ValueError('Path does not exist')

        srcFile = msg.getFilePath()
        tryCount = 1
        while 1:
            dstName = "%d.%d_%d.%s" % (int(time.time()), os.getpid(),
                                       self.counter, socket.gethostname())
            tmpFile = os.path.join(path, "tmp", dstName)
            newFile = os.path.join(path, "new", dstName)
            # Tracks whether *we* created tmpFile, so that cleanup never
            # removes a file that merely happened to exist already.
            tmpLinked = False
            try:
                self.logger.debug("deliver: attempt copy %s to %s" %
                                  (srcFile, tmpFile))
                os.link(srcFile, tmpFile)  # Copy into tmp
                tmpLinked = True
                self.logger.debug("deliver: attempt link to %s" % newFile)
                os.link(tmpFile, newFile)  # Link into new
            except OSError:
                err = sys.exc_info()[1]
                self.logger.critical(
                    "deliver failed: %s (src=%s tmp=%s new=%s i=%d)" %
                    (err.strerror, srcFile, tmpFile, newFile, tryCount))
                if tmpLinked:
                    # Best-effort removal of the half-delivered tmp link; the
                    # next attempt uses a fresh name, so it would otherwise
                    # be left behind.
                    try:
                        os.unlink(tmpFile)
                    except OSError:
                        pass
                tryCount += 1
                self.counter += 1
                if tryCount > _MAX_DELIVERY_ATTEMPTS:
                    raise OSError("too many failed delivery attempts")
                self.logger.info("sleeping")
                time.sleep(2)
            else:
                break

        # Successful delivery; increment deliver counter
        self.counter += 1

        # For the rest of this method we are acting as an MUA, not an MDA.

        # Move message to cur and restore any flags
        dstFile = os.path.join(path, "cur", dstName)
        flags = msg.getFlags()
        if flags is not None:
            dstFile += ':' + flags
        self.logger.debug("deliver: attempt link to %s" % dstFile)
        try:
            os.link(newFile, dstFile)
            os.unlink(newFile)
        finally:
            # Cleanup tmp file, even if the move into cur failed
            os.unlink(tmpFile)


class MessageDateError(TypeError):
    """Indicate that the message date was invalid"""
    pass


class MaildirMessage(rfc822.Message):

    """An email message

    Has extra Maildir-specific attributes
    """

    def getFilePath(self):
        """return the path of the file this message was read from"""
        # From Python 2.5 on, mailbox hands the factory a proxy object that
        # wraps the real file in its '_file' attribute.
        if sys.hexversion >= 0x020500F0:
            return self.fp._file.name
        else:
            return self.fp.name

    def getFlags(self):
        """return the flag part of the message's filename

        This is everything after the first ':' in the file's base name
        (e.g. '2,FS'), or None if the name has no such part.  Only the
        base name is examined, so a ':' in a directory name is harmless.
        """
        parts = os.path.basename(self.getFilePath()).split(':', 1)
        if len(parts) == 2:
            return parts[1]
        return None

    def isFlagged(self):
        """return true if the message is flagged as important"""
        flags = self.getFlags()
        return flags is not None and 'F' in flags

    def isNew(self):
        """return true if the message is marked as unread"""
        # XXX should really be called isUnread
        flags = self.getFlags()
        return flags is None or 'S' not in flags

    def getSubject(self):
        """get the message's subject as a unicode string

        Returns None if the message has no Subject header.  If the header
        names an unknown character set, the raw header value is returned.
        """
        import email.Header
        s = self.getheader("Subject")
        if s is None:
            # decode_header() would stringify None into the text "None".
            return None
        try:
            return u"".join([text.decode(charset or 'ASCII', 'replace')
                             for text, charset
                             in email.Header.decode_header(s)])
        except LookupError:
            return s

    def getSubjectHash(self):
        """get the message's subject in a "normalized" form

        This currently means lowercasing and removing any reply or
        forward indicators (including stacked ones such as 'Re: Fwd:').
        Messages without a subject all map to '(no subject)'.
        """
        s = self.getSubject()
        if s is None:
            return '(no subject)'
        return _REPLY_PREFIX_RE.sub('', s.lower().strip())

    def getDateSent(self):
        """Get the time of sending from the Date header

        Returns seconds since the epoch, honouring the timezone given in
        the header, or None if there is no Date header.  Not very reliable,
        because the Date header can be missing or spoofed (and often is,
        by spammers).

        Throws a MessageDateError if the Date header is invalid.
        """
        dh = self.getheader('Date')
        if dh is None:
            return None
        try:
            # parsedate_tz returns None for unparsable dates, which makes
            # mktime_tz raise TypeError.
            return rfc822.mktime_tz(rfc822.parsedate_tz(dh))
        except (ValueError, TypeError, OverflowError):
            raise MessageDateError("message has missing or bad Date")

    def getDateRecd(self):
        """Get the time the message was received

        This is the modification time of the message file, in seconds
        since the epoch.
        """
        return os.stat(self.getFilePath())[stat.ST_MTIME]

    def getDateSentOrRecd(self):
        """Get the time the message was sent, fall back on time received"""
        try:
            d = self.getDateSent()
            if d is not None:
                return d
        except MessageDateError:
            pass
        return self.getDateRecd()

    def getAge(self):
        """Get the number of days since the message was received"""
        # time.time() and the file's mtime are both seconds since the epoch,
        # so their difference is independent of the local timezone.
        msgAge = time.time() - self.getDateRecd()
        return msgAge / (60 * 60 * 24)


class MaildirCleaner(object):

    """Clean a maildir by deleting or moving old messages"""

    __trashWriter = None
    __mdWriter = None
    # Base name for archive folders; None means "derive from the folder
    # being cleaned".
    archiveFolder = None
    # 1 = year, 2 = year/month, 3 = year/month/day
    archiveHierDepth = 2
    folderBase = None
    folderPrefix = "."
    folderSeperator = "."
    keepFlaggedThreads = False
    trashFolder = "Trash"
    isTrialRun = False
    keepRead = False

    def __init__(self, folderBase=None):
        """Initialize the MaildirCleaner

        Arguments:
        folderBase -- the directory in which the folders are found
        """
        self.folderBase = folderBase
        # Per-instance state; as class attributes these dictionaries would
        # be shared between all MaildirCleaner objects.
        self.stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
        self.keepSubjects = {}
        self.__mdWriter = MaildirWriter()
        self.logger = logging.getLogger('MaildirCleaner')
        self.logger.setLevel(logging.DEBUG)

    def __getTrashWriter(self):
        """Return the MaildirWriter for the trash folder, creating it lazily"""
        if not self.__trashWriter:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + self.trashFolder)
            self.__trashWriter = MaildirWriter(path)
        return self.__trashWriter

    trashWriter = property(__getTrashWriter)

    def _folderPath(self, folderName):
        """Return the maildir path of 'folderName' ('INBOX' is the root)"""
        if folderName == 'INBOX':
            return self.folderBase
        return os.path.join(self.folderBase, self.folderPrefix + folderName)

    def _archiveSubFolder(self, archiveFolder, msg):
        """Return the dated archive folder name for 'msg'"""
        mdate = time.gmtime(msg.getDateSentOrRecd())
        datePart = str(mdate[0])
        if self.archiveHierDepth > 1:
            datePart += self.folderSeperator + time.strftime("%m-%b", mdate)
        if self.archiveHierDepth > 2:
            datePart += self.folderSeperator + time.strftime("%d-%a", mdate)
        # NOTE(review): when archiveFolder is "" (cleaning INBOX without
        # --archive-folder) the name starts with a separator; left as-is so
        # the existing archive layout does not change -- confirm intent.
        return archiveFolder + self.folderSeperator + datePart

    def scanSubjects(self, folderName):
        """Scans for flagged subjects

        Replaces self.keepSubjects with the normalized subjects of all
        flagged messages in 'folderName'.
        """
        self.logger.info("Scanning for flagged subjects...")
        maildir = mailbox.Maildir(self._folderPath(folderName),
                                  MaildirMessage)
        self.keepSubjects = {}
        for i, msg in enumerate(maildir):
            if msg.isFlagged():
                subjectHash = msg.getSubjectHash()
                self.keepSubjects[subjectHash] = 1
                self.logger.debug("Flagged (%d): %s", i, subjectHash)
        self.logger.info("Done scanning.")

    def clean(self, mode, folderName, minAge):
        """Trashes or archives messages older than minAge days

        Updates self.stats: 'total' counts every message seen and the entry
        for 'mode' counts every message selected (also during a trial run).

        Arguments:
        mode -- the cleaning mode.  Valid modes are:
            trash -- moves the messages to a trash folder
            archive -- moves the messages to folders based on their date
            delete -- deletes the messages
        folderName -- the name of the folder on which to operate
            This is a name like "Stuff", not a filename
        minAge -- messages younger than minAge days are left alone

        Raises ValueError if 'mode' is not one of the valid modes.
        """
        if mode not in ('trash', 'archive', 'delete'):
            raise ValueError('invalid mode: %s' % (mode,))

        if self.keepFlaggedThreads:
            self.scanSubjects(folderName)

        archiveFolder = self.archiveFolder
        if archiveFolder is None:
            if folderName == 'INBOX':
                archiveFolder = ""
            else:
                archiveFolder = folderName

        maildir = mailbox.Maildir(self._folderPath(folderName),
                                  MaildirMessage)

        fakeMsg = ""
        if self.isTrialRun:
            fakeMsg = "(Not really) "

        # Move old messages
        for i, msg in enumerate(maildir):
            if self.keepFlaggedThreads == True \
                    and msg.getSubjectHash() in self.keepSubjects:
                self.log(logging.DEBUG, "Keeping #%d (topic flagged)" % i, msg)
            elif msg.getAge() >= minAge \
                    and (not self.keepRead or msg.isNew()):
                if mode == 'trash':
                    self.log(logging.INFO, "%sTrashing #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        self.trashWriter.deliver(msg)
                        os.unlink(msg.getFilePath())
                elif mode == 'delete':
                    self.log(logging.INFO, "%sDeleting #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        os.unlink(msg.getFilePath())
                else:  # mode == 'archive'
                    # Determine subfolder path
                    subFolder = self._archiveSubFolder(archiveFolder, msg)
                    sfPath = os.path.join(self.folderBase,
                                          self.folderPrefix + subFolder)
                    self.log(logging.INFO, "%sArchiving #%d to %s" %
                             (fakeMsg, i, subFolder), msg)
                    if not self.isTrialRun:
                        # Create the subfolder if needed
                        if not os.path.exists(sfPath):
                            mkMaildir(sfPath)
                        # Deliver
                        self.__mdWriter.deliver(msg, sfPath)
                        os.unlink(msg.getFilePath())
                self.stats[mode] += 1
            else:
                self.log(logging.DEBUG, "Keeping #%d (fresh)" % i, msg)
            self.stats['total'] += 1

    def log(self, lvl, text, msgObj):
        """Log some text with the subject of a message"""
        subj = msgObj.getSubject()
        if subj is None:
            subj = "(no subject)"
        self.logger.log(lvl, text + ": " + subj)


def main(argv=None):
    """Run cleanup-maildir with the given command-line arguments

    Arguments:
    argv -- the argument list, without the program name; if None, uses
        sys.argv[1:]

    Exits with status 2 on a usage error.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Defaults
    minAge = 14
    mode = None

    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)
    logging.disable(logging.INFO - 1)
    logger = logging.getLogger('cleanup-maildir')
    cleaner = MaildirCleaner()

    # Read command-line arguments
    try:
        opts, args = getopt.getopt(
            argv,
            "hqvnrm:t:a:kd:",
            ["help", "quiet", "verbose", "version", "mode=", "trash-folder=",
             "age=", "keep-flagged-threads", "keep-read",
             "folder-seperator=", "folder-prefix=", "maildir-root=",
             "archive-folder=", "archive-hierarchy-depth=", "trial-run"])
    except getopt.GetoptError:
        err = sys.exc_info()[1]
        logger.error("%s\n\n%s" % (err.msg, __doc__))
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            print(__doc__)
            sys.exit()
        elif o in ("-q", "--quiet"):
            logging.disable(logging.WARNING - 1)
        elif o in ("-v", "--verbose"):
            logging.disable(logging.DEBUG - 1)
        elif o == "--version":
            print(__version__)
            sys.exit()
        elif o in ("-n", "--trial-run"):
            cleaner.isTrialRun = True
        elif o in ("-m", "--mode"):
            logger.warning("the --mode flag is deprecated (see --help)")
            if a in ('trash', 'archive', 'delete'):
                mode = a
            else:
                logger.error("%s is not a valid command" % a)
                sys.exit(2)
        elif o in ("-t", "--trash-folder"):
            cleaner.trashFolder = a
        elif o == "--archive-folder":
            cleaner.archiveFolder = a
        elif o in ("-a", "--age"):
            try:
                minAge = int(a)
            except ValueError:
                logger.error("%s is not a valid age in days" % a)
                sys.exit(2)
        elif o in ("-k", "--keep-flagged-threads"):
            cleaner.keepFlaggedThreads = True
        elif o in ("-r", "--keep-read"):
            cleaner.keepRead = True
        elif o == "--folder-seperator":
            cleaner.folderSeperator = a
        elif o == "--folder-prefix":
            cleaner.folderPrefix = a
        elif o == "--maildir-root":
            cleaner.folderBase = a
        elif o in ("-d", "--archive-hierarchy-depth"):
            try:
                archiveHierDepth = int(a)
            except ValueError:
                # Not a number at all; report it like an out-of-range depth.
                archiveHierDepth = 0
            if archiveHierDepth < 1 or archiveHierDepth > 3:
                sys.stderr.write("Error: archive hierarchy depth must be 1, " +
                                 "2, or 3.\n")
                sys.exit(2)
            cleaner.archiveHierDepth = archiveHierDepth

    if not cleaner.folderBase:
        # expanduser honours $HOME and falls back to the password database
        # when it is unset.
        cleaner.folderBase = os.path.join(os.path.expanduser("~"), "Maildir")

    if mode is None:
        if len(args) < 1:
            logger.error("No command specified")
            sys.stderr.write(__doc__)
            sys.exit(2)
        mode = args.pop(0)
        if mode not in ('trash', 'archive', 'delete'):
            logger.error("%s is not a valid command" % mode)
            sys.exit(2)

    if len(args) == 0:
        logger.error("No folder(s) specified")
        sys.stderr.write(__doc__)
        sys.exit(2)

    logger.debug("Mode is " + mode)

    # Clean each folder
    for folder in args:
        logger.debug("Cleaning up %s..." % folder)
        cleaner.clean(mode, folder, minAge)

    logger.info('Total messages:     %5d' % cleaner.stats['total'])
    logger.info('Affected messages:  %5d' % cleaner.stats[mode])
    logger.info('Untouched messages: %5d' %
                (cleaner.stats['total'] - cleaner.stats[mode]))


if __name__ == "__main__":
    main()