#! /usr/bin/env python
"""
version 0.9

Created by Howard Hansen
howardh@halfmagic.com
http://howard.editthispage.com)
February 9, 2002

Copyright (c) 2002, Howard Hansen
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

Neither the name of Howard Hansen nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""

import os
import string
import subprocess
import time
import traceback
from xml.sax.saxutils import escape, quoteattr


class Logwatch:
    """
    Parses access-log lines into tuples and summarizes them.

    The class attributes below are the indexes of each field in the tuple
    returned by parseline().
    """
    IP = 0
    USERNAME = 1
    TIMESTAMP = 2
    REQUEST_TYPE = 3
    REQUEST_PATH = 4
    REQUEST_PROTOCOL = 5
    RESULT = 6
    SIZE = 7
    REFERRER = 8
    AGENT = 9
    HOST = 10

    # Raw-path prefixes recognized by splithostandpath().
    VHOST_PREFIXES = ('/VirtualHostBase/http/', '/VirtualHostBase/https/')

    def __init__(self):
        pass

    def getchunk(self, text, start, delimiter):
        """
        Start at index start in string text and search for delimiter.

        Returns a tuple (chunk, index): the characters between start and
        the delimiter, and the position of the delimiter itself.

        Raises IndexError when the delimiter does not occur at or after
        start.
        """
        index = text.find(delimiter, start)
        if index == -1:
            raise IndexError("Couldn't find delimiter '%s' in string '%s'"
                             % (delimiter, text))
        return (text[start:index], index)

    def parseline(self, logline):
        """
        Parse one line of a zope log, returning an 11-item tuple:

            (ip, username, timestamp, request type, request path,
             request protocol, result, size, referrer, agent, host)

        The expected layout is:

            ip - user [timestamp] "TYPE path PROTOCOL" result size "referrer" "agent"

        Raises IndexError when the line does not follow that layout.
        """
        (ip, pos) = self.getchunk(logline, 0, " ")
        # The offsets skip the fixed separators between fields.
        (username, pos) = self.getchunk(logline, pos + 3, " ")    # ' - '
        (timestamp, pos) = self.getchunk(logline, pos + 2, "]")   # ' ['
        (request, pos) = self.getchunk(logline, pos + 3, '"')     # '] "'

        # URLs may contain spaces, so only the first word (type) and the
        # last word (protocol) are fixed; everything in between is the path.
        words = request.split()
        request_type = words[0]
        request_protocol = words[-1]
        request_path = ' '.join(words[1:-1])
        (host, path) = self.splithostandpath(request_path)

        (result, pos) = self.getchunk(logline, pos + 2, ' ')      # '" '
        (size, pos) = self.getchunk(logline, pos + 1, ' ')        # ' '
        (referrer, pos) = self.getchunk(logline, pos + 2, '"')    # ' "'
        (agent, pos) = self.getchunk(logline, pos + 3, '"')       # '" "'

        return (ip, username, timestamp, request_type, path,
                request_protocol, result, size, referrer, agent, host)

    def splithostandpath(self, request_path):
        """
        In sites using Zope's Virtual Host Monster behind Apache's URL
        rewriting this splits out the host name from the rather convoluted
        raw path.

        request_path looks like:
            /VirtualHostBase/http/zope.mightydreams.com:80/uptime

        The result for this should be:
            ('zope.mightydreams.com', '/uptime')

        The host is lower-cased and any ':port' suffix is dropped. A
        missing path is reported as '/'. Paths without one of the
        VHOST_PREFIXES are returned unchanged with the host '*'.

        This may need modification for sites using different tools.
        """
        for prefix in self.VHOST_PREFIXES:
            if not request_path.startswith(prefix):
                continue
            remainder = request_path[len(prefix):]
            # Split host[:port] from the path first, so that a ':' inside
            # the path is never mistaken for the port separator.
            slashpos = remainder.find("/")
            if slashpos == -1:
                hostport = remainder
                path = "/"
            else:
                hostport = remainder[:slashpos]
                path = remainder[slashpos:]
            host = hostport.split(":")[0].lower()
            return (host, path)
        return ("*", request_path)

    def summarize(self, tupleindex, sourcelist):
        """
        Takes an index indicating which tuple item to summarize and a
        source list of tuples.

        Returns a dictionary with keys representing the data items in the
        list, and values representing the number of occurrences of each
        item.
        """
        results = {}
        for item in sourcelist:
            key = item[tupleindex]
            results[key] = results.get(key, 0) + 1
        return results

    def sortdictbyvalue(self, dict):
        """
        Takes a dictionary object and returns a list of tuples. Index 0 is
        the dictionary key, index 1 is the value associated. List is sorted
        in descending order by the value.
        """
        # The parameter keeps its historical name for keyword callers.
        return sorted(dict.items(), key=lambda pair: pair[1], reverse=True)

    def handlelog(self, tailcount, logfile):
        """
        Gets last tailcount lines in the log file by running 'tail'.
        Parses those lines into tuples and returns a list of those tuples.

        Lines that parseline() cannot parse (blank or truncated lines) are
        skipped, so the result may hold fewer than tailcount tuples.
        """
        # An argument list (no shell) keeps odd file names from being
        # interpreted; '--' stops tail from reading the name as an option.
        command = ["tail", "-n", str(int(tailcount)), "--", logfile]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        output = process.communicate()[0]
        if not isinstance(output, str):
            # Python 3 hands back bytes; never fail on odd bytes in a log.
            output = output.decode("utf-8", "replace")

        parsedlog = []
        for entry in output.splitlines(True):
            try:
                parsedlog.append(self.parseline(entry))
            except IndexError:
                continue
        return parsedlog


def main(scr, logpath, entrycount, frequency):
    """
    The main loop for curses mode. Polls the keyboard once a second.

    The screen is repainted every frequency seconds, or immediately when
    'u' or space is pressed. 'q' clears the status line and returns.
    scr.getch() is expected to be non-blocking (it returns -1 when no key
    is waiting).
    """
    def repaint():
        paintscreen(scr, logpath, entrycount)
        return (time.time() + frequency, time.strftime('%H:%M:%S'))

    (nexttime, lastupdate) = repaint()
    while True:
        (size_y, size_x) = scr.getmaxyx()
        remaining = max(0, int(nexttime - time.time()))
        statusmessage(scr, "Last update: %s. Next in %d sec. Hit 'q' to quit or space to update now." % (lastupdate, remaining))
        c = scr.getch()
        if c == ord('q'):
            scr.move(size_y - 1, 0)
            scr.clrtoeol()
            scr.refresh()
            break
        if c in (ord('u'), ord(' ')) or time.time() >= nexttime:
            (nexttime, lastupdate) = repaint()
        time.sleep(1)


def statusmessage(scr, message):
    """
    Adds a statusbar message at the bottom of the screen, as defined by
    scr. The message is cut so that it never reaches the last column.
    """
    (size_y, size_x) = scr.getmaxyx()
    lastrow = size_y - 1
    scr.move(lastrow, 0)
    scr.clrtoeol()
    scr.addnstr(lastrow, 0, message, size_x - 1)


def cpuusage():
    """
    Calculates current cpu usage from vmstat.
    Don't know if this works correctly.

    Returns a tuple with the fields from column 14 onward of the last
    line that vmstat prints, or an empty tuple if it prints nothing.
    """
    pipe = os.popen('vmstat')
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    if not lines:
        return ()
    return tuple(lines[-1].split()[13:])


def statsspan(parsedlog):
    """
    Takes the parsed log list as an argument.
    Returns a tuple with the seconds of the start and end times.

    Raises IndexError when parsedlog is empty.
    """
    timeindex = Logwatch.TIMESTAMP
    firstsec = zopetimeparse(parsedlog[0][timeindex])
    lastsec = zopetimeparse(parsedlog[-1][timeindex])
    return (firstsec, lastsec)


# Month abbreviations as written in the log; assumes that all Zope logs use
# US format. A fixed table keeps parsing independent of the process locale.
MONTHNUMBERS = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}


def zopetimeparse(ztime):
    """
    ztime looks like: '08/Feb/2002:16:37:53 -0700'

    Returns Unix time (seconds of the epoch) as computed by time.mktime().
    The time zone offset is dropped and the remaining fields are
    interpreted as local time.

    Raises IndexError, ValueError or KeyError when ztime does not have the
    layout shown above.
    """
    datetime = ztime.split()[0]                        # strip time zone
    (date, hour, minute, sec) = datetime.split(":")    # split out time
    (day, mon, year) = date.split("/")                 # split out date
    # Weekday and day-of-year are ignored by mktime; -1 lets the C library
    # decide whether daylight saving time applies.
    fields = (int(year), MONTHNUMBERS[mon], int(day),
              int(hour), int(minute), int(sec), 0, 0, -1)
    return time.mktime(fields)


def interval2str(secs):
    """
    Returns a text representation of the time interval provided, showing
    the most significant units only:

        132092.0 seconds returns '1d 12h 41m'
          3600.0 seconds returns '1h 0m 0s'
            90   seconds returns '1m 30s'

    Negative intervals are treated as zero.
    """
    # Reading the interval as a date counted from the epoch splits it into
    # units: (1970, 1, 2, 12, 41, 32, ...) is 0 years, 1 day, 12h 41m 32s.
    timet = time.gmtime(max(0, secs))
    years = timet[0] - 1970
    days = timet[7] - 1
    hours = timet[3]
    minutes = timet[4]
    seconds = timet[5]

    if years > 0:
        return "%dy %dd" % (years, days)
    if days > 0:
        return "%dd %dh %dm" % (days, hours, minutes)
    if hours > 0:
        return "%dh %dm %ds" % (hours, minutes, seconds)
    return "%dm %ds" % (minutes, seconds)


def _summarysections(watcher, parsedlog):
    """
    Returns the report sections in display order, as a list of
    (summary dictionary, description, maximum number of rows) tuples.
    """
    layout = (
        (watcher.HOST, "Hosts", 10),
        (watcher.REFERRER, "Referrers", 5),
        (watcher.USERNAME, "Users", 5),
        (watcher.AGENT, "Agents", 5),
        (watcher.REQUEST_PATH, "Request Paths", 10),
        (watcher.RESULT, "HTTP result codes", 5),
    )
    return [(watcher.summarize(index, parsedlog), description, maxcount)
            for (index, description, maxcount) in layout]


def displayresults(scr, offset, datadict, description, maxcount, maxlabelwidth=70, INDENT=4):
    """
    Paints results from one section to the screen.

    offset is the last screen row already used; the row of the last entry
    of this section is returned. Rows that would not fit above the status
    line are counted but not drawn.
    """
    # Check screen size and modify label width appropriately
    (size_y, size_x) = scr.getmaxyx()
    valuewidth = 5
    if size_x < maxlabelwidth + INDENT + valuewidth:
        labelwidth = size_x - INDENT - valuewidth - 1
    else:
        labelwidth = maxlabelwidth
    # model is something like: '%-70s%5d'
    formatstring = '%%-%ds%%%dd' % (labelwidth, valuewidth)

    rows = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    offset += 1
    if offset + 1 < size_y - 1:
        # section head
        scr.addstr(offset, 0, "Top %d %s:" % (len(rows), description))
    for (label, count) in rows:
        # section data
        offset += 1
        if offset + 1 < size_y - 1:
            scr.addstr(offset, INDENT, formatstring % (label[:labelwidth], count))
    return offset


def paintscreen(scr, logpath, entrycount):
    """
    Paints the entire screen for curses mode.

    The hit rate is computed from the number of lines actually parsed and
    is shown as 0 when all entries fall within the same second.
    """
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)

    offset = 0
    scr.clear()
    scr.addstr(offset, 0, "Analyzing last %d entries from %s" % (entrycount, logpath))
    offset += 1
    if not parsedlog:
        scr.addstr(offset, 0, "No parsable log entries found.")
        return

    (firstsec, lastsec) = statsspan(parsedlog)
    interval = lastsec - firstsec
    if interval > 0:
        hitsperhour = int(len(parsedlog) / interval * 3600)
    else:
        hitsperhour = 0
    scr.addstr(offset, 0, "Last entry: %s, Time interval: %s, %d hits/hr" % (
        time.strftime("%b %d %H:%M", time.localtime(lastsec)),
        interval2str(interval),
        hitsperhour))

    for (datadict, description, maxcount) in _summarysections(watcher, parsedlog):
        offset = displayresults(scr, offset, datadict, description, maxcount)


def printresults(datadict, description, maxcount, formatstring="%-70s %5d", INDENT=4):
    """
    Prints out the results for a given section.
    """
    rows = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    print("Top %d %s:" % (len(rows), description))
    for row in rows:
        print(" " * INDENT + formatstring % row)


def printreport(logpath, entrycount):
    """
    Prints all results for a log.
    """
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)

    print("Analyzing last %d entries from %s. Updated: %s" % (entrycount, logpath, time.ctime()))
    if parsedlog:
        print("First entry: %s, Last entry: %s" % (
            parsedlog[0][watcher.TIMESTAMP], parsedlog[-1][watcher.TIMESTAMP]))
    else:
        print("No parsable log entries found.")

    for (datadict, description, maxcount) in _summarysections(watcher, parsedlog):
        printresults(datadict, description, maxcount)


def printxmlsection(datadict, description, maxcount):
    """
    Prints report section as xml.

    Section format:

        <section name="Hosts" count="1" maxcount="10">
          <items>
            <item>
              <name>zope.mightydreams.com</name>
              <count>158</count>
            </item>
          </items>
        </section>

    NOTE(review): the markup in the original source was lost, so the
    element and attribute names used here are a reconstruction; confirm
    them against whatever consumes this output.
    """
    itemformat = ("        <item>\n"
                  "          <name>%s</name>\n"
                  "          <count>%d</count>\n"
                  "        </item>")
    rows = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    # quoteattr() supplies the surrounding quotes for the name attribute.
    print('    <section name=%s count="%d" maxcount="%d">'
          % (quoteattr(description), len(rows), maxcount))
    print('      <items>')
    for (label, count) in rows:
        # Referrers and agents routinely contain '&' and '<'.
        print(itemformat % (escape(label), count))
    print('      </items>')
    print('    </section>')


def printxmlreport(logpath, entrycount):
    """
    Prints entire report as xml.

    XML looks something like this:

        <logwatch>
          <updated>Fri Feb 8 10:39:11 2002</updated>
          <logfile>/var/lib/zope/var/Z2.log</logfile>
          <entrycount>200</entrycount>
          <firstentry>08/Feb/2002:10:12:01 -0700</firstentry>
          <lastentry>08/Feb/2002:10:39:07 -0700</lastentry>
          <sections>
            ... one section per summary, see printxmlsection() ...
          </sections>
        </logwatch>

    NOTE(review): the markup in the original source was lost, so the
    element names used here are a reconstruction; confirm them against
    whatever consumes this output.
    """
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)
    if parsedlog:
        firstentry = parsedlog[0][watcher.TIMESTAMP]
        lastentry = parsedlog[-1][watcher.TIMESTAMP]
    else:
        firstentry = lastentry = ""

    print("<logwatch>")
    print("  <updated>%s</updated>" % escape(time.ctime()))
    print("  <logfile>%s</logfile>" % escape(logpath))
    print("  <entrycount>%d</entrycount>" % entrycount)
    print("  <firstentry>%s</firstentry>" % escape(firstentry))
    print("  <lastentry>%s</lastentry>" % escape(lastentry))
    print("  <sections>")
    for (datadict, description, maxcount) in _summarysections(watcher, parsedlog):
        printxmlsection(datadict, description, maxcount)
    print("  </sections>")
    print("</logwatch>")


def usage():
    """
    Prints the command-line help text.
    """
    lines = (
        "Usage: logwatch.py -l path -n numberofentries -m (curses | text | xml) -u update frequency",
        "",
        "Example: ",
        "    logwatch.py -l /var/lib/zope/var/Z2.log -m curses -n 200 -u 120",
        "",
        "This would run in full-screen curses mode by pulling the last 200 entries from the",
        "log and update the display every 120 seconds.",
        "",
        "This would do the same thing:",
        "    logwatch.py --logfile /var/lib/zope/var/Z2.log \\",
        "                --numberofentries 200 \\",
        "                --mode curses \\",
        "                --updatefrequency 120",
        "",
        "Choosing text mode causes the program to read the log, perform the analysis, print it to",
        "the screen and exit immediately.",
        "",
        "Logwatch created by Howard Hansen (howardh@halfmagic.com), feel free to modify and use as",
        "you will.",
    )
    print("\n".join(lines))
def parseoptions(argv):
    """
    Parses the command-line arguments (without the program name).

    Returns a tuple (logpath, entrycount, mode, frequency). mode is
    lower-cased; frequency is None when -u was not given, which is only
    accepted for the text and xml modes.

    Prints the usage text or an error message and exits the program when
    the arguments are missing or invalid.
    """
    import getopt
    import sys

    def fail(message):
        print(message)
        sys.exit(2)

    def positiveint(text, label):
        try:
            value = int(text)
        except ValueError:
            value = 0
        if value < 1:
            fail("Error: %s value '%s' is invalid! Needs to be a positive integer." % (label, text))
        return value

    try:
        # A trailing '=' marks the long options that take a value.
        (opts, args) = getopt.getopt(
            argv, "dhl:n:m:u:",
            ["debug", "help", "logfile=", "numberofentries=", "mode=",
             "updatefrequency="])
    except getopt.GetoptError:
        # print help info and exit
        usage()
        sys.exit(2)
    if not opts:
        usage()
        sys.exit()

    logpath = entrycount = mode = frequency = None
    for (o, a) in opts:
        # getopt reports long options with their leading dashes.
        if o in ("-d", "--debug"):
            print("opts, args:")
            print("%s %s" % (opts, args))
            print("sys.argv:")
            print("%s" % (sys.argv,))
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-l", "--logfile"):
            logpath = a
            if not os.path.exists(logpath):
                fail("Error: log file path '%s' does not exist!" % logpath)
        elif o in ("-n", "--numberofentries"):
            entrycount = positiveint(a, "number of entries")
        elif o in ("-m", "--mode"):
            mode = a.lower()
        elif o in ("-u", "--updatefrequency"):
            frequency = positiveint(a, "update frequency")

    if logpath is None:
        fail("Error: no log file given! Use -l path.")
    if entrycount is None:
        fail("Error: no number of entries given! Use -n numberofentries.")
    if mode is None:
        fail("Error: no mode given! Use -m (curses | text | xml).")
    if mode[:1] not in ("c", "t", "x"):
        # unknown mode
        print("Error: Unknown mode '%s'" % mode)
        fail("       Valid modes are: curses, text, or xml")
    if mode[:1] == "c" and frequency is None:
        fail("Error: curses mode needs an update frequency! Use -u seconds.")
    return (logpath, entrycount, mode, frequency)


if __name__ == '__main__':
    import sys

    (logpath, entrycount, mode, frequency) = parseoptions(sys.argv[1:])

    if mode[0] == "c":      # curses
        try:
            import curses
            # Initialize curses
            scr = curses.initscr()
            try:
                # Turn off echoing of keys, and enter cbreak mode,
                # where no buffering is performed on keyboard input
                curses.noecho()
                curses.cbreak()
                # In keypad mode, escape sequences for special keys
                # (like the cursor keys) will be interpreted and
                # a special value like curses.KEY_LEFT will be returned
                scr.keypad(1)
                scr.nodelay(1)      # makes getch() non-blocking
                main(scr, logpath, entrycount, frequency)   # Enter the main loop
            finally:
                # Restore the terminal to a sane state, error or not. This
                # only runs once initscr() has succeeded, so scr exists.
                scr.keypad(0)
                curses.echo()
                curses.nocbreak()
                curses.endwin()     # Terminate curses
        except Exception:
            traceback.print_exc()   # Print the exception
    elif mode[0] == "t":    # text
        try:
            printreport(logpath, entrycount)
        except Exception:
            traceback.print_exc()   # Print the exception
    else:                   # xml
        try:
            printxmlreport(logpath, entrycount)
        except Exception:
            traceback.print_exc()   # Print the exception