#! /usr/bin/env python
"""
version 0.9
Created by Howard Hansen
howardh@halfmagic.com
http://howard.editthispage.com
February 9, 2002
Copyright (c) 2002, Howard Hansen All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer. Redistributions in
binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or other
materials provided with the distribution. Neither the name of Howard
Hansen nor the names of its contributors may be used to endorse or
promote products derived from this software without specific prior
written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import time, os, string, traceback
class Logwatch:
    """
    Parses Zope (Apache combined format) access-log lines and builds
    simple frequency summaries from them.

    Every parsed line is an 11-item tuple.  The class constants below
    name the tuple index of each field.
    """
    IP = 0
    USERNAME = 1
    TIMESTAMP = 2
    REQUEST_TYPE = 3
    REQUEST_PATH = 4
    REQUEST_PROTOCOL = 5
    RESULT = 6
    SIZE = 7
    REFERRER = 8
    AGENT = 9
    HOST = 10

    def __init__(self):
        pass

    def getchunk(self, text, start, delimiter):
        """
        Start at index start in string text and search for delimiter.
        Return all characters between start and the delimiter, along with
        the position of the delimiter, as a tuple.

        Raises IndexError when the delimiter does not occur at or after
        start.
        """
        index = text.find(delimiter, start)
        if index == -1:
            raise IndexError("Couldn't find delimiter '%s' in string '%s'" % (delimiter, text))
        return (text[start:index], index)

    def parseline(self, logline):
        """
        Parse one line of a zope log, returning an 11-item tuple ordered
        as described by the class constants (IP ... HOST).

        Expected layout:
            ip - user [timestamp] "request" result size "referrer" "agent"

        Raises IndexError (from getchunk) when a field delimiter is
        missing.
        """
        # The numeric offsets skip the fixed separators between fields,
        # e.g. " - " after the ip and '] "' after the timestamp.
        (ip, pos) = self.getchunk(logline, 0, " ")
        (username, pos) = self.getchunk(logline, pos + 3, " ")
        (timestamp, pos) = self.getchunk(logline, pos + 2, "]")
        (request, pos) = self.getchunk(logline, pos + 3, '"')

        # URLs may contain spaces, so the request cannot simply be split
        # into three parts: the first token is the method, the last one
        # the protocol, and everything in between is the path.
        parts = request.split()
        request_type = ''
        request_path = ''
        request_protocol = ''
        if len(parts) >= 3:
            request_type = parts[0]
            request_protocol = parts[-1]
            request_path = ' '.join(parts[1:-1])
        elif len(parts) == 2:
            # Request without a protocol token ("GET /path").
            (request_type, request_path) = parts
        elif len(parts) == 1:
            request_type = parts[0]
        (host, path) = self.splithostandpath(request_path)

        (result, pos) = self.getchunk(logline, pos + 2, ' ')
        (size, pos) = self.getchunk(logline, pos + 1, ' ')
        (referrer, pos) = self.getchunk(logline, pos + 2, '"')
        (agent, pos) = self.getchunk(logline, pos + 3, '"')
        return (ip,
                username,
                timestamp,
                request_type,
                path,
                request_protocol,
                result,
                size,
                referrer,
                agent,
                host)

    def splithostandpath(self, request_path):
        """
        In sites using Zope's Virtual Host Monster behind Apache's URL
        rewriting this splits out the host name from the rather
        convoluted raw path.

        request_path looks like:
            /VirtualHostBase/http/zope.mightydreams.com:80/uptime
        The result for this should be:
            ('zope.mightydreams.com', '/uptime')

        Paths without the '/VirtualHostBase/http/' prefix are returned
        unchanged with the host "*".  A missing port or a missing path
        is tolerated (the path then defaults to "/").

        This may need modification for sites using different tools.
        """
        prefix = '/VirtualHostBase/http/'
        if not request_path.startswith(prefix):
            return ("*", request_path)
        rest = request_path[len(prefix):]
        slashpos = rest.find("/")
        if slashpos == -1:
            hostport = rest
            path = "/"
        else:
            hostport = rest[:slashpos]
            path = rest[slashpos:]
        # Only look for the port separator inside the host part so that
        # a colon further down the path cannot be mistaken for it.
        host = hostport.split(":", 1)[0].lower()
        return (host, path)

    def summarize(self, tupleindex, sourcelist):
        """
        Takes an index indicating which tuple item to summarize and a
        source list of tuples.  Returns a dictionary with keys
        representing the data items in the list, and values representing
        the number of occurrences of each item.
        """
        results = {}
        for item in sourcelist:
            key = item[tupleindex]
            results[key] = results.get(key, 0) + 1
        return results

    def sortdictbyvalue(self, dict):
        """
        Takes a dictionary object and returns a list of tuples.  Index 0
        is the dictionary key, index 1 is the value associated.  List is
        sorted in descending order by the value.
        """
        return sorted(dict.items(), key=lambda pair: pair[1], reverse=True)

    def handlelog(self, tailcount, logfile):
        """
        Gets last tailcount lines in the log file.  Parses those lines
        into tuples and returns a list of those tuples.

        Relies on the external "tail" program.  Blank lines are skipped.
        """
        import subprocess
        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact; "--" protects names starting
        # with a dash.
        command = ["tail", "-n", str(int(tailcount)), "--", logfile]
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        output = process.communicate()[0]
        parsedlog = []
        for entry in output.splitlines(True):
            if entry.strip():
                parsedlog.append(self.parseline(entry))
        return parsedlog
def main(scr, logpath, entrycount, frequency):
    """
    The main loop for curses mode.

    Repaints the report every frequency seconds, or immediately when the
    user hits 'u' or space, and refreshes the status line roughly once a
    second.  Returns when the user hits 'q'.

    scr is expected to be a curses window in non-blocking (nodelay)
    mode, so that getch() returns -1 when no key is pending.
    """
    def repaint():
        # Redraw the report; hand back the next scheduled update time
        # and the wall-clock label shown in the status bar.
        paintscreen(scr, logpath, entrycount)
        return (time.time() + frequency, time.strftime('%H:%M:%S'))

    (nexttime, lastupdate) = repaint()
    while True:
        (size_y, size_x) = scr.getmaxyx()
        remaining = max(0, int(nexttime - time.time()))
        statusmessage(scr, "Last update: %s. Next in %d sec. Hit 'q' to quit or space to update now." % (lastupdate, remaining))
        c = scr.getch()
        if c == ord('q'):
            # Clear the status line before handing the terminal back.
            scr.move(size_y-1, 0)
            scr.clrtoeol()
            scr.refresh()
            break
        if c in (ord('u'), ord(' ')) or time.time() >= nexttime:
            # Manual and scheduled updates use the same wall clock, so
            # the countdown stays consistent.
            (nexttime, lastupdate) = repaint()
        time.sleep(1)
def statusmessage(scr, message):
    """
    Writes message into the bottom line of the window scr, replacing
    whatever was there.  The text is clipped so that it never reaches
    the last column of the window.
    """
    (height, width) = scr.getmaxyx()
    bottom = height - 1
    # Blank the status line first ...
    scr.move(bottom, 0)
    scr.clrtoeol()
    # ... then write the clipped text at its start.
    clipped = message[:width]
    scr.addnstr(bottom, 0, clipped, width - 1)
def cpuusage():
    """
    Runs vmstat and returns, as a tuple of strings, the whitespace
    separated fields from index 13 onward of its last output line.
    Returns an empty tuple when vmstat produced no output.

    NOTE(review): which columns these are depends on the platform's
    vmstat layout -- the original author was not sure this is correct
    either.
    """
    pipe = os.popen('vmstat')
    try:
        lines = pipe.readlines()
    finally:
        # Always release the pipe, even if reading fails.
        pipe.close()
    if not lines:
        return ()
    return tuple(lines[-1].split()[13:])
def statsspan(parsedlog):
    """
    Takes the parsed log list as an argument.
    Returns a (first, last) tuple holding the timestamps of the first
    and the last entry, each converted to seconds by zopetimeparse().
    """
    stamp = Logwatch().TIMESTAMP
    oldest = parsedlog[0][stamp]
    newest = parsedlog[-1][stamp]
    return (zopetimeparse(oldest), zopetimeparse(newest))
def zopetimeparse(ztime):
    """
    ztime looks like: '08/Feb/2002:16:37:53 -0700'
    Returns the time in seconds since the epoch, as a float.

    The time zone offset is deliberately ignored: the date and time are
    interpreted as local time of the machine running this script (via
    time.mktime).  A timestamp without an offset is accepted as well.

    Raises ValueError for an empty or malformed timestamp and KeyError
    for an unknown month abbreviation.
    """
    # English month abbreviations, independent of the current locale.
    # Assumes that all Zope logs use the US format.
    monthhash = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12,
    }
    pieces = ztime.split()
    if not pieces:
        raise ValueError("empty timestamp")
    stamp = pieces[0]                               # strip time zone
    date, hour, minute, second = stamp.split(":")   # split out time
    day, mon, year = date.split("/")                # split out date
    month = monthhash[mon]                          # calculate month number
    fmt = "%Y-%m-%d:%H:%M:%S"   # format that time.strptime understands
    mask = "%s-%s-%s:%s:%s:%s"  # string sub format to build fmt format
    text = mask % (year, month, day, hour, minute, second)
    # strptime validates the individual fields before the conversion.
    return time.mktime(time.strptime(text, fmt))
def interval2str(secs):
    """
    Returns a short text representation of the time interval provided
    (in seconds), showing only the most significant units:

        132092.0 seconds returns '1d 12h 41m'
        3600.0 seconds returns '1h 0m 0s'
        125 seconds returns '2m 5s'

    The interval is broken down with time.gmtime(), i.e. it is treated
    as an offset from 1970-01-01 00:00:00 UTC:

        (1970, 1, 2, 12, 41, 32, 4, 2, 0)
         y     m  d  hr  min sec dow doy tz
         0     1  2  3   4   5   6   7   8
    """
    timet = time.gmtime(secs)
    years = timet[0] - 1970     # whole calendar years since the epoch
    days = timet[7] - 1         # day of year is 1-based
    hours = timet[3]
    minutes = timet[4]
    seconds = timet[5]
    if years > 0:
        return "%dy %dd" % (years, days)
    if days > 0:
        return "%dd %dh %dm" % (days, hours, minutes)
    if hours > 0:
        return "%dh %dm %ds" % (hours, minutes, seconds)
    # Anything shorter than an hour.
    return "%dm %ds" % (minutes, seconds)
def displayresults(scr, offset, datadict, description, maxcount, maxlabelwidth=70, INDENT=4):
    """
    Paints one report section (a heading plus up to maxcount rows) onto
    the window scr, starting one row below offset.

    Rows that would not fit above the bottom of the window are skipped,
    but still counted.  Returns the row offset of the last row of the
    section.
    """
    (rows, cols) = scr.getmaxyx()
    valuewidth = 5
    # Shrink the label column when the window is too narrow for it.
    labelwidth = maxlabelwidth
    if cols < maxlabelwidth + INDENT + valuewidth:
        labelwidth = cols - INDENT - valuewidth - 1
    # Builds something like '%-70s%5d'.
    rowformat = '%%-%ds%%%dd' % (labelwidth, valuewidth)
    lastrow = rows - 1

    top = Logwatch().sortdictbyvalue(datadict)[:maxcount]

    offset += 1
    if offset + 1 < lastrow:
        # section head
        scr.addstr(offset, 0, "Top %d %s:" % (len(top), description))
    for (label, count) in top:
        # section data
        offset += 1
        if offset + 1 < lastrow:
            scr.addstr(offset, INDENT, rowformat % (label[:labelwidth], count))
    return offset
def paintscreen(scr, logpath, entrycount):
    """
    Paints the entire screen for curses mode: a two-line header followed
    by one "Top N" section per summarized field.

    Reads the last entrycount lines of logpath on every call.  When the
    log yields no entries only the header and a short notice are shown.
    """
    # (tuple index, section title, maximum number of rows), in display order
    sections = (
        (Logwatch.HOST, "Hosts", 10),
        (Logwatch.REFERRER, "Referrers", 5),
        (Logwatch.USERNAME, "Users", 5),
        (Logwatch.AGENT, "Agents", 5),
        (Logwatch.REQUEST_PATH, "Request Paths", 10),
        (Logwatch.RESULT, "HTTP result codes", 5),
    )
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)

    offset = 0
    scr.clear()
    scr.addstr(offset, 0, "Analyzing last %d entries from %s" % (entrycount, logpath))
    offset += 1
    if not parsedlog:
        scr.addstr(offset, 0, "No log entries found.")
        return

    (firstsec, lastsec) = statsspan(parsedlog)
    interval = lastsec - firstsec
    if interval > 0:
        # Use the number of entries actually read; the log may hold
        # fewer lines than were requested.
        hitsperhour = int(len(parsedlog) / interval * 3600)
    else:
        # All entries share one timestamp (or are out of order): no
        # meaningful rate can be computed.
        hitsperhour = 0
    scr.addstr(offset, 0, "Last entry: %s, Time interval: %s, %d hits/hr" % (
        time.strftime("%b %d %H:%M", time.localtime(lastsec)),
        interval2str(max(interval, 0)),
        hitsperhour))

    for (index, description, maxcount) in sections:
        summary = watcher.summarize(index, parsedlog)
        offset = displayresults(scr, offset, summary, description, maxcount)
def printresults(datadict, description, maxcount, formatstring="%-70s %5d", INDENT=4):
    """
    Prints one report section to standard output: a "Top N" heading
    followed by at most maxcount rows, most frequent first.  Each row is
    indented by INDENT spaces and rendered with formatstring.
    """
    ranked = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    print("Top %d %s:" % (len(ranked), description))
    margin = " " * INDENT
    for entry in ranked:
        print(margin + formatstring % entry)
def printreport(logpath, entrycount):
    """
    Prints all results for a log as plain text: a header, the time
    stamps of the first and last entry, and one "Top N" section per
    summarized field.

    When the log yields no entries only the header and a short notice
    are printed.
    """
    # (tuple index, section title, maximum number of rows), in print order
    sections = (
        (Logwatch.HOST, "Hosts", 10),
        (Logwatch.REFERRER, "Referrers", 5),
        (Logwatch.USERNAME, "Users", 5),
        (Logwatch.AGENT, "Agents", 5),
        (Logwatch.REQUEST_PATH, "Request Paths", 10),
        (Logwatch.RESULT, "HTTP result codes", 5),
    )
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)

    print("Analyzing last %d entries from %s. Updated: %s" % (entrycount, logpath, time.ctime()))
    if not parsedlog:
        print("No log entries found.")
        return
    stamp = watcher.TIMESTAMP
    print("First entry: %s, Last entry: %s" % (parsedlog[0][stamp], parsedlog[-1][stamp]))
    for (index, description, maxcount) in sections:
        printresults(watcher.summarize(index, parsedlog), description, maxcount)
def printxmlsection(datadict, description, maxcount):
    """
    Prints one report section as xml, most frequent item first.

    Section format:
        <section name="Hosts" count="1" max="10">
          <items>
            <item>
              <name>zope.mightydreams.com</name>
              <count>158</count>
            </item>
          </items>
        </section>

    NOTE(review): the original markup was lost from this file (the
    string literals had been stripped of their tags, which made the
    heading line raise TypeError).  The element and attribute names
    above are a reconstruction -- confirm against any consumer of this
    output.
    """
    from xml.sax.saxutils import escape, quoteattr

    top = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    print('    <section name=%s count="%d" max="%d">' % (quoteattr(description), len(top), maxcount))
    print('      <items>')
    for (name, count) in top:
        print('        <item>')
        # Referrers and agents routinely contain '&' and '<'.
        print('          <name>%s</name>' % escape(name))
        print('          <count>%d</count>' % count)
        print('        </item>')
    print('      </items>')
    print('    </section>')
def printxmlreport(logpath, entrycount):
    """
    Prints entire report as xml.

    XML looks something like this:
        <logwatch>
          <updated>Fri Feb 8 10:39:11 2002</updated>
          <logfile>/var/lib/zope/var/Z2.log</logfile>
          <entrycount>200</entrycount>
          <firstentry>...</firstentry>
          <lastentry>...</lastentry>
          <sections>
            ... one section per summarized field ...
          </sections>
        </logwatch>

    The first/last entry elements are omitted when the log yields no
    entries.

    NOTE(review): the original markup was lost from this file (the
    string literals had been stripped of their tags).  The element names
    above are a reconstruction -- confirm against any consumer of this
    output.
    """
    from xml.sax.saxutils import escape

    # (tuple index, section title, maximum number of rows), in print order
    sections = (
        (Logwatch.HOST, "Hosts", 10),
        (Logwatch.REFERRER, "Referrers", 5),
        (Logwatch.USERNAME, "Users", 5),
        (Logwatch.AGENT, "Agents", 5),
        (Logwatch.REQUEST_PATH, "Request Paths", 10),
        (Logwatch.RESULT, "HTTP result codes", 5),
    )
    watcher = Logwatch()
    parsedlog = watcher.handlelog(entrycount, logpath)

    print("<logwatch>")
    print("  <updated>%s</updated>" % escape(time.ctime()))
    print("  <logfile>%s</logfile>" % escape(logpath))
    print("  <entrycount>%d</entrycount>" % entrycount)
    if parsedlog:
        stamp = watcher.TIMESTAMP
        print("  <firstentry>%s</firstentry>" % escape(parsedlog[0][stamp]))
        print("  <lastentry>%s</lastentry>" % escape(parsedlog[-1][stamp]))
    print("  <sections>")
    for (index, description, maxcount) in sections:
        printxmlsection(watcher.summarize(index, parsedlog), description, maxcount)
    print("  </sections>")
    print("</logwatch>")
def usage():
    """
    Prints the command line help text to standard output.
    """
    # The trailing backslashes are written as "\\" so that the shell
    # line-continuation examples are not invalid escape sequences.
    lines = (
        "Usage: logwatch.py -l path -n numberofentries -m (curses | text | xml) -u update frequency",
        "",
        "Example: ",
        " logwatch.py -l /var/lib/zope/var/Z2.log -m curses -n 200 -u 120",
        "",
        "This would run in full-screen curses mode by pulling the last 200 entries from the",
        "log and update the display every 120 seconds.",
        "",
        "This would do the same thing:",
        " logwatch.py --logfile /var/lib/zope/var/Z2.log \\ ",
        " --numberofentries 200 \\ ",
        " --mode curses \\ ",
        " --updatefrequency 120 ",
        "",
        "Choosing text mode causes the program to read the log, perform the analysis, print it to",
        "the screen and exit immediately.",
        "",
        "Logwatch created by Howard Hansen (howardh@halfmagic.com), feel free to modify and use as",
        "you will.",
    )
    for line in lines:
        print(line)
if __name__=='__main__':
    # Command line entry point: parse the options, validate them, then
    # run the report in the requested mode (curses, text or xml).
    import sys, getopt, pprint

    try:
        # Long options that take a value need a trailing "=".
        opts, args = getopt.getopt(sys.argv[1:],
                                   "dhl:n:m:u:",
                                   ["debug", "help", "logfile=", "numberofentries=", "mode=", "updatefrequency="])
    except getopt.GetoptError:
        # print help info and exit
        usage()
        sys.exit(2)
    if len(opts) == 0:
        usage()
        sys.exit()

    logpath = None
    entrycount = None
    mode = None
    frequency = None
    # getopt reports long options including their leading "--".
    for o, a in opts:
        if o in ("-d", "--debug"):
            print("opts, args:")
            print("%s %s" % (opts, args))
            print("sys.argv:")
            print(sys.argv)
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-l", "--logfile"):
            logpath = a
            if not os.path.exists(logpath):
                print("Error: log file path '%s' does not exist!" % logpath)
                sys.exit(2)
        if o in ("-n", "--numberofentries"):
            try:
                entrycount = int(a)
                if entrycount < 1:
                    raise ValueError(a)
            except ValueError:
                print("Error: number of entries value '%s' is invalid! Needs to be a positive integer." % a)
                sys.exit(2)
        if o in ("-m", "--mode"):
            mode = a.lower()
        if o in ("-u", "--updatefrequency"):
            try:
                frequency = int(a)
                if frequency < 1:
                    raise ValueError(a)
            except ValueError:
                print("Error: update frequency value '%s' is invalid! Needs to be a positive integer." % a)
                sys.exit(2)

    # Every mode needs a log file, an entry count and the mode itself.
    missing = []
    if logpath is None:
        missing.append("-l/--logfile")
    if entrycount is None:
        missing.append("-n/--numberofentries")
    if not mode:
        missing.append("-m/--mode")
    if missing:
        print("Error: missing required option(s): %s" % ", ".join(missing))
        usage()
        sys.exit(2)

    if mode[0] == "c": # curses
        if frequency is None:
            print("Error: curses mode needs an update frequency (-u/--updatefrequency)")
            sys.exit(2)
        try:
            import curses
            # Initialize curses
            scr = curses.initscr()
            try:
                # Turn off echoing of keys, and enter cbreak mode,
                # where no buffering is performed on keyboard input
                curses.noecho()
                curses.cbreak()
                # In keypad mode, escape sequences for special keys
                # (like the cursor keys) will be interpreted and
                # a special value like curses.KEY_LEFT will be returned
                scr.keypad(1)
                scr.nodelay(1) # makes getch() non-blocking
                main(scr, logpath, entrycount, frequency) # Enter the main loop
            finally:
                # Restore the terminal to a sane state, whether the
                # main loop ended normally or with an error.
                scr.keypad(0)
                curses.echo()
                curses.nocbreak()
                curses.endwin() # Terminate curses
        except Exception:
            traceback.print_exc() # Print the exception
            sys.exit(1)
    elif mode[0] == "t": # text
        try:
            printreport(logpath, entrycount)
        except Exception:
            traceback.print_exc() # Print the exception
            sys.exit(1)
    elif mode[0] == "x": # xml
        try:
            printxmlreport(logpath, entrycount)
        except Exception:
            traceback.print_exc() # Print the exception
            sys.exit(1)
    else: # unknown mode
        print("Error: Unknown mode '%s'" % mode)
        print(" Valid modes are: curses, text, or xml")
        sys.exit(2)