#! /usr/bin/env python


"""
    version 0.9
    Created by Howard Hansen 
               howardh@halfmagic.com
               http://howard.editthispage.com
    February 9, 2002
    
    Copyright (c) 2002, Howard Hansen All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:

    Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer. Redistributions in
    binary form must reproduce the above copyright notice, this list of
    conditions and the following disclaimer in the documentation and/or other
    materials provided with the distribution. Neither the name of Howard
    Hansen nor the names of its contributors may be used to endorse or
    promote products derived from this software without specific prior
    written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
    AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
    THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""


import time, os, string, traceback

class Logwatch:
    """
    Parses lines of a Zope access log and summarizes the parsed fields.

    The constants below are the indexes of the fields in the tuple
    returned by parseline():
    """
    IP = 0
    USERNAME = 1
    TIMESTAMP = 2
    REQUEST_TYPE = 3
    REQUEST_PATH = 4
    REQUEST_PROTOCOL = 5
    RESULT = 6
    SIZE = 7
    REFERRER = 8
    AGENT = 9
    HOST = 10

    def __init__(self):
        pass

    def getchunk(self, text, start, delimiter):
        """
            Start at index start in string text and search for delimiter.
            Return all characters between start and the delimiter, along with
            the ending position (the index of the delimiter), as a tuple.

            Raises IndexError when the delimiter does not occur at or
            after start.
        """
        index = text.find(delimiter, start)
        if index == -1:
            raise IndexError("Couldn't find delimiter '%s' in string '%s'" % (delimiter, text))
        return (text[start:index], index)

    def parseline(self, logline):
        """
            Parse one line of a zope log, returning an 11-item tuple whose
            positions are given by the class constants (IP ... HOST).

            The REQUEST_PATH item is the path after splithostandpath() has
            removed any virtual-host prefix; HOST is the host it extracted.

            Raises IndexError when an expected delimiter is missing or the
            quoted request is empty.
        """
        # The offsets added to nextstart skip the separators between
        # fields: ' - ' after the ip, ' [' before the timestamp, '] "'
        # before the request, '" ' after it, and so on.
        (ip, nextstart) = self.getchunk(logline, 0, " ")
        (username, nextstart) = self.getchunk(logline, nextstart + 3, " ")
        (timestamp, nextstart) = self.getchunk(logline, nextstart + 2, "]")
        (request, nextstart) = self.getchunk(logline, nextstart + 3, '"')

        # URLs may contain spaces, so only the first word (method) and the
        # last word (protocol) are fixed; everything between is the path.
        words = request.split()
        request_type = words[0]
        request_protocol = words[-1]
        request_path = ' '.join(words[1:-1])
        (host, path) = self.splithostandpath(request_path)

        (result, nextstart) = self.getchunk(logline, nextstart + 2, ' ')
        (size, nextstart) = self.getchunk(logline, nextstart + 1, ' ')
        (referrer, nextstart) = self.getchunk(logline, nextstart + 2, '"')
        (agent, nextstart) = self.getchunk(logline, nextstart + 3, '"')
        return (ip,
                username,
                timestamp,
                request_type,
                path,
                request_protocol,
                result,
                size,
                referrer,
                agent,
                host)

    def splithostandpath(self, request_path):
        """
            In sites using Zope's Virtual Host Monster behind Apache's URL
            rewriting this splits out the host name from the rather
            convoluted raw path

            request_path looks like:
            /VirtualHostBase/http/zope.mightydreams.com:80/uptime

            The result for this should be:
            ('zope.mightydreams.com', '/uptime')

            The host is lower-cased and any ':port' suffix is dropped.  A
            prefixed path with nothing after the host yields the path '/'.
            Paths without the prefix are returned unchanged with host '*'.

            This may need modification for sites using different tools.
        """
        prefix = '/VirtualHostBase/http/'
        if request_path[:len(prefix)] != prefix:
            return ("*", request_path)

        rest = request_path[len(prefix):]
        slashpos = rest.find("/")
        if slashpos == -1:
            # Nothing follows the host; avoid slicing with -1.
            hostport, path = rest, "/"
        else:
            hostport, path = rest[:slashpos], rest[slashpos:]
        # Look for the port separator only inside the host part, so a
        # missing port or a ':' later in the path cannot corrupt the host.
        host = hostport.split(":", 1)[0].lower()
        return (host, path)

    def summarize(self, tupleindex, sourcelist):
        """
            Takes an index indicating which tuple item to summarize and a
            source list of tuples. Returns a dictionary with keys
            representing the data items in the list, and values representing
            the number of occurrences of each item.
        """
        results = {}
        for item in sourcelist:
            key = item[tupleindex]
            results[key] = results.get(key, 0) + 1
        return results

    def sortdictbyvalue(self, dict):
        """
            Takes a dictionary object and returns a list of tuples.  Index 0
            is the dictionary key, index 1 is the value associated.  List is
            sorted in descending order by the value; items with equal
            values keep the dictionary's iteration order.
        """
        return sorted(dict.items(), key=lambda pair: pair[1], reverse=True)

    def handlelog(self, tailcount, logfile):
        """
            Gets last tailcount lines in the log file. Parses those lines
            into tuples and returns a list of those tuples.

            The lines are read by running the external 'tail' command.
            Raises IndexError (from parseline) if a line cannot be parsed.
        """
        # Imported here so the method does not depend on a module-level
        # import being present.
        import subprocess

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact; '--' stops option parsing so a
        # path starting with '-' is still treated as a file name.
        tail = subprocess.Popen(["tail", "-n", "%d" % tailcount, "--", logfile],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        try:
            log = tail.stdout.readlines()
        finally:
            tail.stdout.close()
            tail.wait()

        return [self.parseline(entry) for entry in log]


def main(scr, logpath, entrycount, frequency):
    """
        The main loop for curses mode.

        Paints the report once, then polls the keyboard about once a
        second.  The report is repainted every frequency seconds, or at
        once when 'u' or space is pressed.  'q' clears the status line
        and returns.

        scr        -- curses window; getch() is expected to be non-blocking
                      (it returns -1 when no key is waiting)
        logpath    -- path of the log file to analyze
        entrycount -- number of trailing log entries to analyze
        frequency  -- seconds between automatic repaints
    """
    paintscreen(scr, logpath, entrycount)
    lastupdate = time.strftime('%H:%M:%S')
    nexttime = time.time() + frequency

    while True:
        (size_y, size_x) = scr.getmaxyx()
        remaining = max(0, int(nexttime - time.time()))
        statusmessage(scr, "Last update: %s. Next in %d sec. Hit 'q' to quit or space to update now." % (lastupdate, remaining))

        c = scr.getch()
        if c == ord('q'):
            scr.move(size_y-1, 0)
            scr.clrtoeol()
            scr.refresh()
            break

        # Any other key is ignored.  Repaint on request or when the
        # timer expires; both paths reschedule from wall-clock time.
        if c in (ord('u'), ord(' ')) or time.time() >= nexttime:
            paintscreen(scr, logpath, entrycount)
            lastupdate = time.strftime('%H:%M:%S')
            nexttime = time.time() + frequency
        time.sleep(1)
            

def statusmessage(scr, message):
    """
        Writes message on the bottom line of the window scr, replacing
        whatever was on that line.  The text is cut to the window width
        and at most width-1 characters are written.
    """
    height, width = scr.getmaxyx()
    bottom = height - 1

    # Blank the status line first, then write the clipped text.
    scr.move(bottom, 0)
    scr.clrtoeol()
    clipped = message[:width]
    scr.addnstr(bottom, 0, clipped, width - 1)

def cpuusage():
    """
        Runs 'vmstat' and returns, as a tuple of strings, the
        whitespace-separated fields from index 13 onward of the last
        line it prints.

        NOTE(review): presumably these are the cpu columns, but the
        column layout of vmstat is not checked here -- confirm on the
        target platform before relying on the values.
    """
    report = os.popen('vmstat').readlines()
    lastline = report[-1]
    fields = lastline.split()
    return tuple(fields[13:])

def statsspan(parsedlog):
    """
        Takes the parsed log list as an argument.
        Returns a (first, last) tuple holding the times, in seconds as
        computed by zopetimeparse(), of the first and last entries.
    """
    column = Logwatch.TIMESTAMP
    oldest, newest = parsedlog[0], parsedlog[-1]
    return (zopetimeparse(oldest[column]), zopetimeparse(newest[column]))
    
def zopetimeparse(ztime):
    """
        ztime looks like: '08/Feb/2002:16:37:53 -0700'
        Returns Unix time (seconds of the epoch) as a float.

        The trailing '+HHMM' / '-HHMM' offset is applied, so the result
        does not depend on the time zone of the machine running this
        code.  If ztime carries no offset, the timestamp is interpreted
        in the local time zone.

        Month names are looked up in a fixed English table rather than
        through the locale.

        Raises ValueError for a malformed timestamp and KeyError for an
        unknown month abbreviation.
    """
    # Local imports keep this function self-contained.
    import calendar
    import datetime

    parts = ztime.split()                           # [timestamp, zone]
    date, hour, minute, sec = parts[0].split(":")   # split out time
    day, mon, year = date.split("/")                # split out date
    monthhash = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12
        }                                           # I assume that all Zope logs use US format
    month = monthhash[mon]                          # calculate month number

    # Building a datetime validates the field ranges (day 32, hour 25, ...).
    stamp = datetime.datetime(int(year), month, int(day),
                              int(hour), int(minute), int(sec))

    if len(parts) < 2:
        # No zone information: fall back to local time.
        return time.mktime(stamp.timetuple())

    zone = parts[1]
    offset = int(zone[-4:-2]) * 3600 + int(zone[-2:]) * 60
    if zone[0] == "-":
        offset = -offset
    # The fields are wall-clock time at 'offset' seconds east of UTC;
    # subtracting the offset gives the UTC instant.
    return float(calendar.timegm(stamp.timetuple()) - offset)

def interval2str(secs):
    """
        Returns a text representation of the time interval provided,
        showing only the largest units:

            a year or more      -> '1y 12d'
            a day or more       -> '1d 12h 41m'
            an hour or more     -> '1h 0m 0s'
            a minute or more    -> '2m 5s'
            less than a minute  -> '42s'

        secs is a number of seconds; negative values are treated as 0.

        The interval is broken down with time.gmtime(), so 'years' are
        calendar years counted from 1970 and 'days' is the day of the
        year minus one:

        132092.0 seconds returns 1d 12h 41m
        (1970, 1, 2,  12, 41, 32, 4,  2,   0)
         y     m  d   hr  min sec dow doy  tz
        3600.0 seconds returns 1h 0m 0s
        (1970, 1, 1,  1,  0,  0,  3,  1,   0)
         y     m  d   hr  min sec dow doy  tz
         0     1  2   3   4   5   6   7    8
    """
    timet = time.gmtime(max(0, secs))
    years = timet[0] - 1970
    days = timet[7] - 1
    hours = timet[3]
    minutes = timet[4]
    seconds = timet[5]

    if years > 0:
        return "%dy %dd" % (years, days)
    if days > 0:
        return "%dd %dh %dm" % (days, hours, minutes)
    if hours > 0:
        return "%dh %dm %ds" % (hours, minutes, seconds)
    if minutes > 0:
        return "%dm %ds" % (minutes, seconds)
    return "%ds" % seconds


def displayresults(scr, offset, datadict, description, maxcount, maxlabelwidth=70, INDENT=4):
    """
        Paints one report section on the window scr: a 'Top N ...'
        heading on the row after offset, followed by one row per entry
        (label, then count) for the maxcount largest values in datadict.

        Rows that would reach the last two lines of the window are
        skipped, but still counted.  Returns the row number of the last
        entry, to be passed as offset for the next section.
    """
    rows, cols = scr.getmaxyx()
    valuewidth = 5

    # Shrink the label column when the window is too narrow for it.
    labelwidth = maxlabelwidth
    if cols < maxlabelwidth + INDENT + valuewidth:
        labelwidth = cols - INDENT - valuewidth - 1
    rowformat = '%%-%ds%%%dd' % (labelwidth, valuewidth)  # e.g. '%-70s%5d'

    limit = rows - 1
    top = Logwatch().sortdictbyvalue(datadict)[:maxcount]

    offset += 1
    if offset + 1 < limit:
        scr.addstr(offset, 0, "Top %d %s:" % (len(top), description))

    for label, count in top:
        offset += 1
        if offset + 1 < limit:
            scr.addstr(offset, INDENT, rowformat % (label[:labelwidth], count))
    return offset


def paintscreen(scr, logpath, entrycount):
    """
        Paints the entire screen for curses mode.

        Reads the last entrycount entries of the log at logpath, then
        draws a heading, a line with the time of the last entry, the
        time span covered and the hit rate, and one section per
        summarized field.  When the log yields no entries, only the
        heading and a short notice are drawn.
    """
    # (tuple index, section title, maximum rows), in display order
    watcher = Logwatch()
    sections = (
        (watcher.HOST, "Hosts", 10),
        (watcher.REFERRER, "Referrers", 5),
        (watcher.USERNAME, "Users", 5),
        (watcher.AGENT, "Agents", 5),
        (watcher.REQUEST_PATH, "Request Paths", 10),
        (watcher.RESULT, "HTTP result codes", 5),
    )
    parsedlog = watcher.handlelog(entrycount, logpath)

    offset = 0
    scr.clear()
    scr.addstr(offset, 0, "Analyzing last %d entries from %s" % (entrycount, logpath))

    offset += 1
    if not parsedlog:
        # Nothing to measure or summarize; statsspan() needs an entry.
        scr.addstr(offset, 0, "No log entries found.")
        return

    (firstsec, lastsec) = statsspan(parsedlog)
    interval = lastsec - firstsec

    # interval2str() can return None when it has no format for the
    # interval; fall back to plain seconds in that case.
    intervaltext = interval2str(interval)
    if intervaltext is None:
        intervaltext = "%ds" % interval

    # The rate is based on the entries actually read.  A zero (or
    # negative) span, e.g. a single entry, has no meaningful rate.
    if interval > 0:
        rate = "%d" % int(len(parsedlog) / float(interval) * 3600)
    else:
        rate = "n/a"

    scr.addstr(offset, 0, "Last entry: %s, Time interval: %s, %s hits/hr" % (
        time.strftime("%b %d %H:%M", time.localtime(lastsec)),
        intervaltext,
        rate))

    for (index, description, maxcount) in sections:
        summary = watcher.summarize(index, parsedlog)
        offset = displayresults(scr, offset, summary, description, maxcount)
    
   
def printresults(datadict, description, maxcount, formatstring="%-70s %5d", INDENT=4):
    """
        Prints one report section to standard output: a 'Top N ...'
        heading, then one indented line per entry for the maxcount
        largest values in datadict, each formatted with formatstring.
    """
    ranked = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    print("Top %d %s:" % (len(ranked), description))
    margin = " " * INDENT
    for pair in ranked:
        print(margin + formatstring % pair)

def printreport(logpath, entrycount):
    """
        Prints all results for a log.

        Reads the last entrycount entries of the log at logpath and
        prints a heading, the timestamps of the first and last entry,
        and one section per summarized field.  When the log yields no
        entries, only the heading and a short notice are printed.
    """
    # (tuple index, section title, maximum rows), in output order
    watcher = Logwatch()
    sections = (
        (watcher.HOST, "Hosts", 10),
        (watcher.REFERRER, "Referrers", 5),
        (watcher.USERNAME, "Users", 5),
        (watcher.AGENT, "Agents", 5),
        (watcher.REQUEST_PATH, "Request Paths", 10),
        (watcher.RESULT, "HTTP result codes", 5),
    )
    parsedlog = watcher.handlelog(entrycount, logpath)

    print("Analyzing last %d entries from %s. Updated: %s" % (entrycount, logpath, time.ctime()))
    if not parsedlog:
        # There is no first or last entry to index.
        print("No log entries found.")
        return

    print("First entry: %s, Last entry: %s" % (parsedlog[0][watcher.TIMESTAMP],
                                               parsedlog[-1][watcher.TIMESTAMP]))

    for (index, description, maxcount) in sections:
        printresults(watcher.summarize(index, parsedlog), description, maxcount)


def printxmlsection(datadict, description, maxcount):
    """
        Prints report section as xml.

        Section format (one category/value pair per entry, all inside a
        single data element):
        <section name="hosts" resultsreturned="3" maxresultsreturned="10">
          <data>
            <category>zope.mightydreams.com</category>
            <value>158</value>
          </data>
        </section>

        Category text and the section name are XML-escaped, so values
        containing '&', '<' or '>' (common in URLs and agent strings)
        still produce well-formed output.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape, quoteattr

    formatstring = "        <category>%s</category>\n        <value>%d</value>"

    l = Logwatch().sortdictbyvalue(datadict)[:maxcount]
    # quoteattr() supplies the surrounding quotes for the attribute value.
    print('    <section name=%s resultsreturned="%d" maxresultsreturned="%d">' % (quoteattr(description), len(l), maxcount))
    print('      <data>')
    for (category, value) in l:
        print(formatstring % (escape("%s" % (category,)), value))
    print('      </data>')
    print('    </section>')

def printxmlreport(logpath, entrycount):
    """
        Prints entire report as xml.

        XML looks something like this:
        <report>
          <runtime>Fri Feb  8 10:39:11 2002</runtime>
          <logfile>/var/lib/zope/var/Z2.log</logfile>
          <entrycount>200</entrycount>
          <firstentrytime></firstentrytime>
          <lastentrytime></lastentrytime>
          <sections>
            <section>... as defined in printxmlsection ...</section>
          </sections>
        </report>

        Text values are XML-escaped.  When the log yields no entries the
        two entry-time elements are left empty.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    # (tuple index, section title, maximum rows), in output order
    watcher = Logwatch()
    sections = (
        (watcher.HOST, "Hosts", 10),
        (watcher.REFERRER, "Referrers", 5),
        (watcher.USERNAME, "Users", 5),
        (watcher.AGENT, "Agents", 5),
        (watcher.REQUEST_PATH, "Request Paths", 10),
        (watcher.RESULT, "HTTP result codes", 5),
    )
    parsedlog = watcher.handlelog(entrycount, logpath)

    if parsedlog:
        firsttime = parsedlog[0][watcher.TIMESTAMP]
        lasttime = parsedlog[-1][watcher.TIMESTAMP]
    else:
        # No entries: there is nothing to index.
        firsttime = lasttime = ""

    print("<report>")
    print("  <runtime>%s</runtime>" % escape(time.ctime()))
    print("  <logfile>%s</logfile>" % escape(logpath))
    print("  <entrycount>%d</entrycount>" % entrycount)
    print("  <firstentrytime>%s</firstentrytime>" % escape(firsttime))
    print("  <lastentrytime>%s</lastentrytime>" % escape(lasttime))
    print("  <sections>")

    for (index, description, maxcount) in sections:
        printxmlsection(watcher.summarize(index, parsedlog), description, maxcount)

    print("  </sections>")
    print("</report>")


def usage():
    """
        Prints the command-line help text to standard output.
    """
    lines = [
        "Usage: logwatch.py -l path -n numberofentries -m (curses | text | xml) -u update frequency",
        "",
        "Example: ",
        "   logwatch.py -l /var/lib/zope/var/Z2.log -m curses -n 200 -u 120",
        "",
        "This would run in full-screen curses mode by pulling the last 200 entries from the",
        "log and update the display every 120 seconds.",
        "",
        "This would do the same thing:",
        # The backslash must be the last character on the line for the
        # shell to treat it as a line continuation when this is pasted.
        "   logwatch.py --logfile /var/lib/zope/var/Z2.log \\",
        "               --numberofentries 200 \\",
        "               --mode curses \\",
        "               --updatefrequency 120 ",
        "",
        "Choosing text mode causes the program to read the log, perform the analysis, print it to",
        "the screen and exit immediately.",
        "",
        "Logwatch created by Howard Hansen (howardh@halfmagic.com), feel free to modify and use as",
        "you will.",
    ]
    print("\n".join(lines))
    

if __name__=='__main__':
    # Command-line entry point: parse the options, validate them, then
    # run the report in curses, text or xml mode.
    import sys, getopt
    try:
        # A trailing ':' (short) or '=' (long) marks options that take
        # a value.
        opts, args = getopt.getopt(sys.argv[1:],
                                   "dhl:n:m:u:",
                                   ["debug", "help", "logfile=", "numberofentries=", "mode=", "updatefrequency="])
    except getopt.GetoptError:
        # print help info and exit
        usage()
        sys.exit(2)

    if len(opts) == 0:
        usage()
        sys.exit()

    # Defaults for options that were not given.  The entry count and
    # update frequency match the example shown by usage(); the log file
    # and the mode must be supplied.
    logpath = None
    mode = ""
    entrycount = 200
    frequency = 120

    # getopt reports long options with their leading '--'.
    for o, a in opts:
        if o in ("-d", "--debug"):
            print("opts, args:")
            print("%s %s" % (opts, args))
            print("sys.argv:")
            print(sys.argv)

        elif o in ("-h", "--help"):
            usage()
            sys.exit()

        elif o in ("-l", "--logfile"):
            logpath = a
            if not os.path.exists(logpath):
                print("Error: log file path '%s' does not exist!" % logpath)
                sys.exit(2)

        elif o in ("-n", "--numberofentries"):
            try:
                entrycount = int(a)
            except ValueError:
                entrycount = 0
            if entrycount < 1:
                print("Error: number of entries value '%s' is invalid!  Needs to be a positive integer." % a)
                sys.exit(2)

        elif o in ("-m", "--mode"):
            mode = a.lower()

        elif o in ("-u", "--updatefrequency"):
            try:
                frequency = int(a)
            except ValueError:
                frequency = 0
            if frequency < 1:
                print("Error: update frequency value '%s' is invalid!  Needs to be a positive integer." % a)
                sys.exit(2)

    if logpath is None:
        print("Error: no log file given!  Use -l or --logfile.")
        sys.exit(2)

    # Only the first letter of the mode is significant; mode[:1] is ''
    # (and so falls through to the error branch) when no mode was given.
    if mode[:1] == "c":  # curses
        import curses
        # Initialize curses
        scr = curses.initscr()
        try:
            # Turn off echoing of keys, and enter cbreak mode,
            # where no buffering is performed on keyboard input
            curses.noecho()
            curses.cbreak()

            # In keypad mode, escape sequences for special keys
            # (like the cursor keys) will be interpreted and
            # a special value like curses.KEY_LEFT will be returned
            scr.keypad(1)
            scr.nodelay(1)  # makes getch() non-blocking

            main(scr, logpath, entrycount, frequency)             # Enter the main loop
        finally:
            # Whether the loop ended normally or with an error, restore
            # the terminal to a sane state.  An error then propagates
            # and its traceback is printed on the restored terminal.
            scr.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()    # Terminate curses

    elif mode[:1] == "t":        # text
        try:
            printreport(logpath, entrycount)
        except Exception:
            traceback.print_exc()            # Print the exception
            sys.exit(1)

    elif mode[:1] == "x":        # xml
        try:
            printxmlreport(logpath, entrycount)
        except Exception:
            traceback.print_exc()            # Print the exception
            sys.exit(1)

    else:                       # unknown mode
        print("Error: Unknown mode '%s'" % mode)
        print(" Valid modes are: curses, text, or xml")
        sys.exit(2)