import time
from xml.sax import ContentHandler

class RSSHandler(ContentHandler):
    """SAX2 content handler that collects an RSS 0.90/0.91 feed into dicts.

    Structural elements (channel, image, textinput, item) each become a
    dict; content elements (title, link, description, ...) become string
    entries of the dict of the structural element that encloses them.
    Every finished structure is appended to a list stored in its parent
    under the capitalized element name, so after parsing ``getInfo()``
    returns something shaped like::

        {'Channel': [{'Title': '...', 'Item': [{'Title': '...'}, ...]}]}

    Element names are matched case-insensitively; any element that is in
    neither table is ignored.

    RSS (for version < 1.0) can be found at
    http://my.netscape.com/publish

    RSS 1.0 information can be found at
    http://www.purl.org/rss/1.0

    @author Kapil Thangavelu
    @author kvthan@wm.edu

    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.initHandler()

    def initHandler(self):
        """Create the parsing state and the element-name tables."""
        # Stripped text of the element currently being closed; only
        # meaningful while endElement() is running.
        self._strstripbuffer = ''
        # Character chunks received since the last start or end tag.
        self._buffer = []
        # One dict per currently open structural element; the bottom entry
        # is the document root pushed by startDocument().
        self._stack = []
        # Elements that open a nested dict.
        self._structure = ('Channel', 'Image', 'Textinput', 'Item')
        # Elements whose text is stored in the enclosing dict.
        self._content = ('Title',
                         'Description',
                         'Url',
                         'Link',
                         'Width',
                         'Height',
                         'Webmaster',
                         'Copyright',
                         'Language')

    def _normalizeName(self, name):
        """Return *name* in the 'Capitalized' form used by the name tables."""
        return str(name).lower().capitalize()

    # structural elements
    def startStructure(self, attrs):
        """Open a new dict for a structural element (*attrs* is unused)."""
        self._stack.append({})

    def endStructure(self, name):
        """Close the current structure and append it to its parent's list."""
        structure = self._stack.pop()
        self._stack[-1].setdefault(name, []).append(structure)

    # content elements
    def endContent(self, name):
        """Store the text of the element being closed under *name*.

        A repeated content element overwrites the previous value.
        """
        self._stack[-1][name] = self._strstripbuffer

    # retrieving data
    def getInfo(self):
        """Pop and return the dict on top of the stack.

        After a complete parse this is the document root.  The call is
        destructive: the returned dict is removed from the handler.
        """
        return self._stack.pop()

    ## content handler interface
    def startDocument(self):
        """Push the root dict that will receive the top-level structures."""
        self._stack.append({})

    def startElement(self, name, attrs):
        """Open a structure if *name* is structural; otherwise do nothing."""
        # Text seen before this start tag belongs to the parent element (or
        # to an earlier, aborted parse) and must not be glued onto the
        # content of the element that starts here.
        self._buffer = []
        name = self._normalizeName(name)
        if name in self._structure:
            self.startStructure(attrs)

    def endElement(self, name):
        """Record the element's text or close its structure, then reset."""
        name = self._normalizeName(name)

        try:
            self._strstripbuffer = str(''.join(self._buffer)).strip()
        except UnicodeError:
            # Only reachable on Python 2, where str() of non-ASCII unicode
            # text raises; fall back to the repr of the stripped text.
            self._strstripbuffer = ''.join(self._buffer).strip()
            self._strstripbuffer = repr(self._strstripbuffer)

        if name in self._structure:
            self.endStructure(name)
        elif name in self._content:
            self.endContent(name)

        # Text never carries over past an end tag.
        self._buffer = []
        self._strstripbuffer = ''

    def characters(self, content):
        """Buffer a chunk of character data (the parser may split text)."""
        self._buffer.append(content)
       
        
if __name__ == '__main__':
    # Command-line driver: parse the RSS document named by the first
    # argument and pretty-print the collected structure.
    import pprint
    import sys
    from xml.sax import make_parser

    # Fail with a usage message instead of an IndexError traceback when
    # no document was given.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <rss-document>\n' % sys.argv[0])
        sys.exit(1)

    p = make_parser()
    r = RSSHandler()
    p.setContentHandler(r)

    # Parenthesized form prints the same on Python 2 and is valid Python 3.
    print(sys.argv)
    p.parse(sys.argv[1])

    pprint.pprint(r.getInfo())
