#
# update_staticpage script
# Peter Bengtsson <mail@peterbe.com>
# http://www.zope.org/Members/peterbe/update_staticpage
#
# Date today: 31/08/01
#
# Version: alpha
# Python version: 2.x
#
__version__='0.0.1'
__doc__="""A humble script that opens your webpage and sucks in parts of
it and saves the HTML into a DTML Document.

The id of the DTML Document will be the page name of the URL +'_static'.
So, if you open /web/doc/index.html the id of the DTML Document will
be 'index.html_static'. Be careful not to request URLs like: www.server.com/.
Use instead www.server.com/index_html to be explicit.

http://www.zope.org/Members/peterbe/update_staticpage
"""

from urllib import urlopen
from random import choice
import string

def update_staticpage(self,url,container=None):
    """
        Fetch the dynamic part of `url` and store it as a DTML Document.

        url       -- address of the page; handed unchanged to getHTML()
        container -- object the document is created on; when omitted
                     (None), `self` is used instead

        The document id is the id guessed by getHTML() with '_static'
        appended ('page_static' when the guess is the empty string).
        An existing DTML Document with that id is deleted before the
        new one is added.

        Returns the id of the DTML Document, or 0 when getHTML() found
        nothing to store.
    """

    if container is None:
        # less perfect
        container = self

    got = getHTML(url)
    if not got:
        # failure
        return 0

    # getHTML() answers with a one-item {html: guessed id} dictionary;
    # take the pair apart in one go so key and value cannot get mixed up.
    html, lastbit = list(got.items())[0]
    if lastbit == '':
        lastbit = "page"
    lastbit = lastbit + '_static'

    # Only replace an object that really is a DTML Document.  Anything
    # without a meta_type (or with another one) is left untouched
    # instead of raising AttributeError.
    # NOTE(review): attribute lookup may also find objects the container
    # does not hold itself (e.g. via acquisition) -- confirm.
    existing = getattr(container, lastbit, None)
    if getattr(existing, 'meta_type', None) == 'DTML Document':
        container.manage_delObjects([lastbit])

    # maybe this should be a File object??
    container.manage_addDTMLDocument(lastbit,file=html)

    # return the name of the DTML Document
    return lastbit



def getHTML(url):
    """
        Fetch `url` and cut out the single dynamic section of the page.

        The page is requested with 'suppress_static=1' plus a throw-away
        random parameter (to prevent cache confusion).  The text between
        the '<!--DYNAMIC-CONTENT-->' and '<!--/DYNAMIC-CONTENT-->'
        markers is what gets extracted.

        Returns a one-item dictionary {html: guessed_id}, where
        guessed_id is the last path segment of `url` without query
        string or fragment (it is '' when the path ends with '/').
        Returns an empty dictionary unless the page holds exactly one
        starter marker followed by exactly one ender marker.

        Errors raised by urlopen() are not caught here.
    """

    starter = '<!--DYNAMIC-CONTENT-->'
    ender = '<!--/DYNAMIC-CONTENT-->'

    # append a random str to prevent cache confusion.  A fixed ASCII
    # alphabet is used because string.lowercase depends on the locale
    # and could hand back letters that are not valid in a URL.
    letters = 'abcdefghijklmnopqrstuvwxyz'
    randomstr = '&%s=%s' % (choice(letters), choice(letters))

    # A fragment is never part of the request; drop it so the extra
    # parameters do not end up hidden behind the '#'.
    base_url = url.split('#')[0]

    # Extend an existing query string with '&' rather than starting a
    # second one with '?'.
    if '?' in base_url:
        separator = '&'
    else:
        separator = '?'
    whole_url = '%s%ssuppress_static=1%s' % (base_url, separator, randomstr)

    # Always release the connection, also when read() fails.
    connection = urlopen(whole_url)
    try:
        html = connection.read()
    finally:
        connection.close()

    # This needs work. It means that one can only have One static part per page
    if html.count(starter) != 1 or html.count(ender) != 1:
        return {}
    if not html.find(starter) < html.find(ender):
        # markers are there but in the wrong order
        return {}

    # ok. proceed
    html = html.split(starter)[1]
    html = html.split(ender)[0]

    # Guess the id from the path only; a query string is not part of
    # the page name.
    lastbit = base_url.split('?')[0].split('/')[-1]

    return {html : lastbit}

            

if __name__ == '__main__':
    # Manual check when run as a script: fetch one page and show what
    # getHTML() hands back.
    extracted = getHTML('http://www.peterbe.com/web/doc/index_html')
    print(extracted)