# Copyright (C) 2001 Stfane Fermigier <sf@fermigier.com> and
# Nuxeo SARL <contact@nuxeo.com>.
# See licence info at the end of this file.

"""HTML Documents

HTML Documents contain static HTML content (no DTML code), with HTML
headers and footers replaced by (DTML Methods) standard_html_header and
standard_html_footer.
"""

__version__='$Revision: 1.7 $'[11:-2]

from OFS.SimpleItem import SimpleItem, Item_w__name__
from OFS.PropertyManager import PropertyManager
from OFS.Cache import Cacheable
from OFS.History import Historical
from Products.ZCatalog.CatalogAwareness import CatalogAware
import Acquisition
from DateTime.DateTime import DateTime

from AccessControl import ClassSecurityInfo, getSecurityManager
from Globals import HTML, HTMLFile, DTMLFile, MessageDialog, InitializeClass

from urllib import quote
import re, string, traceback
from IHTMLDocument import IHTMLDocument
from parser import HTMLParser


class HTMLDocument(CatalogAware, SimpleItem, Acquisition.Implicit,
  PropertyManager, Historical, Cacheable, Item_w__name__):
  # + ElementWithTitle (XXX ???)

  """HTML Documents

  The page source is stored in html_src.  Three kinds of derived data are
  recomputed from it by _parse(): body (the markup found between the
  <body> tags), raw_text (whatever the project's HTMLParser returns from
  getRaw()) and the metadata attributes (title, author, ...).
  """

  # XXX please someone explain how to make this work.
  #__implements__ = (IHTMLDocument,)

  meta_type = 'HTML Document'
  icon = 'p_/dtmldoc'

  # Class-level defaults for the stored source and the values derived
  # from it.
  html_src = body = raw_text = ''

  index_html = None # Prevent accidental acquisition (XXX ???)

  manage_options = (
    (
      {'label':'Edit', 'action':'manage_main'},
      {'label':'View', 'action':''},
      {'label':'Tidy', 'action':'manage_tidy'},
    )
    + PropertyManager.manage_options
    + Cacheable.manage_options
    + Historical.manage_options
  )

  security = ClassSecurityInfo()

  # Names of the metadata attributes carried by a document.  Note that
  # 'summary' has no entry in _properties below.
  _metadata = ['title', 'author', 'publisher', 'source',
    'publication_date', 'description', 'keywords', 'summary']
  title = author = publisher = source = description = keywords = summary = ''
  _properties = (
    {'id':'title', 'type': 'string', 'mode': 'w'},
    {'id':'author', 'type': 'string', 'mode': 'w'},
    {'id':'publisher', 'type': 'string', 'mode': 'w'},
    {'id':'source', 'type': 'string', 'mode': 'w'},
    {'id':'publication_date', 'type': 'date', 'mode': 'w'},
    {'id':'description', 'type': 'text', 'mode': 'w'},
    {'id':'keywords', 'type': 'text', 'mode': 'w'},
  )

  __ac_permissions__=(
    ('Change HTML Documents',
     ('manage_editForm', 'manage', 'manage_main',
      'manage_edit', 'manage_upload', 'PUT',
      'manage_historyCopy',
      'manage_beforeHistoryCopy', 'manage_afterHistoryCopy',
      )
     ),
    )

  #
  # Constructor
  #
  def __init__(self, id='', title='', file='', __name__=''):
    """Create a document from the HTML source string in file.

    __name__ is used as the id only when it is given and id is empty.
    publication_date is set to the creation time.  The source is parsed
    immediately, so a title found by the parser replaces the title
    argument.
    """
    if __name__ and not id:
      self._setId(__name__)
    else:
      self._setId(id)
    self.html_src = file
    self.title = title
    self.publication_date = DateTime()
    self._parse()

  # Documented with comments rather than a docstring: ZPublisher only
  # publishes methods that have a docstring, so adding one here would
  # change what is reachable through the web.
  #
  # Called after every change to html_src: re-parses the source, then
  # invalidates the cache entry and re-indexes the object.
  def updateState(self):
    self._parse()
    self.ZCacheable_invalidate()
    self.reindex_object()

  #
  # Updates
  #
  manage_editForm = HTMLFile('dtml/editForm', globals())
  manage = manage_main = manage_editForm

  def manage_edit(self, data, title='', SUBMIT='Change', dtpref_cols='50',
          dtpref_rows='20', REQUEST=None):
    """
    Replaces a Documents contents with Data.

    data is either a string or an object with a read() method.  A
    non-empty title is stored before the new source is parsed; if the
    parser finds a title in the source, that one wins.

    SUBMIT, dtpref_cols and dtpref_rows are accepted but currently
    ignored: the edit-area resizing they used to drive is disabled.

    Returns the rendered edit form when REQUEST is true, else None.
    """

    if title:
      self.title = title
    if not isinstance(data, str):
      data = data.read()
    self.html_src = data
    self.updateState()

    if REQUEST:
      message="Content changed."
      return self.manage_main(self,REQUEST,manage_tabs_message=message)


  def manage_upload(self, file='', REQUEST=None):
    """Replace the contents of the document with the text in file.

    file is either a string or an object with a read() method.  Returns
    the rendered edit form when REQUEST is true, else None.
    """

    if not isinstance(file, str):
      file = file.read()
    self.html_src = file
    self.updateState()
    if REQUEST:
      message="Content uploaded."
      return self.manage_main(self,REQUEST,manage_tabs_message=message)

  def PUT(self, REQUEST, RESPONSE):
    "Handle HTTP/FTP PUT requests."

    self.dav__init(REQUEST, RESPONSE)
    # The request body becomes the new source; a missing body empties
    # the document.
    self.html_src = REQUEST.get('BODY', '')
    self.updateState()
    RESPONSE.setStatus(204)
    return RESPONSE

  #
  #
  #
  def manage_FTPget(self):
    "Get HTML source for FTP download"

    return self.html_src

  def manage_tidy(self, REQUEST=None):
    "Return error message from tidy"

    # Render through the class-level template instead of building a new
    # HTMLFile on every call.  It is invoked the same way manage_main is
    # invoked elsewhere in this class: (client, REQUEST, **keywords).
    return self.manage_tidy_dtml(self, REQUEST, tidy=self._tidy())

  manage_tidy_dtml = HTMLFile('dtml/tidy', globals())

  #
  # Rendering
  #
  def document_src(self, REQUEST=None, RESPONSE=None):
    """Return unprocessed document source."""

    if RESPONSE is not None:
      RESPONSE.setHeader('Content-Type', 'text/plain')
    return self.html_src

  def __call__(self, client=None, REQUEST={}, RESPONSE=None, **kw):
    "View document"

    # NOTE(review): REQUEST={} is a shared mutable default.  It is kept
    # because the default is part of the call signature; this method
    # never mutates it, it only hands it to the header and footer.
    sm = getSecurityManager()
    sm.addContext(self)
    try:
      # The header and the footer are each rendered only if the current
      # user may view them; the body is always included.
      if sm.checkPermission('View', self.standard_html_header):
        res = self.standard_html_header(self, REQUEST) + self.body
      else:
        res = self.body
      if sm.checkPermission('View', self.standard_html_footer):
        res = res + self.standard_html_footer(self, REQUEST)
    finally:
      sm.removeContext(self)
    return res

  # Documented with comments rather than a docstring: ZPublisher only
  # publishes methods that have a docstring, so adding one here would
  # change what is reachable through the web.
  #
  # Returns raw_text with every run of characters that are neither
  # letters nor digits collapsed to a single space.  The document is
  # parsed first if raw_text is still empty.
  def PrincipiaSearchSource(self):
    if not self.raw_text:
      self._parse()
    # Should we keep this ?
    words = re.split(
      r'[^%s]+' % (string.letters + string.digits), self.raw_text)
    return ' '.join(words)

  #
  # Parse HTML content for metadata.
  #
  def _parse(self):
    """Recompute body, raw_text and the metadata from html_src.

    A metadata attribute is overwritten only when the parser returns a
    true value for it, so values already set survive a source that does
    not mention them.  summary and description are always rewritten:
    with the parsed description when there is one, otherwise with the
    first 200 characters of the raw text.
    """
    self._parseBody()

    p = HTMLParser()
    p.feed(self.html_src)
    p.close()
    self.raw_text = p.getRaw()

    # (attribute name, parser getter), queried in this order.
    getters = (
      ('title', p.getTitle),
      ('author', p.getAuthor),
      ('publisher', p.getPublisher),
      ('source', p.getSource),
      ('keywords', p.getKeywords),
    )
    for attr, getter in getters:
      value = getter()
      if value:
        setattr(self, attr, value)

    description = p.getDescription()
    if description:
      self.summary = self.description = description
    else:
      raw = p.getRaw()
      # Slicing already stops at the end of a shorter string.
      self.summary = self.description = raw[:200]

  def _parseBody(self):
    """Store in body the part of html_src found between the body tags.

    Everything up to and including the first opening <body ...> tag is
    dropped, then everything from the first closing </body ...> tag on.
    A missing tag leaves that side of the source untouched.
    """
    data = self.html_src
    # Was:
    # data = re.sub('.*<body(.*?)>(?i)(?s)', '', data)
    # data = re.sub('</body(.*?)>.*(?i)(?s)', '', data)
    # but it was much too slow.
    #
    # The flags are passed as an argument: inline global flags that are
    # not at the start of a pattern are rejected by newer versions of re.
    flags = re.I | re.S
    opening = re.search('<body.*?>', data, flags)
    if opening:
      data = data[opening.end():]

    closing = re.search('</body.*?>', data, flags)
    if closing:
      data = data[:closing.start()]

    self.body = data

  def _tidy(self):
    """Run 'tidy -e' on html_src and return what it wrote to stderr.

    The command is run through the shell, so a missing tidy executable
    shows up as the shell's own error text in the returned string.
    """
    # subprocess needs Python 2.4 or later.  communicate() feeds stdin
    # and drains stdout and stderr together, then waits for the child.
    # Writing all of stdin and then reading stdout before stderr, as was
    # done with popen2, can block forever once one of the pipes fills up.
    import subprocess
    proc = subprocess.Popen('tidy -e', shell=True,
      stdin=subprocess.PIPE, stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
    out, err = proc.communicate(self.html_src)
    return err

# Run Zope's class initialisation (Globals.InitializeClass) on the class.
InitializeClass(HTMLDocument)

# Add form for HTML Documents, rendered from dtml/addForm with the two
# spellings of the type name passed to the template as Kind and kind.
addHTMLDocumentForm = HTMLFile(
  'dtml/addForm',
  globals(),
  Kind='HTML Document',
  kind='HTMLDocument'
)

def cookId(id, file):
  """Return the id to use for a new document.

  A non-empty id is returned unchanged, and so is an empty id when file
  has no filename attribute.  Otherwise the id is the last component of
  file.filename, where a slash, a backslash and a colon all count as
  path separators.
  """
  if id or not hasattr(file, 'filename'):
    return id
  filename = file.filename
  # rfind() gives -1 for a separator that is absent, so a filename
  # without any separator is returned whole.
  last_sep = max(
    filename.rfind('/'),
    filename.rfind('\\'),
    filename.rfind(':'))
  return filename[last_sep + 1:]

def addHTMLDocument(self, id, title='', file='', REQUEST=None, submit=None):
  """Add a HTML Document object with the contents of file.

  file is either a string or an object with a read() method.  When id
  is empty, it is derived from file.filename if file has one.  With a
  REQUEST, the response is redirected to a manage_main page: that of
  the new document when submit is " Add and Edit ", otherwise that of
  the destination URL.  Always returns an empty string.
  """

  # The id must be worked out before file is replaced by its contents,
  # because cookId looks at file.filename.
  id = cookId(str(id), file)
  if not isinstance(file, str):
    file = file.read()

  ob = HTMLDocument(id=id, title=title, file=file)
  id = self._setObject(id, ob)
  if REQUEST is not None:
    # Best effort: fall back to the request's URL1 when DestinationURL
    # cannot be obtained.  Exception, not a bare except, so that
    # interpreter-level exits are not swallowed.
    try:
      u = self.DestinationURL()
    except Exception:
      u = REQUEST['URL1']
    if submit == " Add and Edit ":
      u = "%s/%s" % (u, quote(id))
    REQUEST.RESPONSE.redirect(u + '/manage_main')
  return ''

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# 
# vim:et:ts=2:ai
