#
# Copyright 2001 Stefane Fermigier and Nuxeo SARL
# See LICENSE.TXT for licensing information
#

import os
import shutil
import subprocess
import tempfile

class ConverterBase:
  """Common converter interface.

  Stores a name and the raw data it was given; every other accessor
  returns an empty default (or raises KeyError for images).
  """

  def __init__(self, name, data):
    self.name, self.data = name, data

  def getRaw(self):
    """Give back the data exactly as it was passed to the constructor."""
    raw = self.data
    return raw

  def getText(self):
    """Plain-text rendition of the content; empty string here."""
    return str()

  def getHtml(self):
    """HTML rendition of the content; empty string here."""
    return str()

  def getImageNames(self):
    """Names of the available images; a new empty list here."""
    return list()

  def getImage(self, image_name):
    """Raw content of the named image; always raises KeyError here."""
    raise KeyError()

class ExternalConverterBase(ConverterBase):
  """Base class for converters that run external programs on a temp file.

  The constructor writes the data to ``<tmpdir>/<name><suffix>`` inside a
  private temporary directory and then calls ``_convert()``.  This class
  uses ``self.suffix`` and ``self._convertToHtml()`` but defines neither,
  so subclasses must provide them; ``_convertToHtml()`` is expected to
  leave ``<name>.html`` in ``self.tmpdir`` (that is the file read back by
  ``_convertToText()`` and ``getHtml()``).
  """

  def __init__(self, name, data):
    """Write ``data`` to a fresh temporary directory and convert it."""
    ConverterBase.__init__(self, name, data)

    # mkdtemp() creates the directory atomically with owner-only
    # permissions; mktemp() followed by mkdir() is open to a race in
    # which another process claims the name first.
    self.tmpdir = tempfile.mkdtemp()

    # Byte strings go out in binary mode so they reach the disk
    # unmodified (text mode rejects bytes on Python 3 and rewrites line
    # endings on some platforms).  Text strings keep the text mode.
    mode = "wb" if isinstance(self.data, bytes) else "w"
    with open("%s/%s%s" % (self.tmpdir, self.name, self.suffix), mode) as fd:
      fd.write(self.data)

    self._convert()

  def __del__(self):
    """Remove the temporary directory and everything in it (best effort)."""
    # __init__ may have failed before tmpdir was assigned, and the
    # directory may already be gone; a finalizer must not raise for either.
    tmpdir = getattr(self, "tmpdir", None)
    if tmpdir:
      shutil.rmtree(tmpdir, ignore_errors=True)

  def _convert(self):
    """Produce the HTML rendition first, then derive the text from it."""
    self._convertToHtml()
    self._convertToText()

  def _convertToText(self):
    """Dump ``<name>.html`` to ``<name>.txt`` with lynx.

    The exit status is ignored, and a missing lynx binary simply leaves an
    empty ``.txt`` file, as the shell redirection did before.
    """
    html_name = "%s.html" % self.name
    txt_path = "%s/%s.txt" % (self.tmpdir, self.name)
    # An argument list (no shell) means quotes or other shell
    # metacharacters in the name or temp path cannot break or inject
    # into the command.
    with open(txt_path, "wb") as out, open(os.devnull, "wb") as devnull:
      try:
        subprocess.call(["lynx", "-dump", html_name],
                        cwd=self.tmpdir, stdout=out, stderr=devnull)
      except OSError:
        # lynx could not be started; stay best-effort.
        pass

  #
  # Accessors to get results.
  #
  def getText(self):
    """Return the content of ``<name>.txt`` from the temp directory."""
    with open("%s/%s.txt" % (self.tmpdir, self.name)) as fd:
      return fd.read()

  def getHtml(self):
    """Return the content of ``<name>.html`` from the temp directory."""
    with open("%s/%s.html" % (self.tmpdir, self.name)) as fd:
      return fd.read()

