#
# Copyright 2001 Stefane Fermigier and Nuxeo SARL
# See LICENSE.TXT for licensing information
#

import os
import re
import shutil
import string
import tempfile

import misc

try:
  from zLOG import LOG, DEBUG, ERROR
except ImportError:
  # zLOG is not importable (e.g. when called by tests): provide placeholder
  # severity levels and a logger that silently drops every record.
  DEBUG = ERROR = None

  def LOG(*args, **kw):
    """Accept any logging call and discard it."""
    return None

def checkCmd(cmd):
    """Check that the external command *cmd* is available.

    A bare command name is looked up in every directory listed in the
    ``PATH`` environment variable (``os.defpath`` when ``PATH`` is unset);
    a name that contains a directory separator is checked as given.

    Returns None when an executable regular file is found.
    Raises RuntimeError when the command is missing.
    """
    if os.sep in cmd:
        candidates = [cmd]
    else:
        search_path = os.environ.get('PATH', os.defpath)
        # An empty PATH entry conventionally means the current directory.
        candidates = [os.path.join(directory or os.curdir, cmd)
                      for directory in search_path.split(os.pathsep)]

    for candidate in candidates:
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return

    # A real exception class is required here: string exceptions are rejected
    # by Python >= 2.6, which would hide the message below behind a TypeError.
    raise RuntimeError(
        "Initialisation Error: Command '%s' is missing" % cmd)

# Runs at import time: importing this module fails if lynx is not installed.
# lynx is the program invoked by ExternalConverterBase._convertToText.
checkCmd('lynx')

class ConversionError(Exception):
    """Raised when a document could not be converted."""

class MatcherBase:
  """Decides whether a document is of a kind a converter can handle.

  Subclasses override the class-level lists below.  Entries of
  acceptable_file_types are regular expressions that must match the whole
  file-type description.
  """
  acceptable_mime_types = []
  acceptable_file_types = []
  # Not consulted by matches() in this base class.
  acceptable_suffixes = []

  def matches(self,
              data=None,
              mime_type=None,
              filename=None,
              data_mime_type=None,
              data_file_type=None,
              ):
    """Return 1 if the given information matches, 0 otherwise.

    data, mime_type and filename are passed by the client; data_mime_type
    and data_file_type are deduced by file.  Only the two mime types and
    the file type are examined here; data and filename are ignored.
    """
    known_mime_types = self.acceptable_mime_types
    if mime_type in known_mime_types or data_mime_type in known_mime_types:
      return 1

    # Without a file-type description there is nothing left to compare.
    if data_file_type is None:
      return 0

    for file_type_re in self.acceptable_file_types:
      anchored = '^%s$' % file_type_re
      if re.match(anchored, data_file_type) is not None:
        return 1
    return 0

class ConverterBase:
  """Base class for document converters.

  Holds the raw document data and derives text, body and title from whatever
  getHtml() returns.  This base implementation produces empty HTML and no
  images; subclasses override getHtml() and are expected to set a
  ``mime_type`` attribute, which is not defined here.
  """
  tmpdir = None

  def __init__(self, data):
    self.name = 'convert' # XXX factor this out
    self.data = data

  def getRaw(self):
    "Return raw content"
    return self.data

  def getText(self):
    "Return content as raw text"
    # XXX: we should use lynx -dump here
    # Inline flags must lead the pattern: trailing global flags are
    # deprecated since Python 3.6 and rejected by Python 3.11+.
    return re.sub('(?i)(?m)<[^>]*>', '', self.getHtml())

  def getHtml(self):
    "Return content as HTML"
    return ''

  def getBody(self):
    "Return body content as HTML (the whole HTML if there is no body tag)"
    data = self.getHtml()
    m = re.search('(?i)(?s)<body[^>]*>(.*)</body[^>]*>', data)
    if m:
      data = m.group(1)
    return data

  def getTitle(self):
    "Return title, stripped of surrounding whitespace ('' if none)"
    data = self.getHtml()
    # Non-greedy, so that only the first title element is captured even when
    # the document contains further </title> tags later on.
    m = re.search('(?s)(?i)<title[^>]*>(.*?)</title[^>]*>', data)
    if m:
      return m.group(1).strip()
    return ''

  def getMimeType(self):
    "Return computed mime type"
    # Raises AttributeError unless a subclass or instance set mime_type.
    return self.mime_type

  def getImageNames(self):
    "Return list of image names"
    return []

  def getImage(self, image_name):
    "Return image (as raw content); this base class has none, so KeyError"
    raise KeyError


class ExternalConverterBase(ConverterBase):
  """Base class for converters that run external programs.

  The document is written to ``<tmpdir>/<name>.<suffix>`` inside a private
  temporary directory, the conversion is run from the constructor, and the
  accessors read the resulting ``.html`` and ``.txt`` files back.  The
  directory is removed when the instance is destroyed.

  Subclasses are expected to provide ``suffix``, ``mime_type`` and a
  ``_convertToHtml()`` method; none of these is defined in this class.
  """

  def __init__(self, data):
    """Store *data* in a fresh temporary directory and convert it.

    Raises ConversionError if the input file cannot be written; errors
    raised by _convert() propagate unchanged.
    """
    ConverterBase.__init__(self, data)

    # mkdtemp() picks the name and creates the directory in one step;
    # mktemp() followed by mkdir() left a window for another process to
    # claim the name first.
    self.tmpdir = tempfile.mkdtemp()
    self.basefilename = "%s/%s" % (self.tmpdir, self.name)

    fname = "%s.%s" % (self.basefilename, self.suffix)
    try:
      fd = open(fname, "w")
      try:
        fd.write(self.data)
      finally:
        # Close the handle even when the write fails.
        fd.close()
    except IOError:
      LOG('NuxDocument', ERROR, 'Cannot write to file "%s"' % fname)
      raise ConversionError

    self._convert()

  def __del__(self):
    """Remove the temporary directory, if one was created."""
    if self.tmpdir:
      # Best effort, like the "rm -rf" it replaces, but without a shell.
      shutil.rmtree(self.tmpdir, ignore_errors=True)

  def _convert(self):
    """Produce the HTML file first, then the text file derived from it."""
    self._convertToHtml()
    self._convertToText()

  def _runCommand(self, cmd, logext, timeout=30):
    """Run the shell command *cmd* through misc.systemWithTimeout.

    On a non-zero status the content of ``<basefilename>.log-<logext>`` is
    logged together with the command, and ConversionError is raised.
    """
    LOG('NuxDocument', DEBUG, 'Running command %s' % cmd)
    status = misc.systemWithTimeout(cmd, timeout=timeout)
    if status != 0:
      log = self._getFile("%s.log-%s" % (self.basefilename, logext))
      LOG('NuxDocument', ERROR, 'Calling "%s" return status %d, log:\n%s'
          % (cmd, status, log))
      raise ConversionError

  def _convertToText(self):
    """Dump ``<basefilename>.html`` to ``<basefilename>.txt`` using lynx.

    The exit status is ignored and lynx's stderr is discarded, so a failure
    only shows up as a missing or empty text file.
    """
    cmd = "cd '%s' && lynx -dump '%s.html' > '%s.txt' 2> /dev/null" \
      % (self.tmpdir, self.basefilename, self.basefilename)
    os.system(cmd)

  #
  # Accessors to get results.
  #
  def getMimeType(self):
    "Return computed mime type"
    return self.mime_type

  def getText(self):
    "Return content as raw text, read from the generated .txt file"
    return self._getFile("%s.txt" % self.basefilename)

  def getHtml(self):
    "Return content as HTML, read from the generated .html file"
    return self._getFile("%s.html" % self.basefilename)

  def _getFile(self, filename):
    """Return the content of *filename*, or '' if it is missing or unreadable.

    Both failure cases are logged rather than raised.
    """
    try:
      os.stat(filename)
    except OSError:
      LOG('NuxDocument', ERROR, 'Cannot find file "%s"' % filename)
      return ''
    try:
      f = open(filename)
      try:
        return f.read()
      finally:
        f.close()
    except IOError:
      LOG('NuxDocument', ERROR, 'Cannot read file "%s"' % filename)
      return ''



class Plugin:
  """Bundles a name, a matcher instance and a converter class."""

  def __init__(self, name, matcher_class, converter_class):
    """Instantiate matcher_class once; keep converter_class for later use."""
    self.converter_class = converter_class
    self.matcher = matcher_class()
    self.name = name

  def getName(self):
    "Return the plugin name"
    return self.name

  def getMatcher(self):
    "Return the matcher instance created at construction time"
    return self.matcher

  def getConverter(self, data):
    "Return a new converter_class instance built from data"
    make_converter = self.converter_class
    return make_converter(data)

