#
# Copyright 2001 Stefane Fermigier and Nuxeo SARL
# See LICENSE.TXT for licensing information
#

import os
import re
import shutil
import string
import tempfile
# When true, Plugin.acceptFileType() prints the repr of every file type
# string it is asked to check.
debug = 0

class ConversionError(Exception):
  """Raised by a converter that cannot handle the data it was given.

  Plugin.getConverter() catches this error and moves on to the next
  converter class.  Deriving from Exception keeps the class raisable and
  catchable on interpreters that reject non-exception classes in
  ``raise`` / ``except``.
  """
  pass

class Plugin:
  """A named group of converter classes plus the inputs it accepts.

  Accepted inputs are described two ways: a list of exact mime types and
  a list of regular-expression patterns matched against a file type
  string (see acceptFileType()).
  """
  name = 'Base plugin'
  converter_class = None

  def __init__(self, name, converter_classes=[]):
    """Create a plugin called `name`.

    `converter_classes` is copied, so neither the caller's list nor the
    shared default list is modified by addConverterClass().
    """
    self.name = name
    self.converter_classes = converter_classes[:]
    self.acceptable_file_types = []
    self.acceptable_mime_types = []

  def getConverter(self, name, data):
    """Return the first converter that can be built for (name, data).

    Converter classes are tried in registration order.  A class whose
    constructor raises ConversionError is skipped; any other exception
    propagates.  Returns None when no class succeeds.
    """
    for converter_class in self.converter_classes:
      try:
        return converter_class(name, data)
      except ConversionError:
        # This converter cannot handle the data; try the next one.
        pass
    return None

  def addConverterClass(self, converter_class):
    """Append `converter_class` to the classes tried by getConverter()."""
    self.converter_classes.append(converter_class)

  def setName(self, name):
    """Replace the plugin name."""
    self.name = name

  def setMimeType(self, mime_type):
    """Record `mime_type` on the plugin as the ``mime_type`` attribute."""
    self.mime_type = mime_type

  def addAcceptableMimeType(self, mime_type):
    """Register a mime type that acceptMimeType() will accept."""
    self.acceptable_mime_types.append(mime_type)

  def addAcceptableFileType(self, pattern):
    """Register a regular-expression pattern used by acceptFileType()."""
    self.acceptable_file_types.append(pattern)

  def acceptMimeType(self, mime_type):
    """Return true if `mime_type` was registered as acceptable."""
    return mime_type in self.acceptable_mime_types

  def acceptFileType(self, file_type):
    """Return 1 if `file_type` matches a registered pattern, else 0.

    Each pattern is matched against the text that follows the first part
    of the string and a ': ' separator, and the string must end with a
    newline.
    NOTE(review): this looks like one line of file(1) output
    ("<path>: <description>\\n") -- confirm against the caller.
    """
    if debug:
      # repr() gives the same output as the old backtick syntax and is
      # valid on every Python version.
      print(repr(file_type))
    for pattern in self.acceptable_file_types:
      if re.match('.*: ' + pattern + '\n$', file_type):
        return 1
    return 0
  

class ConverterBase:
  """Default converter: keeps the raw data and offers empty conversions.

  getMimeType() reads ``self.mime_type``, which this class never sets;
  it has to be supplied by a subclass or assigned on the instance.
  """

  def __init__(self, name, data):
    """Remember the document `name` and its raw `data`."""
    self.name = name
    self.data = data

  def getRaw(self):
    "Return raw content"

    return self.data

  def getText(self):
    "Return content as raw text"

    return ''

  def getHtml(self):
    "Return content as HTML"

    return ''

  def getMimeType(self):
    "Return computed mime type"

    return self.mime_type

  def getImageNames(self):
    "Return list of image names"

    return []

  def getImage(self, image_name):
    "Return image (as raw content)"

    # The base converter has no images; include the requested name so the
    # error says which lookup failed.
    raise KeyError(image_name)


class ExternalConverterBase(ConverterBase):
  """Base class for converters that work through files and external tools.

  The constructor writes the raw data to ``<tmpdir>/<name>.<suffix>`` in
  a private temporary directory and then calls _convert().  This class
  reads ``self.suffix`` and ``self.mime_type`` and calls
  ``self._convertToHtml()`` without defining any of them, so a concrete
  subclass has to provide all three.  The accessors read
  ``<tmpdir>/<name>.html`` and ``<tmpdir>/<name>.txt``.
  """

  def __init__(self, name, data):
    """Dump `data` into a fresh temporary directory and convert it."""
    ConverterBase.__init__(self, name, data)

    # mkdtemp() creates the directory atomically; mktemp() followed by
    # mkdir() left a window in which another process could claim the
    # path first.
    self.tmpdir = tempfile.mkdtemp()

    # NOTE(review): the file is opened in text mode as before; binary
    # payloads may need "wb" on some platforms -- confirm with callers.
    fd = open("%s/%s.%s" % (self.tmpdir, self.name, self.suffix), "w")
    try:
      fd.write(self.data)
    finally:
      fd.close()

    self._convert()

  def __del__(self):
    """Remove the temporary directory and everything in it."""
    # tmpdir is absent when __init__ failed before creating it.
    tmpdir = getattr(self, 'tmpdir', None)
    if tmpdir:
      shutil.rmtree(tmpdir, ignore_errors=True)

  def _shellQuote(self, text):
    """Return `text` wrapped as a single-quoted shell word."""
    # A single quote cannot appear inside single quotes: close the quoted
    # run, emit an escaped quote, then reopen it.
    return "'" + text.replace("'", "'\\''") + "'"

  def _readFile(self, extension):
    """Return the contents of ``<tmpdir>/<name>.<extension>``."""
    fd = open("%s/%s.%s" % (self.tmpdir, self.name, extension))
    try:
      return fd.read()
    finally:
      fd.close()

  def _convert(self):
    """Produce the HTML file first, then derive the text file from it."""
    self._convertToHtml()
    self._convertToText()

  def _convertToText(self):
    """Run ``lynx -dump`` on ``<name>.html`` and store it as ``<name>.txt``.

    The exit status of the command is ignored, as is anything lynx writes
    to stderr.
    """
    cmd = "cd %s && lynx -dump %s > %s 2> /dev/null" % (
      self._shellQuote(self.tmpdir),
      self._shellQuote(self.name + '.html'),
      self._shellQuote(self.name + '.txt'))
    os.system(cmd)

  #
  # Accessors to get results.
  #
  def getMimeType(self):
    """Return ``self.mime_type``."""
    return self.mime_type

  def getText(self):
    """Return the contents of the generated ``.txt`` file."""
    return self._readFile('txt')

  def getHtml(self):
    """Return the contents of the generated ``.html`` file."""
    return self._readFile('html')

  def getBody(self):
    """Return what lies between the <body> tags of getHtml().

    The whole HTML is returned unchanged when no <body>...</body> pair is
    found.
    """
    data = self.getHtml()
    # Flags are passed as an argument: inline flags placed at the end of
    # a pattern are rejected by newer versions of the re module.
    m = re.search('<body[^>]*>(.*)</body[^>]*>', data, re.I | re.S)
    if m:
      data = m.group(1)
    return data

  def getTitle(self):
    """Return the stripped <title> text of the HTML file, or ''."""
    data = self._readFile('html')
    m = re.search('<title[^>]*>(.*)</title[^>]*>', data, re.S | re.I)
    if m:
      return m.group(1).strip()
    else:
      return ''

