#
# Copyright 2001-2002 Stfane Fermigier and Nuxeo SARL
# See LICENSE.TXT for licensing information
#
from plugin import Plugin, ExternalConverterBase, ConversionError
import os, re, misc

# Path of the wv configuration file (wvZope.xml), which sits in the same
# directory as this module.  os.path.join keeps the current-working-directory
# prefix only when the module directory is a relative path.
WV_CONFIG = os.path.join(os.getcwd(), os.path.dirname(__file__), 'wvZope.xml')

def debug(s):
  """Debugging hook: accepts a message s and currently discards it."""
  # Output is switched off; the print of s that lived here is disabled.
  return None

class MSWordConverter(ExternalConverterBase):
  """Converter for MS Word documents, driven by the external wvHtml command.

  The methods read self.tmpdir (working directory) and self.name (base name
  of the document inside that directory); both are presumably provided by
  ExternalConverterBase -- confirm there.
  """
  suffix = 'doc'
  mime_type = 'application/msword'

  def _convertToHtml(self):
    """Run wvHtml on '<name>.doc' in self.tmpdir, writing '<name>.html'.

    The command's stderr is captured in '<name>.log-wvWare' and inspected
    afterwards.

    Raises ConversionError when the log contains 'Not a word document'.
    """
    # was "cd '%s' && wvWare -c iso-8859-1 -x %s '%s.%s' > '%s.html'..."
    # The log file name is quoted like every other argument, so a name
    # containing spaces no longer splits the redirection target.
    # NOTE(review): names are placed between single quotes without escaping;
    # a name containing "'" would still break the command -- confirm that
    # self.tmpdir / self.name are sanitized upstream.
    cmd = ("cd '%s' && wvHtml '%s.%s' '%s.html' 2> '%s.log-wvWare'"
        % (self.tmpdir, self.name, self.suffix, self.name, self.name))
    # Second argument is the timeout handed to misc.systemWithTimeout
    # (presumably seconds -- confirm in misc).
    misc.systemWithTimeout(cmd, 30)
    log_file = open("%s/%s.log-wvWare" % (self.tmpdir, self.name), "r")
    try:
      log = log_file.read()
    finally:
      log_file.close()
    if re.search('Not a word document', log):
      debug('MSWordConverter says: %s' % log)
      raise ConversionError


  # XXX: remove because bogus in wv 0.7.0 -> use lynx -dump instead
  #def _convertToText(self):
  #  cmd = "cd '%s' && wvText '%s.doc' '%s.txt' 2> /dev/null" \
  #    % (self.tmpdir, self.name, self.name)
  #  os.system(cmd)

  def getHtml(self):
    """Return the converted HTML, recoded to ISO-8859-1 when possible.

    When the HTML declares charset=UTF-8, the data is transcoded from UTF-8
    to ISO-8859-1 and the declaration is rewritten to match.  If the
    transcoding fails with a UnicodeError (invalid UTF-8 input, or characters
    that ISO-8859-1 cannot represent), the data and its declaration are left
    untouched.  The '0x08 graphic' placeholder image tag is always removed.
    """
    data = ExternalConverterBase.getHtml(self)
    utf8_meta = 'CONTENT="text/html; charset=UTF-8"'
    if re.search(utf8_meta, data):
      try:
        recoded = unicode(data, 'utf-8').encode('iso-8859-1')
      except UnicodeError:
        # Best effort only: keep the UTF-8 data and its matching declaration.
        pass
      else:
        data = re.sub(
          utf8_meta, 'CONTENT="text/html; charset=iso-8859-1"', recoded)
    return re.sub(
      '<img alt="0x08 graphic" src="StrangeNoGraphicData">', '', data)

  def getImageNames(self):
    """Return the names of the files in self.tmpdir that look like images.

    A file qualifies when the text after its last dot is one of png, jpg,
    gif, wmz or wmf (case-sensitive) and at least one character precedes
    that dot.  Names come back in os.listdir order.
    """
    image_names = []
    for fn in os.listdir(self.tmpdir):
      # Greedy '.+' makes the 'ext' group the part after the last dot.
      m = re.match(r"^.+\.(?P<ext>.+)$", fn)
      if m and m.group('ext') in ('png', 'jpg', 'gif', 'wmz', 'wmf'):
        image_names.append(fn)
    return image_names

  def getImage(self, image_name):
    """Return the raw content of the file image_name located in self.tmpdir."""
    # Images are binary data: read in binary mode so no newline translation
    # can corrupt them, and close the handle explicitly.
    image_file = open("%s/%s" % (self.tmpdir, image_name), "rb")
    try:
      return image_file.read()
    finally:
      image_file.close()

class MSPowerPointConverter(ExternalConverterBase):
  """Converter for MS PowerPoint files, driven by the external ppthtml command.

  Reads self.tmpdir and self.name, which are presumably provided by
  ExternalConverterBase -- confirm there.
  """
  suffix = 'ppt'
  mime_type = 'application/vnd.ms-powerpoint'

  def _convertToHtml(self):
    """Run ppthtml on '<name>.ppt' in self.tmpdir, writing '<name>.html'.

    stdout of the command becomes the HTML file; stderr is captured in
    '<name>.log-ppthtml' and inspected afterwards.

    Raises ConversionError when the log contains 'OLE2 object not found'.
    """
    # NOTE(review): names are placed between single quotes without escaping;
    # confirm that self.tmpdir / self.name cannot contain "'".
    cmd = "cd '%s' && ppthtml '%s.%s' > '%s.html' 2> '%s.log-ppthtml'" \
      % (self.tmpdir, self.name, self.suffix, self.name, self.name)
    # Second argument is the timeout handed to misc.systemWithTimeout
    # (presumably seconds -- confirm in misc).
    misc.systemWithTimeout(cmd, 30)
    log_file = open("%s/%s.log-ppthtml" % (self.tmpdir, self.name))
    try:
      log = log_file.read()
    finally:
      # Close explicitly instead of relying on garbage collection.
      log_file.close()
    if re.search('OLE2 object not found', log):
      debug('MSPowerPointConverter says: %s' % log)
      raise ConversionError
      
class MSExcelConverter(ExternalConverterBase):
  """Converter for MS Excel files, driven by the external xlhtml command.

  Reads self.tmpdir and self.name, which are presumably provided by
  ExternalConverterBase -- confirm there.
  """
  suffix = 'xls'
  mime_type = 'application/vnd.ms-excel'

  def _convertToHtml(self):
    """Run 'xlhtml -nh -a' on '<name>.xls' in self.tmpdir, writing '<name>.html'.

    stdout of the command becomes the HTML file; stderr is captured in
    '<name>.log-xlhtml', which is read and then deleted.

    Raises ConversionError when the log contains 'OLE2 object not found'.
    """
    # NOTE(review): names are placed between single quotes without escaping;
    # confirm that self.tmpdir / self.name cannot contain "'".
    cmd = "cd '%s' && xlhtml -nh -a '%s.%s' > '%s.html' 2> '%s.log-xlhtml'" \
      % (self.tmpdir, self.name, self.suffix, self.name, self.name)
    # Second argument is the timeout handed to misc.systemWithTimeout
    # (presumably seconds -- confirm in misc).
    misc.systemWithTimeout(cmd, 30)
    log_path = "%s/%s.log-xlhtml" % (self.tmpdir, self.name)
    log_file = open(log_path, "r")
    try:
      log = log_file.read()
    finally:
      # The handle must be closed before the file is removed: deleting a
      # file that is still open fails on some platforms.
      log_file.close()
    os.unlink(log_path)
    if re.search('OLE2 object not found', log):
      debug('MSExcelConverter says: %s' % log)
      raise ConversionError

# Module-level plugin object: built from the three converter classes, then
# told which MIME types and which file-type descriptions it accepts.
p = Plugin(
  'MSOffice',
  [MSWordConverter, MSPowerPointConverter, MSExcelConverter])
for _mime_type in (
    'text/msword',
    'application/msword',
    'application/vnd.ms-word',
    'application/vnd.ms-powerpoint',
    'application/vnd.ms-excel'):
  p.addAcceptableMimeType(_mime_type)
for _file_type in (
    'Microsoft Word (6.0)? Document',
    'Microsoft Office Document',
    'Microsoft Word document data'):
  p.addAcceptableFileType(_file_type)
# Drop the loop variables so the module namespace only gains 'p'.
del _mime_type, _file_type
 
def getPlugin():
  """Return p, the module-level 'MSOffice' Plugin instance."""
  return p

