#!/usr/bin/python

#
# Copyright 2001-2002 Stefane Fermigier and Nuxeo SARL
# See LICENSE.TXT for licensing information
#

import sys, os, unittest, re, glob, string
sys.path.insert(0, '..')
import plugins

class PluginSelectionTestCase(unittest.TestCase):
  "Test plugin selection mechanism (by file / by mime type)"

  # File extension (case-sensitive, leading dot included) -> name of the
  # plugin that plugins.selectPlugin() is expected to choose for it.
  EXTENSION_PLUGINS = {
    '.ppt': 'MSPowerPoint',
    '.xls': 'MSExcel',
    '.doc': 'MSWord',
    '.pdf': 'PDF',
    '.rtf': 'RTF',
    '.sxi': 'OpenOffice',
    '.sxc': 'OpenOffice',
    '.sxw': 'OpenOffice',
    '.htm': 'HTML',
    '.html': 'HTML',
    '.txt': 'Text',
    }

  # Plugin expected for every extension missing from EXTENSION_PLUGINS.
  FALLBACK_PLUGIN = 'Dumb'

  def _readFile(self, filename):
    """Return the complete content of `filename`.

    The file is opened in binary mode because most fixtures (.doc, .pdf,
    .xls, ...) are binary formats that a text-mode read may alter, and the
    handle is always closed, even when the read fails.
    """
    # try/finally rather than `with`, so the module still parses on
    # interpreters that predate the `with` statement.
    f = open(filename, 'rb')
    try:
      return f.read()
    finally:
      f.close()

  def _expectedPlugin(self, filename):
    """Return the plugin name expected for `filename`.

    Only the last extension of the name is considered; unknown or missing
    extensions map to FALLBACK_PLUGIN.
    """
    extension = os.path.splitext(filename)[1]
    return self.EXTENSION_PLUGINS.get(extension, self.FALLBACK_PLUGIN)

  def test_selectByMimeType(self):
    # Selection driven by the mime type alone: no data is supplied.
    expected_by_mime_type = {
      'text/rtf': 'RTF',
      'application/msword': 'MSWord',
      'application/vnd.ms-powerpoint': 'MSPowerPoint',
      'application/vnd.ms-excel': 'MSExcel',
      'application/pdf': 'PDF',
      'application/vnd.sun.xml.writer': 'OpenOffice',
      'text/plain': 'Text',
      }
    for mime_type, expected in expected_by_mime_type.items():
      self.assertEqual(
        plugins.selectPlugin('', mime_type).getName(), expected)

  def test_selectByFileType(self):
    # Selection driven by file content and file name, for every fixture
    # named test<digit>.<something> in the current working directory.
    for fn in glob.glob('test[0-9].*'):
      data = self._readFile(fn)
      plugin_name = plugins.selectPlugin(data, filename=fn).getName()
      expected = self._expectedPlugin(fn)
      self.assertEqual(plugin_name, expected,
        msg="%s should use plugin %s, not %s" % (fn, expected, plugin_name))


class ConverterTestCase(unittest.TestCase):
  "Test conversion by plugins"

  def test_Base(self):
    # The base converter must hand back its raw input unchanged, return
    # empty values from every other accessor, and raise KeyError for an
    # image name it does not have.
    doc = plugins.plugin.ConverterBase('titi')
    self.assertEqual(doc.getRaw(), 'titi')
    self.assertEqual(doc.getHtml(), '')
    self.assertEqual(doc.getText(), '')
    self.assertEqual(doc.getBody(), '')
    self.assertEqual(doc.getTitle(), '')
    self.assertEqual(doc.getImageNames(), [])
    self.assertRaises(KeyError, doc.getImage, '')

  #
  # File loading
  #

  def loadFile(self, pluginname, filename):
    """Convert `filename` with the plugin named `pluginname`.

    The converter returned by the plugin is stored in `self.result`, where
    the check helpers and the assertions of each test read it.
    """
    plugin = plugins.getPluginByName(pluginname)
    # Binary mode: most fixtures are binary formats that a text-mode read
    # may alter. try/finally (rather than `with`) closes the handle while
    # still parsing on interpreters that predate the `with` statement.
    f = open(filename, 'rb')
    try:
      data = f.read()
    finally:
      f.close()
    self.result = plugin.getConverter(data)

  #
  # Shared assertion helpers
  #

  def _assertOccursOnce(self, haystack, needle):
    """Fail unless `needle` occurs exactly once in `haystack`."""
    self.assertEqual(haystack.count(needle), 1)

  def _words(self, text):
    """Return `text` with each run of non-word characters collapsed to a
    single space and the leading/trailing whitespace removed."""
    return re.sub(r'\W+', ' ', text).strip()

  #
  # MS-Word tests
  #
  def checkWordContentAscii(self):
    """Check that the reference ASCII sentence appears exactly once in both
    the HTML and the text output of `self.result`."""
    test_string = 'Linux rulez. Window$ SuCk$$$.'
    self._assertOccursOnce(self.result.getHtml(), test_string)
    self._assertOccursOnce(self.result.getText(), test_string)

  def checkWordContentUnicode(self):
    """Check that the reference non-ASCII sentence appears exactly once in
    both the HTML and the text output of `self.result`."""
    # NOTE(review): this literal looks like it lost its accented characters
    # somewhere along the way -- confirm against the content of test5.doc
    # before relying on this check.
    test_string = 'Il fait chaud en t.'
    self._assertOccursOnce(self.result.getHtml(), test_string)
    self._assertOccursOnce(self.result.getText(), test_string)

  def test_MSWord1(self):
    self.loadFile('MSWord', 'test1.doc')
    self.checkWordContentAscii()
    self.assertEqual(self.result.getTitle(), 'Untitled')

  def test_MSWord2(self):
    self.loadFile('MSWord', 'test2.doc')
    self.checkWordContentAscii()

  def test_MSWord3(self):
    self.loadFile('MSWord', 'test3.doc')
    self.checkWordContentAscii()

  def test_MSWord4(self):
    self.loadFile('MSWord', 'test4.doc')
    self.checkWordContentAscii()

    # This fixture is expected to yield exactly one extracted image, with a
    # known name and a known size.
    images_names = self.result.getImageNames()
    self.assertEqual(len(images_names), 1)
    self.assertEqual(images_names[0], 'convert0.png')
    self.assertEqual(len(self.result.getImage('convert0.png')), 5690)

  def test_MSWord5(self):
    self.loadFile('MSWord', 'test5.doc')
    self.checkWordContentUnicode()

  #
  # RTF Tests
  #
  def test_RTF1(self):
    self.loadFile('RTF', 'test1.rtf')
    self.checkWordContentAscii()

  #
  # PDF Tests
  #
  def test_PDF1(self):
    self.loadFile('PDF', 'test1.pdf')
    self.checkWordContentAscii()

  #
  # PPT Tests
  #
  def test_PPT1(self):
    self.loadFile('MSPowerPoint', 'test1.ppt')
    test_string = 'Test de ppthtml'
    self._assertOccursOnce(self.result.getHtml(), test_string)

  #
  # XLS Tests
  #
  def test_XLS1(self):
    self.loadFile('MSExcel', 'test1.xls')
    test_string = 'Test de xlhtml'
    self._assertOccursOnce(self.result.getHtml(), test_string)

  #
  # OpenOffice tests
  #
  def test_SXC1(self):
    self.loadFile('OpenOffice', 'test1.sxc')
    test_string = 'Test de xlhtml'
    self.assertEqual(self._words(self.result.getText()), test_string)

  def test_SXI1(self):
    self.loadFile('OpenOffice', 'test1.sxi')
    test_string = 'Test de ppthtml'
    self.assertEqual(self._words(self.result.getText()), test_string)

  #
  # HTML tests
  #
  def test_HTML1(self):
    self.loadFile('HTML', 'test1.html')
    # XXX: fix this
    test_string = 'Le titre Linux rulez Window SuCk'
    self.assertEqual(self._words(self.result.getText()), test_string)
    self.assertEqual(self.result.getTitle(), 'Le titre')

  #
  # Text tests
  #
  def test_Text1(self):
    self.loadFile('Text', 'test1.txt')
    test_string = 'Linux rulez Window SuCk'
    self.assertEqual(self._words(self.result.getText()), test_string)
    # Once normalised, the body reads as the same words between two "pre"
    # tokens.
    self.assertEqual(
      self._words(self.result.getBody()), 'pre '+test_string+' pre')

# Run every test case defined in this module when the file is executed as a
# script; importing the module only defines the test classes.
if __name__ == '__main__':
  unittest.main()


