"""Convert MS Office documents to/from HTML or images by automating Office over COM.

Every public converter takes and returns base64 encoded payloads, works in a
uniquely named scratch directory below ``home`` and appends a line to
``home + 'zlave.log'`` describing what happened.
"""
import base64
import os
import re
import string
import sys
from time import *

try:
    import whrandom
except ImportError:
    # whrandom is missing from later Python releases; random offers the same
    # randint() call, so fall back to it under the old name.
    import random as whrandom

import pythoncom
from win32com.client import constants, Dispatch

#Global variable applying to all functions
home = 'e:\\temp\\zope\\'
image_re = re.compile(r'image\d+\.(gif|jpg|png)')
padder = re.compile(r'(\D*)(\d*)(.*)')

#Returned by word2html when it is called without a file or a user
_WORD2HTML_USAGE = """
    word2html requires a base64 encoded Word file and a 'user' string to be passed.
    An optional 'no_cleanup' flag means the returned html is that directly generated by Word2000.
    Return value is an array(base64 encoded html, struct{image name, base64 encoded image})
    The html file expects to find its images in a folder './image_files'.
    If there are any errors, the first element of the returned array will simply be the string 'Error',
    with the second value being a description of the error.
    You should therefore check the first returned value for being 'Error' before you do anything else.
    If there were no images generated by your Word file, the 'images' struct simply contains a single key 'null'.
    This is in case certain xmlrpc implementations don't like empty structs.
    """


def log(user, func, description, status=None):
    """Append one timestamped entry to the log file under ``home``.

    user        -- who triggered the operation (converted with str()).
    func        -- name of the calling function.
    description -- free text describing the event.
    status      -- optional extra text appended after the description; lets
                   callers log a subject and an outcome separately.
    """
    stamp = strftime('%d/%m/%Y - %H:%M:%S', gmtime(time()))
    entry = '\''+str(user)+'\' - '+func+' - '+stamp+': - '+description
    if status is not None:
        entry = entry+' - '+str(status)
    logfile = open(home+'zlave.log', 'a')
    try:
        logfile.write(entry+'\n')
    finally:
        logfile.close()


def _make_temp_dir():
    """Create a uniquely named scratch directory under ``home`` and return its name."""
    while 1:
        name = str(whrandom.randint(0, 100000))
        try:
            os.makedirs(home+name)
            return name
        except OSError:
            if not os.path.exists(home+name):
                #Not a name collision (e.g. no permission): retrying cannot help
                raise


def _read_file(path, mode):
    """Return the whole content of ``path``, always closing the file."""
    handle = open(path, mode)
    try:
        return handle.read()
    finally:
        handle.close()


def _write_file(path, mode, data):
    """Write ``data`` to ``path``, always closing the file."""
    handle = open(path, mode)
    try:
        handle.write(data)
    finally:
        handle.close()


def _remove_tree(path, user, func):
    """Best-effort recursive delete of ``path``; failures are logged, never raised."""
    if os.path.isdir(path):
        for entry in os.listdir(path):
            _remove_tree(os.path.join(path, entry), user, func)
        remover = os.rmdir
    else:
        remover = os.remove
    try:
        remover(path)
    except OSError:
        log(user, func, 'Failed to delete \''+path+'\' with error = '+str(sys.exc_info()[1]))


def _run_office(prog_id, collection_name, product, failure, user, func, action, *action_args):
    """Start an Office application, run ``action`` against it and quit it again.

    ``action`` is called as ``action(collection, *action_args)`` where
    ``collection`` is the attribute ``collection_name`` of the application
    object (e.g. its ``Documents``).

    Returns None on success, otherwise the error text (which is also logged):
    'Failed to start <product>' or the given ``failure`` message.
    """
    app = None
    try:
        #The COM layer can raise anything, so both handlers are deliberately broad
        try:
            pythoncom.CoInitialize()
            app = Dispatch(prog_id)
            collection = getattr(app, collection_name)
        except:
            message = 'Failed to start '+product
            log(user, func, message, sys.exc_info()[1])
            return message
        try:
            action(collection, *action_args)
        except:
            log(user, func, failure, sys.exc_info()[1])
            return failure
        return None
    finally:
        if app is not None:
            try:
                #Exit the application
                app.Quit()
            except:
                pass


def _word_to_html(documents, source, target):
    """Open the Word file ``source`` and save it as html to ``target``."""
    doc = documents.Open(source)
    doc.SaveAs(target, 8) #102 or 8 seem to apply to HTML on different boxes
    #A hack to get around access permissions for the .html file that was written
    doc = documents.Open(target)
    #Close the document so we can delete it later
    doc.Close()


def _excel_to_html(books, source, target):
    """Open the Excel file ``source`` and save it as html to ``target``."""
    book = books.Open(source)
    #Must have run Makepy.py in order to use 'constants.xlHtml'
    book.SaveAs(target, constants.xlHtml)
    #A hack to get around access permissions for the .html file that was written
    doc = books.Open(target)
    #Close the document so we can delete it later
    doc.Close()


def _ppt_to_images(presentations, source, target_dir, format, x, y):
    """Open the Powerpoint file ``source`` and export its slides as images."""
    doc = presentations.Open(FileName=source, ReadOnly=0, Untitled=0, WithWindow=0)
    doc.Export(target_dir, format, x, y)
    doc.Close()


def _filter_html(html_path, user, func):
    """Run MSFilter v2 (for Office2000) over ``html_path`` to clean up the garbage."""
    # NOTE(review): os.spawnv documents that the argument list should start
    # with the command name, so '-b' may be consumed as argv[0] here. Left
    # unchanged because the filter's output depends on it - confirm.
    cleanup_code = os.spawnv(os.P_WAIT, r'D:\winnt\filter',
                             ('-b', '-l', '-f', '-m', '-r', '-s', '-t', html_path))
    if cleanup_code != 1:
        log(user, func, 'Irregular MS Filter exit code = '+str(cleanup_code))


def _collect_images(folder):
    """Return {file name: base64 content} for the generated images in ``folder``.

    Only names matching ``image_re`` are picked up. When nothing is found the
    result is {'null': 'null'} in case some xml-rpc implementations complain
    about empty structs.
    """
    images = {}
    if os.access(folder, os.F_OK):
        for image in os.listdir(folder):
            if image_re.search(image):
                images[image] = base64.encodestring(_read_file(folder+'\\'+image, 'rb'))
    if not images:
        images['null'] = 'null'
    return images


def _pad_slide_number(filename):
    """Zero-pad the first run of digits in ``filename`` to 4 places.

    This ensures that slides are displayed in the correct order when sorted
    alphanumerically, e.g. 'Slide1.png' -> 'Slide0001.png'.
    """
    prefix, digits, rest = padder.match(filename).groups()
    return prefix+digits.zfill(4)+rest


def _office_to_html(file, user, no_cleanup, func, prog_id, collection_name,
                    product, extension, action, success):
    """Shared implementation of word2html and xl2html.

    Writes the decoded ``file`` into a scratch directory, has the Office
    application convert it to html, optionally filters the html and gathers
    the generated images. The scratch directory is removed on every exit path.

    Returns [base64 html, images dict] or ['Error', description].
    """
    name = _make_temp_dir()
    work_dir = home+name+'\\'
    source = work_dir+'image'+extension
    html_path = work_dir+'image.html'
    try:
        #Write the incoming file to the unique directory
        _write_file(source, 'wb', base64.decodestring(file))
        error = _run_office(prog_id, collection_name, product,
                            'Failed to convert '+product+' document to html',
                            user, func, action, source, html_path)
        if error is not None:
            return ['Error', error]
        if no_cleanup is None:
            _filter_html(html_path, user, func)
        #doc_html needs encoding or we get errors
        doc_html = base64.encodestring(_read_file(html_path, 'r'))
        images = _collect_images(work_dir+'image_files')
    finally:
        _remove_tree(home+name, user, func)
    #Log our successful conversion and return the result to the client
    log(user, func, success)
    return [doc_html, images]


def word2html(self, file=None, user=None, no_cleanup=None):
    """Convert a base64 encoded Word file to html.

    file       -- base64 encoded Word document.
    user       -- string identifying the caller; only used for logging.
    no_cleanup -- when not None the html is returned exactly as Word wrote
                  it, without running the MS filter over it.

    Returns [base64 encoded html, {image name: base64 encoded image}], or
    ['Error', description] when the conversion failed. When ``file`` or
    ``user`` is missing, a usage explanation string is returned instead.
    """
    if file is None or user is None:
        return _WORD2HTML_USAGE
    return _office_to_html(file, user, no_cleanup, 'word2html',
                           "Word.Application", 'Documents', 'Word', '.doc',
                           _word_to_html,
                           'Successful Word2HTML conversion performed')


def html2word(self=None, file=None):
    """Convert a base64 encoded html file to a base64 encoded Word document.

    When ``file`` is None a fixed test file below e:\\temp\\zope is converted
    instead (debug help). Errors raised while converting propagate to the
    caller; Word is quit and the scratch directory removed in either case.
    """
    func = 'html2word'
    log(None, func, 'setup variables', 'Created')

    #debug help
    if file is None:
        file = base64.encodestring(_read_file('e:\\temp\\zope\\test.html', 'r'))
        log(None, func, 'debug fileIO', 'Done')
    else:
        log(None, func, 'debug fileIO', 'skipped')

    #Make a unique temp directory for this process
    name = _make_temp_dir()
    work_dir = home+name+'\\'
    log(None, func, work_dir, 'Created')
    html_path = work_dir+'file.html'
    doc_path = work_dir+'file.doc'
    try:
        #Write the incoming html file to the unique directory
        _write_file(html_path, 'w', base64.decodestring(file))
        log(None, func, html_path, 'Created')

        #Start Word
        pythoncom.CoInitialize()
        w = Dispatch("Word.Application")
        try:
            documents = w.Documents
            #Open the incoming html file in Word
            doc = documents.Open(html_path)
            #SaveAs Word .doc file
            doc.SaveAs(doc_path, 0) #13
            log(None, func, doc_path, 'Created')
            #A hack to get round access permissions for the file that was written
            doc = documents.Open(doc_path)
            #Finish it all off
            doc.Close()
        finally:
            try:
                w.Quit()
            except:
                #Must not mask an error raised by the conversion itself
                log(None, func, 'Failed to quit Word', sys.exc_info()[1])

        #Get the .doc into a variable
        word_doc = _read_file(doc_path, 'rb')
    finally:
        #Attempt to clean up after ourself
        _remove_tree(home+name, None, func)
    return base64.encodestring(word_doc)


def ppt2html(self, file, user, format='png', x=400, y=320):
    """Convert a base64 encoded Powerpoint file to one image per slide.

    file   -- base64 encoded Powerpoint document.
    user   -- string identifying the caller; only used for logging.
    format -- image format, one of 'gif', 'jpg', 'png' (case-insensitive).
    x, y   -- size passed to Powerpoint's Export; integers or anything
              int() accepts.

    Returns {padded image name: base64 encoded image}, {'null': 'null'} when
    no image was produced, or {'Error': description} on failure.
    """
    func = 'ppt2html'
    accepted_images = ('gif', 'jpg', 'png')

    #Powerpoint accepts a string corresponding to any image format in the
    #system registry. I will limit things to 'gif', 'jpg', 'png'.
    format = str(format).lower()
    if format not in accepted_images:
        log(user, func, 'Requested image format \''+format+'\' unavailable')
        return {'Error':'Unavailable image format requested.  Accepted Values are '+str(accepted_images)}

    #Check out the x and y values that were passed
    try:
        x = int(x)
        y = int(y)
    except (TypeError, ValueError, OverflowError):
        log(user, func, 'Inappropriate x or y value \''+str(x)+'\', \''+str(y)+'\'')
        return {'Error':'Inappropriate x and/or y values passed.  Accepted values are integers or strings convertable to integers in Python. These are optional values with defaults of 400 and 320'}

    #Make a unique temp directory for this process
    name = _make_temp_dir()
    work_dir = home+name+'\\'
    source = work_dir+'file.ppt'
    try:
        #Write the incoming Powerpoint file to the unique directory
        _write_file(source, 'wb', base64.decodestring(file))
        #SaveAs .html is not possible in PPT97/98, so export images instead
        error = _run_office("Powerpoint.Application", 'Presentations', 'Powerpoint',
                            'Failed to convert Powerpoint document to images',
                            user, func, _ppt_to_images,
                            source, home+name, format, x, y)
        if error is not None:
            return {'Error': error}

        images = {}
        for image in os.listdir(work_dir):
            #Skip the original .ppt file and anything that is not a plain file
            if image == 'file.ppt' or not os.path.isfile(work_dir+image):
                continue
            images[_pad_slide_number(image)] = base64.encodestring(_read_file(work_dir+image, 'rb'))
        if not images:
            #Add {null:null} in case certain things complain
            images['null'] = 'null'
    finally:
        #Attempt to clean up after ourself
        _remove_tree(home+name, user, func)
    #Log our successful conversion and return the result to the client
    log(user, func, 'Successful Powerpoint2Image conversion performed')
    return images


def xl2html(self, file, user, no_cleanup=None):
    """Convert a base64 encoded Excel file to html.

    file       -- base64 encoded Excel workbook.
    user       -- string identifying the caller; only used for logging.
    no_cleanup -- when not None the html is returned exactly as Excel wrote
                  it, without running the MS filter over it.

    Returns [base64 encoded html, {image name: base64 encoded image}], or
    ['Error', description] when the conversion failed.
    """
    return _office_to_html(file, user, no_cleanup, 'xl2html',
                           "Excel.Application", 'Workbooks', 'Excel', '.xls',
                           _excel_to_html,
                           'Successful Excel2HTML conversion performed')

#file = base64.encodestring(open(r'e:\temp\zope\Book1.xls', 'rb').read())
#xl2html(None, file, 'debug')