#!/usr/local/bin/python import string import re import os import sys import time import pprint # global vars defined at bottom near driver function class AcsSqlParser: """ AcsSqlParser - parses a tcl file looking for calls to the acs db api. returns a list of AcsSqlFragments which correspond to calls the db api. """ # regular expressions # this is the future one... but i need to add some logic to deal # with these cases ie db_string foo $sql #db_api = re.compile('db_.*?[[{"$].*?[]}"]', re.DOTALL) db_api = re.compile('db_.*?[{"].*?[}"]', re.DOTALL) tcl_comment = re.compile('#.*') def __init__(self, acs_sql_map=None): self.factory = AcsSqlFactoryDispatcher(acs_sql_map) def parseFile(self, file_name): fh = open(file_name) results = self.parseString(fh.read()) fh.close() return results def parseString(self, tcl_file_string): results = [] tcl_file_string = self.tcl_comment.sub('',tcl_file_string) for miscreant in DB_API_MISCREANTS: tcl_file_string=string.replace(tcl_file_string, miscreant, '') sql_list = self.db_api.findall(tcl_file_string) for sqlstring in sql_list: results.append(self.factory.dispatch(sqlstring)) return filter(None,results) def getStatistics(self): """ oo is so much better when you can break it:)""" return self.factory.statistics def getErrors(self): return self.factory.errors class AcsSqlFactoryDispatcher: """ a factory dispatcher which does statistics gathering and returns instances of AcsSqlFragment. originally this class was going to dispatch to the proper subclass of AcsSqlFragment, before my implementation of that class proved to me that it was not needed. still this does provide a useful place to gather statistics on db api usage """ def __init__(self, factory_map=None): if factory_map: self.factory_map = factory_map else: self.factory_map = DB_API_FACTORY_MAP self.errors = [] #nobody's perfect:( self.statistics = {} def dispatch(self, sqlstring): statistics = self.statistics try: index=string.index(sqlstring, ' ') except ValueError, e: self.errors.append(('couldn"t doing anything with it', str(e), sqlstring)) return None sql_type = string.strip(sqlstring[:index]) occurences=statistics.get(sql_type,0) if occurences == 0: statistics[sql_type]=1 else: statistics[sql_type]=occurences+1 SqlFragClass = self.factory_map.get(sql_type, None) if SqlFragClass: try: sql_frag = SqlFragClass(sqlstring) except SyntaxError, e: sql_frag = None self.errors.append(('not sure', str(e), sqlstring)) elif sql_type in DB_API_NULL_LIST: sql_frag = None else: #log('Error: couldn"t find match for 1%s1'%sqlstring[:index]) self.errors.append(('not found',string.strip(sqlstring[:index]))) sql_frag = None return sql_frag class AcsFileFinder: """ walks an acs 4 system looking for tcl files and groups them according to package. returns a list of AcsPackages """ def __init__(self, acs_root): self.acs_root = acs_root # why... self.pv_o = PathVisitor(dirtype=None,filetype='.tcl') self.pv_f = PathVisitor.visit # unbound method self.packages=[] self.file_count = 0 def findFiles(self, package_dir='acs-packages'): startdir = os.path.abspath(os.curdir) try: os.chdir(self.acs_root) os.chdir(package_dir) except: # oh the headaches i saved... os.chdir(startdir) package_dirs = os.listdir(os.curdir) for dir in package_dirs: # make sure its a directory if not os.path.isdir(dir) == 1: continue # make sure it looks like a package if not '%s.info'%dir in os.listdir(dir): continue os.path.walk(dir, self.pv_f, self.pv_o) files = self.pv_o.getCollectedFiles() self.pv_o.reset() self.file_count = self.file_count + len(files) self.packages.append(AcsPackage(dir, self.pv_o.getCollectedFiles())) os.chdir(startdir) return self.file_count def getPackages(self): return self.packages # not needed def getPackage(self, package_name): pass class PathVisitor: """ A Class intended for use with os.path.walk to collect files matching certain charterstics of file_extension, modtime, or size. currently only filetype is implemented. """ def __init__(self, dirtype=None, filetype=None, modtime=None, size=None): self.filetype = filetype self.modtime = modtime self.size = size self.dirtype = dirtype self.c_files = [] # collected files def visit(self, dir, files): if self.dirtype: if os.path.basename(dir) != self.dirtype: return for file in files: if os.path.isdir(file): continue if file[-len(self.filetype):] == self.filetype : #print file self.c_files.append(string.join( (dir, file), os.sep)) def getCollectedFiles(self): return self.c_files def reset(self): self.c_files = [] class AcsPackage: """ this is a container for both the tcl files and sql fragments found in an acs package. file path is package/dir{tcl|www}/[subdirs]filename.tcl """ def __init__(self, name, tcl_files): self.name = name self.tcl_files = tcl_files def getPackageName(self): return self.name def getAllTclFiles(self): return self.tcl_files # if we switch from package.query_name to the extended form def getTclLibraryFiles(self): pass def getTclWwwFiles(self): pass def setSqlFragments(self, frags): self.sql_fragments = frags def getSqlFragments(self): return self.sql_fragments class AcsSqlFragment: bind_re = re.compile('[ ,]:.*?[ ,)\n}]') def __init__(self, data): self.data = data self._initialize() def _initialize(self): data = self.data index_bracket = string.find(data, '{') index_double_quote = string.find(data,'"') if index_bracket > 0 and index_double_quote > 0: if index_double_quote < index_bracket: index = index_double_quote else: index = index_bracket elif index_bracket > 0: index = index_bracket elif index_double_quote > 0: index = index_double_quote else: log("can't find db api call type, "+self.data) raise StandardError("Should Not Happen: Can't find call type") headers = string.split(data[:index]) headers = map(string.strip, headers) if len(headers) == 2: self.type, self.query_name = headers elif len(headers) == 3: self.target_bind, self.type, self.query_name = headers else: raise SyntaxError(self.data) all_binds = self.bind_re.findall(data[index:]) binds = {} # same bind might get used more than once for bind in all_binds: if binds.has_key(bind[2:-1]): pass else: binds[bind[2:-1]]='' self.bind_variables = binds.keys() self.query = string.strip(data[index+1:-1]) def getType(self): return self.type def getQueryName(self): return self.query_name def getBindVariables(self): return self.bind_variables def getQuery(self): return self.query def getXmlString(self): try: xmlstr = """ %s %s %s \n"""%(self.query_name,self.type,self.query) for bindvar in self.bind_variables: xmlstr = xmlstr + " %s\n"%bindvar xmlstr = xmlstr + """ oracle 8.1.6 """ except: # most of these seem to be comments or calls with $sql... # that is going to problematic perhaps... #log("problem"+self.data) return '' return string.strip(xmlstr)+'\n' def _arg_parser(self): """ i screwed up... args to the db calls aren't being caught, need to fix.... better toplevel regexp hmm... on second thought this info isn't needed AFAICS """ pass class AcsSqlXmlWriter: """ simple xml writer. currently doing package oriented xml writing to a single output directory. 1 file per package """ def __init__(self, output_directory='test_dispatch_sql', acs_root=None): self.output_directory = output_directory def writePackageXML(self, package): start_directory = os.path.abspath(os.curdir) #os.chdir(acs_root) try: os.chdir(self.output_directory) except: try: os.makedirs(self.output_directory) os.chdir(self.output_directory) except: log("Fatal Exception in Writing XML Files: %s"%str(e)) sys.exit() #print 'writing in '+os.path.abspath(os.curdir) fh = open(package.getPackageName(), 'w') fh.write('''\n \n\n'''%package.getPackageName()) for sql_frag in package.getSqlFragments(): fh.write(sql_frag.getXmlString()) fh.write('''''') fh.close() os.chdir(start_directory) def log(message): LOGHANDLE.write(str(time.time())+ " " + str(message) + "\n\n") LOGHANDLE.flush() LOGFILE = '' LOGHANDLE = sys.stdout # the following represents non dql/dml public api of the db system. # i'm going to work on statistics gathering for their usage. DB_API_NULL_LIST = ('db_null', 'db_nullify_empty_string', 'db_quote', 'db_nextval', 'db_nth_pool_name', 'db_with_handle', 'db_exec_plsql','db_release_unused_handles', 'db_resultrows', 'db_transaction','db_abort_transaction', 'db_abort_transaction_p', 'db_continue_transaction','db_write_clob', 'db_write_blob', 'db_blob_get_file','db_register_pooled_sequence', 'db_get_sql_user','db_source_sql_file','db_source_sqlj_file', 'db_select_widget', 'db_null', 'db_call', 'db_url', 'db_text_file', 'db_getrow' ) # design changed once i realized one class could do it all # so the following isn't used DB_API_FACTORY_MAP = {'db_string':AcsSqlFragment, 'db_list':AcsSqlFragment, 'db_list_of_lists':AcsSqlFragment, 'db_exec_plsql':AcsSqlFragment, 'db_foreach':AcsSqlFragment, 'db_multirow':AcsSqlFragment, 'db_dml':AcsSqlFragment, 'db_0or1row':AcsSqlFragment, 'db_1row':AcsSqlFragment} # db api calls that work without args seem to be giving my regex's # no end of headaches (or was that just me). DB_API_MISCREANTS = ('db_null', 'db_release_unused_handles') def driver(acs_root, logfile=None, package_dir='acs-packages'): start_time = time.time() curdir = os.path.abspath(os.curdir) log('Starting ACS SQL Extractor') file_finder = AcsFileFinder(acs_root) log('Looking For Tcl Files Grouped By Packages') file_count=file_finder.findFiles() packages = file_finder.getPackages() log('Found %d Packages and %d Tcl Files'%(len(packages), file_count)) parser = AcsSqlParser() writer = AcsSqlXmlWriter('output') log('Starting Sql Extraction....') ## this is a little messy cause of the path switching... for package in packages: package_sql_frags = [] os.chdir(string.join( (acs_root, package_dir), os.sep)) files = package.getAllTclFiles() for file in files: package_sql_frags.extend(parser.parseFile(file)) package.setSqlFragments(package_sql_frags) os.chdir(acs_root) #log('Writing XML for %s'%package.getPackageName()) writer.writePackageXML(package) log('Writing Statistics') os.chdir(acs_root) os.chdir('output') fh = open('statistics', 'w') pprinter = pprint.PrettyPrinter(stream=fh) fh.write('Statistics For ACS Sql Extractor\n\n\n') fh.write('DB API Usage\n\n\n') pprinter.pprint(parser.getStatistics()) fh.write('\n\nErrors For ACS Sql Extractor\n\n\n') pprinter.pprint(parser.getErrors()) fh.close() # be nice os.chdir(curdir) log('ACS SQL Extractor Exiting: Total Running Time %s'%str(time.time()-start_time)) if __name__ == '__main__': print len(sys.argv), 'hi' if not len(sys.argv) == 2: print sys.argv, len(sys.argv) print "Usage %s acs4_package_root"%sys.argv[0] sys.exit(1) driver(sys.argv[1])