File contents
#!/usr/local/bin/python
import string
import re
import os
import sys
import time
import pprint
# global vars defined at bottom near driver function
class AcsSqlParser:
""" AcsSqlParser - parses a tcl file looking for calls to the
acs db api. returns a list of AcsSqlFragments which correspond
to calls the db api. """
# regular expressions
# this is the future one... but i need to add some logic to deal
# with these cases ie db_string foo $sql
#db_api = re.compile('db_.*?[[{"$].*?[]}"]', re.DOTALL)
db_api = re.compile('db_.*?[{"].*?[}"]', re.DOTALL)
tcl_comment = re.compile('#.*')
def __init__(self, acs_sql_map=None):
self.factory = AcsSqlFactoryDispatcher(acs_sql_map)
def parseFile(self, file_name):
fh = open(file_name)
results = self.parseString(fh.read())
fh.close()
return results
def parseString(self, tcl_file_string):
results = []
tcl_file_string = self.tcl_comment.sub('',tcl_file_string)
for miscreant in DB_API_MISCREANTS:
tcl_file_string=string.replace(tcl_file_string, miscreant, '')
sql_list = self.db_api.findall(tcl_file_string)
for sqlstring in sql_list:
results.append(self.factory.dispatch(sqlstring))
return filter(None,results)
def getStatistics(self):
""" oo is so much better when you can break it:)"""
return self.factory.statistics
def getErrors(self):
return self.factory.errors
class AcsSqlFactoryDispatcher:
""" a factory dispatcher which does statistics gathering and
returns instances of AcsSqlFragment. originally this class was going to
dispatch to the proper subclass of AcsSqlFragment, before my implementation
of that class proved to me that it was not needed. still this does
provide a useful place to gather statistics on db api usage """
def __init__(self, factory_map=None):
if factory_map:
self.factory_map = factory_map
else:
self.factory_map = DB_API_FACTORY_MAP
self.errors = [] #nobody's perfect:(
self.statistics = {}
def dispatch(self, sqlstring):
statistics = self.statistics
try:
index=string.index(sqlstring, ' ')
except ValueError, e:
self.errors.append(('couldn"t doing anything with it', str(e), sqlstring))
return None
sql_type = string.strip(sqlstring[:index])
occurences=statistics.get(sql_type,0)
if occurences == 0:
statistics[sql_type]=1
else:
statistics[sql_type]=occurences+1
SqlFragClass = self.factory_map.get(sql_type, None)
if SqlFragClass:
try:
sql_frag = SqlFragClass(sqlstring)
except SyntaxError, e:
sql_frag = None
self.errors.append(('not sure', str(e), sqlstring))
elif sql_type in DB_API_NULL_LIST:
sql_frag = None
else:
#log('Error: couldn"t find match for 1%s1'%sqlstring[:index])
self.errors.append(('not found',string.strip(sqlstring[:index])))
sql_frag = None
return sql_frag
class AcsFileFinder:
""" walks an acs 4 system looking for tcl files and groups them according
to package. returns a list of AcsPackages """
def __init__(self, acs_root):
self.acs_root = acs_root # why...
self.pv_o = PathVisitor(dirtype=None,filetype='.tcl')
self.pv_f = PathVisitor.visit # unbound method
self.packages=[]
self.file_count = 0
def findFiles(self, package_dir='acs-packages'):
startdir = os.path.abspath(os.curdir)
try:
os.chdir(self.acs_root)
os.chdir(package_dir)
except: # oh the headaches i saved...
os.chdir(startdir)
package_dirs = os.listdir(os.curdir)
for dir in package_dirs:
# make sure its a directory
if not os.path.isdir(dir) == 1: continue
# make sure it looks like a package
if not '%s.info'%dir in os.listdir(dir): continue
os.path.walk(dir, self.pv_f, self.pv_o)
files = self.pv_o.getCollectedFiles()
self.pv_o.reset()
self.file_count = self.file_count + len(files)
self.packages.append(AcsPackage(dir, self.pv_o.getCollectedFiles()))
os.chdir(startdir)
return self.file_count
def getPackages(self):
return self.packages
# not needed
def getPackage(self, package_name): pass
class PathVisitor:
""" A Class intended for use with os.path.walk to collect files
matching certain charterstics of file_extension, modtime, or size.
currently only filetype is implemented.
"""
def __init__(self, dirtype=None, filetype=None, modtime=None, size=None):
self.filetype = filetype
self.modtime = modtime
self.size = size
self.dirtype = dirtype
self.c_files = [] # collected files
def visit(self, dir, files):
if self.dirtype:
if os.path.basename(dir) != self.dirtype:
return
for file in files:
if os.path.isdir(file): continue
if file[-len(self.filetype):] == self.filetype :
#print file
self.c_files.append(string.join( (dir, file), os.sep))
def getCollectedFiles(self):
return self.c_files
def reset(self):
self.c_files = []
class AcsPackage:
""" this is a container for both the tcl files and sql fragments
found in an acs package.
file path is package/dir{tcl|www}/[subdirs]filename.tcl
"""
def __init__(self, name, tcl_files):
self.name = name
self.tcl_files = tcl_files
def getPackageName(self):
return self.name
def getAllTclFiles(self):
return self.tcl_files
# if we switch from package.query_name to the extended form
def getTclLibraryFiles(self): pass
def getTclWwwFiles(self): pass
def setSqlFragments(self, frags):
self.sql_fragments = frags
def getSqlFragments(self):
return self.sql_fragments
class AcsSqlFragment:
bind_re = re.compile('[ ,]:.*?[ ,)\n}]')
def __init__(self, data):
self.data = data
self._initialize()
def _initialize(self):
data = self.data
index_bracket = string.find(data, '{')
index_double_quote = string.find(data,'"')
if index_bracket > 0 and index_double_quote > 0:
if index_double_quote < index_bracket:
index = index_double_quote
else:
index = index_bracket
elif index_bracket > 0:
index = index_bracket
elif index_double_quote > 0:
index = index_double_quote
else:
log("can't find db api call type, "+self.data)
raise StandardError("Should Not Happen: Can't find call type")
headers = string.split(data[:index])
headers = map(string.strip, headers)
if len(headers) == 2:
self.type, self.query_name = headers
elif len(headers) == 3:
self.target_bind, self.type, self.query_name = headers
else:
raise SyntaxError(self.data)
all_binds = self.bind_re.findall(data[index:])
binds = {}
# same bind might get used more than once
for bind in all_binds:
if binds.has_key(bind[2:-1]): pass
else: binds[bind[2:-1]]=''
self.bind_variables = binds.keys()
self.query = string.strip(data[index+1:-1])
def getType(self):
return self.type
def getQueryName(self):
return self.query_name
def getBindVariables(self):
return self.bind_variables
def getQuery(self):
return self.query
def getXmlString(self):
try:
xmlstr = """
<query>
<query_name>%s</query_name>
<query_type>%s</query_type>
<query_text>
%s
</query_text>
<bindvars>\n"""%(self.query_name,self.type,self.query)
for bindvar in self.bind_variables:
xmlstr = xmlstr + " <bindvar>%s</bindvar>\n"%bindvar
xmlstr = xmlstr + """
<bindvars>
<rdbms>
<type>oracle</type>
<version>8.1.6</version>
</rdbms>
</query> """
except:
# most of these seem to be comments or calls with $sql...
# that is going to problematic perhaps...
#log("problem"+self.data)
return ''
return string.strip(xmlstr)+'\n'
def _arg_parser(self):
""" i screwed up... args to the db calls aren't being caught,
need to fix.... better toplevel regexp
hmm... on second thought this info isn't needed AFAICS
"""
pass
class AcsSqlXmlWriter:
""" simple xml writer. currently doing package oriented xml writing
to a single output directory. 1 file per package """
def __init__(self, output_directory='test_dispatch_sql', acs_root=None):
self.output_directory = output_directory
def writePackageXML(self, package):
start_directory = os.path.abspath(os.curdir)
#os.chdir(acs_root)
try:
os.chdir(self.output_directory)
except:
try:
os.makedirs(self.output_directory)
os.chdir(self.output_directory)
except:
log("Fatal Exception in Writing XML Files: %s"%str(e))
sys.exit()
#print 'writing in '+os.path.abspath(os.curdir)
fh = open(package.getPackageName(), 'w')
fh.write('''<?xml version="1.0"?>\n
<!DOCTYPE acssql PUBLIC "-//OpenACS 4//DTD acssql .20//EN "http://www.openacs.org">
<query_list package="%s">\n\n'''%package.getPackageName())
for sql_frag in package.getSqlFragments():
fh.write(sql_frag.getXmlString())
fh.write('''</query_list>''')
fh.close()
os.chdir(start_directory)
def log(message):
LOGHANDLE.write(str(time.time())+ " " + str(message) + "\n\n")
LOGHANDLE.flush()
LOGFILE = ''
LOGHANDLE = sys.stdout
# the following represents non dql/dml public api of the db system.
# i'm going to work on statistics gathering for their usage.
DB_API_NULL_LIST = ('db_null', 'db_nullify_empty_string',
'db_quote', 'db_nextval', 'db_nth_pool_name',
'db_with_handle', 'db_exec_plsql','db_release_unused_handles',
'db_resultrows', 'db_transaction','db_abort_transaction',
'db_abort_transaction_p', 'db_continue_transaction','db_write_clob',
'db_write_blob', 'db_blob_get_file','db_register_pooled_sequence',
'db_get_sql_user','db_source_sql_file','db_source_sqlj_file',
'db_select_widget', 'db_null', 'db_call',
'db_url', 'db_text_file', 'db_getrow'
)
# design changed once i realized one class could do it all
# so the following isn't used
DB_API_FACTORY_MAP = {'db_string':AcsSqlFragment,
'db_list':AcsSqlFragment,
'db_list_of_lists':AcsSqlFragment,
'db_exec_plsql':AcsSqlFragment,
'db_foreach':AcsSqlFragment,
'db_multirow':AcsSqlFragment,
'db_dml':AcsSqlFragment,
'db_0or1row':AcsSqlFragment,
'db_1row':AcsSqlFragment}
# db api calls that work without args seem to be giving my regex's
# no end of headaches (or was that just me).
DB_API_MISCREANTS = ('db_null', 'db_release_unused_handles')
def driver(acs_root, logfile=None, package_dir='acs-packages'):
start_time = time.time()
curdir = os.path.abspath(os.curdir)
log('Starting ACS SQL Extractor')
file_finder = AcsFileFinder(acs_root)
log('Looking For Tcl Files Grouped By Packages')
file_count=file_finder.findFiles()
packages = file_finder.getPackages()
log('Found %d Packages and %d Tcl Files'%(len(packages), file_count))
parser = AcsSqlParser()
writer = AcsSqlXmlWriter('output')
log('Starting Sql Extraction....')
## this is a little messy cause of the path switching...
for package in packages:
package_sql_frags = []
os.chdir(string.join( (acs_root, package_dir), os.sep))
files = package.getAllTclFiles()
for file in files:
package_sql_frags.extend(parser.parseFile(file))
package.setSqlFragments(package_sql_frags)
os.chdir(acs_root)
#log('Writing XML for %s'%package.getPackageName())
writer.writePackageXML(package)
log('Writing Statistics')
os.chdir(acs_root)
os.chdir('output')
fh = open('statistics', 'w')
pprinter = pprint.PrettyPrinter(stream=fh)
fh.write('Statistics For ACS Sql Extractor\n\n\n')
fh.write('DB API Usage\n\n\n')
pprinter.pprint(parser.getStatistics())
fh.write('\n\nErrors For ACS Sql Extractor\n\n\n')
pprinter.pprint(parser.getErrors())
fh.close()
# be nice
os.chdir(curdir)
log('ACS SQL Extractor Exiting: Total Running Time %s'%str(time.time()-start_time))
if __name__ == '__main__':
print len(sys.argv), 'hi'
if not len(sys.argv) == 2:
print sys.argv, len(sys.argv)
print "Usage %s acs4_package_root"%sys.argv[0]
sys.exit(1)
driver(sys.argv[1])