You are not logged in Log in Join
You are here: Home » Members » k_vertigo » openacs » old » AcsSqlExtractor.py » View File

Log in
Name

Password

 

AcsSqlExtractor.py

File details
Size
13 K
File type
text/x-python

File contents

#!/usr/local/bin/python

import string
import re
import os
import sys
import time
import pprint

# global vars defined at bottom near driver function


class AcsSqlParser:    
    """ AcsSqlParser - parses a tcl file looking for calls to the
    acs db api. returns a list of AcsSqlFragments which correspond
    to calls the db api. """

    # regular expressions
    # this is the future one... but i need to add some logic to deal
    # with these cases ie db_string foo $sql
    #db_api = re.compile('db_.*?[[{"$].*?[]}"]', re.DOTALL)
    
    db_api = re.compile('db_.*?[{"].*?[}"]', re.DOTALL)    
        
    tcl_comment = re.compile('#.*')
    
    def __init__(self, acs_sql_map=None):
        self.factory = AcsSqlFactoryDispatcher(acs_sql_map)
        
    def parseFile(self, file_name):
        fh = open(file_name)

        results = self.parseString(fh.read())

        fh.close()
        return results

    def parseString(self, tcl_file_string):
        results = []
        
        tcl_file_string = self.tcl_comment.sub('',tcl_file_string)
        for miscreant in DB_API_MISCREANTS:
            tcl_file_string=string.replace(tcl_file_string, miscreant, '')
            
        sql_list = self.db_api.findall(tcl_file_string)

        for sqlstring in sql_list:
            results.append(self.factory.dispatch(sqlstring))

        return filter(None,results)

    def getStatistics(self):
        
        """ oo is so much better when you can break it:)"""
        return self.factory.statistics
    
    def getErrors(self):
        return self.factory.errors
    
class AcsSqlFactoryDispatcher:
    """ a factory dispatcher which does statistics gathering and
    returns instances of AcsSqlFragment. originally this class was going to 
    dispatch to the proper subclass of AcsSqlFragment, before my implementation
    of that class proved to me that it was not needed. still this does 
    provide a useful place to gather statistics on db api usage """
    
 
    def __init__(self, factory_map=None):
        if factory_map:
            self.factory_map = factory_map
        else:
            self.factory_map = DB_API_FACTORY_MAP

        self.errors = []  #nobody's perfect:(            
        self.statistics = {}

        
    def dispatch(self, sqlstring):
        
        statistics = self.statistics        
        try:
            index=string.index(sqlstring, ' ')        
        except ValueError, e:
            self.errors.append(('couldn"t doing anything with it', str(e), sqlstring))
            return None
        
        sql_type = string.strip(sqlstring[:index])
        
        occurences=statistics.get(sql_type,0)
        if occurences == 0:
            statistics[sql_type]=1
        else:
            statistics[sql_type]=occurences+1

        SqlFragClass = self.factory_map.get(sql_type, None)

        if SqlFragClass:

            try:
                sql_frag = SqlFragClass(sqlstring)
            except SyntaxError, e:
                sql_frag = None
                self.errors.append(('not sure', str(e), sqlstring))
            
        elif sql_type in DB_API_NULL_LIST:
            sql_frag = None
        else:
            #log('Error: couldn"t find match for 1%s1'%sqlstring[:index])
            self.errors.append(('not found',string.strip(sqlstring[:index])))
            sql_frag = None
                
        return sql_frag
                
class AcsFileFinder:
    """ walks an acs 4 system looking for tcl files and groups them according
    to package. returns a list of AcsPackages """
    
    def __init__(self, acs_root): 
        self.acs_root = acs_root        # why...
        self.pv_o = PathVisitor(dirtype=None,filetype='.tcl')
        self.pv_f = PathVisitor.visit # unbound method
        self.packages=[]
        self.file_count = 0
        
    def findFiles(self, package_dir='acs-packages'): 
        startdir = os.path.abspath(os.curdir)
        
        try:
            os.chdir(self.acs_root)
            os.chdir(package_dir)
        except: # oh the headaches i saved...
            os.chdir(startdir)

        package_dirs = os.listdir(os.curdir)
        
        for dir in package_dirs:
            # make sure its a directory 
            if not os.path.isdir(dir) == 1: continue            
            # make sure it looks like a package
            if not '%s.info'%dir in os.listdir(dir): continue
            
            os.path.walk(dir, self.pv_f, self.pv_o)
            files = self.pv_o.getCollectedFiles()
            self.pv_o.reset()
            self.file_count = self.file_count + len(files)
            self.packages.append(AcsPackage(dir, self.pv_o.getCollectedFiles()))

        os.chdir(startdir)
        
        return self.file_count
    
    def getPackages(self):        
        return self.packages    

    # not needed 
    def getPackage(self, package_name): pass 

class PathVisitor:
    """ A Class intended for use with os.path.walk to collect files
    matching certain charterstics of file_extension, modtime, or size.
    
    currently only filetype is implemented.
  
    """
    def __init__(self, dirtype=None, filetype=None, modtime=None, size=None):
 
        self.filetype = filetype
        self.modtime = modtime
        self.size = size
        self.dirtype = dirtype
        self.c_files = [] # collected files
 
    def visit(self, dir, files):

        if self.dirtype:
            if os.path.basename(dir) != self.dirtype:                
                return
            
        for file in files:
            if os.path.isdir(file): continue

            if file[-len(self.filetype):] == self.filetype : 
                #print file
                self.c_files.append(string.join( (dir, file), os.sep))
 
    def getCollectedFiles(self):
        return self.c_files

    def reset(self):
        self.c_files = []
    
class AcsPackage:
    """ this is a container for both the tcl files and sql fragments
    found in an acs package. 

    file path is package/dir{tcl|www}/[subdirs]filename.tcl
    
    """
    
    def __init__(self, name, tcl_files): 
        self.name = name        
        self.tcl_files = tcl_files
    
    def getPackageName(self): 
        return self.name

    def getAllTclFiles(self): 
        return self.tcl_files

    # if we switch from package.query_name to the extended form
    def getTclLibraryFiles(self): pass
    def getTclWwwFiles(self): pass

    def setSqlFragments(self, frags): 
        self.sql_fragments = frags
        
    def getSqlFragments(self):
        return self.sql_fragments
    

class AcsSqlFragment:
    
    bind_re = re.compile('[ ,]:.*?[ ,)\n}]')

    def __init__(self, data):         
        self.data = data
        self._initialize()

    def _initialize(self):
        data = self.data
        index_bracket = string.find(data, '{')
        index_double_quote = string.find(data,'"')
        if index_bracket > 0 and index_double_quote > 0:
            if index_double_quote < index_bracket:
                index = index_double_quote
            else:
                index = index_bracket
        elif index_bracket > 0:
            index = index_bracket
        elif index_double_quote > 0:
            index = index_double_quote
        else:
            log("can't find db api call type, "+self.data)
            raise StandardError("Should Not Happen: Can't find call type")
              
        headers = string.split(data[:index])        
        headers = map(string.strip, headers)
        
        if len(headers) == 2:
            self.type, self.query_name = headers
        elif len(headers) == 3:
            self.target_bind, self.type, self.query_name = headers
        else:
            raise SyntaxError(self.data)
        
        all_binds = self.bind_re.findall(data[index:])        
        binds = {}
        # same bind might get used more than once        
        for bind in all_binds:
            if binds.has_key(bind[2:-1]): pass
            else: binds[bind[2:-1]]=''            
        self.bind_variables = binds.keys()                         
        self.query = string.strip(data[index+1:-1])
        
    def getType(self): 
        return self.type
        
    def getQueryName(self):
        return self.query_name
    
    def getBindVariables(self): 
        return self.bind_variables    

    def getQuery(self):
        return self.query
    
    def getXmlString(self):
        try:
            xmlstr = """
<query>
      <query_name>%s</query_name>
      <query_type>%s</query_type>
      <query_text>
      %s
      </query_text>
      <bindvars>\n"""%(self.query_name,self.type,self.query)
        
            for bindvar in self.bind_variables:
                xmlstr = xmlstr + "               <bindvar>%s</bindvar>\n"%bindvar
            xmlstr = xmlstr + """
      <bindvars>
      <rdbms>
               <type>oracle</type>
               <version>8.1.6</version>
      </rdbms>
</query> """
        except:
            # most of these seem to be comments or calls with $sql...
            # that is going to problematic perhaps...
            #log("problem"+self.data)
            return ''
        return string.strip(xmlstr)+'\n'
    
    def _arg_parser(self):
        """ i screwed up... args to the db calls aren't being caught, 
        need to fix.... better toplevel regexp

        hmm... on second thought this info isn't needed AFAICS

        """
        pass
        
               
class AcsSqlXmlWriter:
    """ simple xml writer. currently doing package oriented xml writing
    to a single output directory. 1 file per package """

    def __init__(self, output_directory='test_dispatch_sql', acs_root=None): 
        self.output_directory = output_directory

    def writePackageXML(self, package):
        start_directory = os.path.abspath(os.curdir)
        #os.chdir(acs_root)

        try:
            os.chdir(self.output_directory)
        except:
            try:
                os.makedirs(self.output_directory)
                os.chdir(self.output_directory)
            except:
                log("Fatal Exception in Writing XML Files: %s"%str(e))
                sys.exit()

        #print 'writing in '+os.path.abspath(os.curdir)
        
        fh = open(package.getPackageName(), 'w')
        fh.write('''<?xml version="1.0"?>\n
<!DOCTYPE acssql PUBLIC "-//OpenACS 4//DTD acssql .20//EN "http://www.openacs.org">
<query_list package="%s">\n\n'''%package.getPackageName())
        

        for sql_frag in package.getSqlFragments():
            fh.write(sql_frag.getXmlString())
            
        fh.write('''</query_list>''')
        fh.close()
        
        os.chdir(start_directory)
    
def log(message):
    LOGHANDLE.write(str(time.time())+ " " + str(message) + "\n\n")
    LOGHANDLE.flush()

LOGFILE = ''
LOGHANDLE = sys.stdout

# the following represents non dql/dml public api of the db system.
# i'm going to work on statistics gathering for their usage.

DB_API_NULL_LIST = ('db_null', 'db_nullify_empty_string',
            'db_quote', 'db_nextval', 'db_nth_pool_name',
            'db_with_handle', 'db_exec_plsql','db_release_unused_handles',
            'db_resultrows', 'db_transaction','db_abort_transaction',
            'db_abort_transaction_p', 'db_continue_transaction','db_write_clob',
            'db_write_blob', 'db_blob_get_file','db_register_pooled_sequence',
            'db_get_sql_user','db_source_sql_file','db_source_sqlj_file',
                    
             'db_select_widget', 'db_null', 'db_call',
             'db_url', 'db_text_file', 'db_getrow'
            )

# design changed once i realized one class could do it all
# so the following isn't used

DB_API_FACTORY_MAP = {'db_string':AcsSqlFragment,               
               'db_list':AcsSqlFragment,
               'db_list_of_lists':AcsSqlFragment,
               'db_exec_plsql':AcsSqlFragment,
               'db_foreach':AcsSqlFragment,
               'db_multirow':AcsSqlFragment,
               'db_dml':AcsSqlFragment,
               'db_0or1row':AcsSqlFragment,
               'db_1row':AcsSqlFragment}    

# db api calls that work without args seem to be giving my regex's
# no end of headaches (or was that just me).
DB_API_MISCREANTS = ('db_null', 'db_release_unused_handles')
    

def driver(acs_root, logfile=None, package_dir='acs-packages'):
    
    start_time = time.time()
    curdir = os.path.abspath(os.curdir)
    
    log('Starting ACS SQL Extractor')
        
    file_finder = AcsFileFinder(acs_root)

    log('Looking For Tcl Files Grouped By Packages')

    file_count=file_finder.findFiles()

    packages = file_finder.getPackages()

    log('Found %d Packages and %d Tcl Files'%(len(packages), file_count))
    
    parser = AcsSqlParser()
    writer = AcsSqlXmlWriter('output')
    
    log('Starting Sql Extraction....')
    
    ## this is a little messy cause of the path switching...    
    for package in packages:
        
        package_sql_frags = []
        
        os.chdir(string.join( (acs_root, package_dir), os.sep))
        
        files = package.getAllTclFiles()

        for file in files:
            package_sql_frags.extend(parser.parseFile(file))

        package.setSqlFragments(package_sql_frags)
        
        os.chdir(acs_root)

        #log('Writing XML for %s'%package.getPackageName())        
        
        writer.writePackageXML(package)

    log('Writing Statistics')

    os.chdir(acs_root)    
    os.chdir('output')
    
    fh = open('statistics', 'w')
    pprinter = pprint.PrettyPrinter(stream=fh)    
    fh.write('Statistics For ACS Sql Extractor\n\n\n')
    fh.write('DB API Usage\n\n\n')    
    pprinter.pprint(parser.getStatistics())
    fh.write('\n\nErrors For ACS Sql Extractor\n\n\n')    
    pprinter.pprint(parser.getErrors())
    fh.close()
    
    # be nice
    os.chdir(curdir)
           
    log('ACS SQL Extractor Exiting: Total Running Time %s'%str(time.time()-start_time))
        

if __name__  == '__main__':
    print len(sys.argv), 'hi'
    if not len(sys.argv) == 2:
        print sys.argv, len(sys.argv)
        print "Usage %s acs4_package_root"%sys.argv[0]
        sys.exit(1)
        
    driver(sys.argv[1])