1
This commit is contained in:
140
devtools/dlexer.py
Normal file
140
devtools/dlexer.py
Normal file
@ -0,0 +1,140 @@
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class DLexToken:
|
||||
"""
|
||||
DLexToken contains:
|
||||
'id' - the ID of the token (match with the value DLexer.AddToken returns).
|
||||
'val' - the token's string.
|
||||
'lineNumber' - the line the token was encountered on.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class DLexState:
|
||||
pass
|
||||
|
||||
|
||||
class DLexer:
|
||||
"""
|
||||
DLex is a simple lexer simulator. Here is how to use it.
|
||||
|
||||
1. Call AddToken to add the regular expressions that it will parse. Add them in
|
||||
order of precedence. Store the value returned from AddToken so you can compare
|
||||
it to the token ID returned by GetToken to determine what kind of token was found.
|
||||
|
||||
2. Call BeginRead or BeginReadFile to setup the initial file.
|
||||
|
||||
3. Repeatedly call GetToken.
|
||||
If it returns None, then there are no more tokens that match your specifications.
|
||||
If it returns a value, then it is a DLexToken with.
|
||||
"""
|
||||
|
||||
def __init__( self, bSkipWhitespace=1 ):
|
||||
self.__tokens = []
|
||||
self.__curTokenID = 0
|
||||
self.__notnewline = re.compile( '[^\\r\\n]*' )
|
||||
|
||||
self.__bSkipWhitespace = bSkipWhitespace
|
||||
if bSkipWhitespace:
|
||||
self.__whitespace = re.compile( '[ \\t\\f\\v]+' )
|
||||
self.__newline = re.compile( '[\\r\\n]' )
|
||||
|
||||
|
||||
def GetErrorTokenID( self ):
|
||||
return -1
|
||||
|
||||
|
||||
def AddToken( self, expr, flags=0 ):
|
||||
tokenID = self.__curTokenID
|
||||
self.__tokens.append( [tokenID, re.compile( expr, flags )] )
|
||||
self.__curTokenID += 1
|
||||
return tokenID
|
||||
|
||||
|
||||
# Store and restore the state.
|
||||
def BackupState( self ):
|
||||
ret = DLexState()
|
||||
ret.lineNumber = self.__lineNumber
|
||||
ret.currentCharacter = self.__currentCharacter
|
||||
ret.fileLen = self.__fileLen
|
||||
return ret
|
||||
|
||||
def RestoreState( self, state ):
|
||||
self.__lineNumber = state.lineNumber
|
||||
self.__currentCharacter = state.currentCharacter
|
||||
self.__fileLen = state.fileLen
|
||||
|
||||
|
||||
def BeginRead( self, str ):
|
||||
self.__curString = str
|
||||
self.__lineNumber = 1
|
||||
self.__currentCharacter = 0
|
||||
self.__fileLen = len( str )
|
||||
|
||||
|
||||
def BeginReadFile( self, fileName ):
|
||||
file = open( fileName, 'r' )
|
||||
self.BeginRead( file.read() )
|
||||
file.close()
|
||||
|
||||
|
||||
def GetToken( self ):
|
||||
# Skip whitespace.
|
||||
self.__SkipWhitespace()
|
||||
|
||||
# Now return the first token that we have a match for.
|
||||
for token in self.__tokens:
|
||||
m = token[1].match( self.__curString, self.__currentCharacter )
|
||||
if m:
|
||||
ret = DLexToken()
|
||||
ret.id = token[0]
|
||||
ret.val = self.__curString[ m.start() : m.end() ]
|
||||
ret.lineNumber = self.__lineNumber
|
||||
self.__currentCharacter = m.end()
|
||||
return ret
|
||||
|
||||
if self.__currentCharacter < self.__fileLen:
|
||||
print "NO MATCH FOR '%s'" % self.__curString[ self.__currentCharacter : self.__currentCharacter+35 ]
|
||||
ret = DLexToken()
|
||||
ret.id = self.GetErrorTokenID()
|
||||
ret.val = self.__curString[ self.__currentCharacter : ]
|
||||
self.__currentCharacter = self.__fileLen
|
||||
return ret
|
||||
#print "%d" % t
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def GetLineNumber( self ):
|
||||
return self.__lineNumber
|
||||
|
||||
|
||||
def GetPercentComplete( self ):
|
||||
return (self.__currentCharacter * 100) / self.__fileLen
|
||||
|
||||
|
||||
def GetLineContents( self ):
|
||||
m = self.__notnewline.match( self.__curString, self.__currentCharacter )
|
||||
if m:
|
||||
return self.__curString[ m.start() : m.end() ]
|
||||
else:
|
||||
return ""
|
||||
|
||||
|
||||
def __SkipWhitespace( self ):
|
||||
if self.__bSkipWhitespace:
|
||||
while 1:
|
||||
a = self.__whitespace.match( self.__curString, self.__currentCharacter )
|
||||
b = self.__newline.match( self.__curString, self.__currentCharacter )
|
||||
if a:
|
||||
self.__currentCharacter = a.end()
|
||||
continue
|
||||
elif b:
|
||||
self.__currentCharacter = b.end()
|
||||
self.__lineNumber += 1
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
Reference in New Issue
Block a user