improved tree parser

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@74 014f5005-505e-4b48-8d0a-63407b615a7c
master
LtWorf 2008-12-11 22:36:45 +07:00
parent f5e16a0cf4
commit d59ba9f9f1
1 changed files with 87 additions and 10 deletions

@ -26,6 +26,9 @@ BINARY=2
class node (object):
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.'''
def __init__(self,expression):
expression=expression.strip()
print "Parsing: ",expression
'''expression must be a valid relational algebra expression that would be accepted by the parser
and must be utf16'''
self.kind=0
@ -33,11 +36,77 @@ class node (object):
self.prop=""
'''*-ᑎᐅᐊᐅLEFTᐊᐅRIGHTᐊᐅFULLᐊπσρ'''
binary=(u"*",u"-",u"",u"")
unary=(u"π",u"σ",u"ρ")
'''(a (a b c d)) ᑎ c - σ i==3(πa,b(a b ᑎ c))'''
# Scan the expression character by character, from the last one to the first,
# tracking the parenthesis level and looking for unary/binary operators.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# statements below must be confirmed against the real file.
level=0 #Current parenthesis level
start=-1 #Start of the parentheses
end=-1 #End of the parentheses.
tokens=list(expression) #Expression split into single characters
r=range(len(tokens))
r.reverse()
lev_non_zero_chars=0 #Number of chars inside parentheses
for i in r: #Parses expression from end to beginning, to preserve the order of operations
if tokens[i]==u"(":
if level==0:
start=i
print start
level+=1
elif tokens[i]==u")":
level-=1
if level==0:
end=i
print end
if level!=0:
lev_non_zero_chars+=1
if i==0 and level==0 and tokens[i] in unary: #Unary operator found, must grab its parameters and its child relation
child=""
for q in tokens[start+1:end]:
child+=q
self.name= tokens[i]
print "-----",tokens[i]
print "---",start,end,lev_non_zero_chars
print child
#print prop
#self.child=node(child)
if level==0 and tokens[i] in binary: #Binary operator found, everything on the left will go in the left subtree and everything on the right will go in the right subtree
self.kind=BINARY
left=""
right=""
if start==end==-1:#No parentheses before
end=i
for q in tokens[start+1:end]:
left+=q
self.name= tokens[i]
for q in tokens[i+1:]:
right+=q
print "self: ",tokens[i]
print "left: ",left
print "right:" ,right
# Both operands are parsed recursively by building child nodes.
self.left=node(left)
self.right=node(right)
return
# Debug output: prints every character of the expression.
for i in list(expression):
print i
if lev_non_zero_chars!=0 and lev_non_zero_chars+1==len(expression):#Expression is entirely contained in parentheses, removing them
# Parse the inner expression and copy the resulting node's fields into self.
n=node(expression[1:-1])
self.name=n.name
self.kind=n.kind
if n.kind==UNARY:
self.child=n.child
elif n.kind==BINARY:
self.left=n.left
self.right=n.right
self.prop=n.prop
return
# Fallback: the whole expression is stored as the name of a relation.
self.kind=RELATION
self.name=expression
def __str__(self):
if (self.kind==RELATION):
@ -45,11 +114,19 @@ class node (object):
elif (self.kind==UNARY):
return self.name + " "+ self.prop+ " (" + self.child +")"
elif (self.kind==BINARY):
return "("+ self.left + ") " + self.name + " (" + self.right+ ")"
if self.left.kind==RELATION:
left=self.left.__str__()
else:
left=u"("+self.left.__str__()+u")"
if self.right.kind==RELATION:
right=self.right.__str__()
else:
right=u"("+self.right.__str__()+u")"
return (left+ self.name +right)
n=node(u"(a b) c d")
print n
if __name__=="__main__":
#n=node(u"((a b) - c d) - b")
#n=node(u"((((((((((((2)))))))))))) - (3 * 5) - 2")
n=node(u"π a,b (d-a*b)")
print n.__str__()