diff --git a/optimizer.py b/optimizer.py index 608a5c1..50bdb1d 100644 --- a/optimizer.py +++ b/optimizer.py @@ -26,6 +26,9 @@ BINARY=2 class node (object): '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.''' def __init__(self,expression): + expression=expression.strip() + + print "Parsing: ",expression '''expression must be a valid relational algrbra expression that would be accepted by the parser and must be utf16''' self.kind=0 @@ -33,11 +36,77 @@ class node (object): self.prop="" '''*-ᑌᑎᐅᐊᐅLEFTᐊᐅRIGHTᐊᐅFULLᐊπσρ''' + binary=(u"*",u"-",u"ᑌ",u"ᑎ") + unary=(u"π",u"σ",u"ρ") '''(a ᑌ (a ᑌ b ᑌ c ᑌ d)) ᑎ c - σ i==3(πa,b(aᑌ b ᑎ c))''' + level=0 #Current parentesis level + start=-1 #Start of the parentesis + end=-1 #End of the parentesis. + tokens=list(expression) #Splitted expression + r=range(len(tokens)) + r.reverse() + lev_non_zero_chars=0 #Number of chars inside parentesis + for i in r: #Parses expression from end to begin, to preserve operation's order + if tokens[i]==u"(": + if level==0: + start=i + print start + level+=1 + elif tokens[i]==u")": + level-=1 + if level==0: + end=i + print end + + if level!=0: + lev_non_zero_chars+=1 + + if i==0 and level==0 and tokens[i] in unary: #Unary operator found, must grab its parameters and its child relation they + child="" + for q in tokens[start+1:end]: + child+=q + self.name= tokens[i] + print "-----",tokens[i] + print "---",start,end,lev_non_zero_chars + print child + #print prop + #self.child=node(child) + + if level==0 and tokens[i] in binary: #Binary operator found, everything on left will go in the left subree and everhthing on the right will go in the right subtree + self.kind=BINARY + left="" + right="" + + if start==end==-1:#No parentesis before + end=i + + for q in tokens[start+1:end]: + left+=q + self.name= tokens[i] + for q in tokens[i+1:]: + right+=q + print "self: ",tokens[i] + print "left: ",left + print "right:" ,right + self.left=node(left) + self.right=node(right) + + return - for i in list(expression): - print i - + if lev_non_zero_chars!=0 and lev_non_zero_chars+1==len(expression):#Expression is entirely contained in parentesis, removing them + n=node(expression[1:-1]) + self.name=n.name + self.kind=n.kind + if n.kind==UNARY: + self.child=n.child + elif n.kind==BINARY: + self.left=n.left + self.right=n.right + self.prop=n.prop + return + + self.kind=RELATION + self.name=expression def __str__(self): if (self.kind==RELATION): @@ -45,11 +114,19 @@ class node (object): elif (self.kind==UNARY): return self.name + " "+ self.prop+ " (" + self.child +")" elif (self.kind==BINARY): - return "("+ self.left + ") " + self.name + " (" + self.right+ ")" + if self.left.kind==RELATION: + left=self.left.__str__() + else: + left=u"("+self.left.__str__()+u")" + if self.right.kind==RELATION: + right=self.right.__str__() + else: + right=u"("+self.right.__str__()+u")" + + return (left+ self.name +right) - - - - -n=node(u"(a ᑌ b) ᑌ c ᑌ d") -print n \ No newline at end of file +if __name__=="__main__": + #n=node(u"((a ᑌ b) - c ᑌ d) - b") + #n=node(u"((((((((((((2)))))))))))) - (3 * 5) - 2") + n=node(u"π a,b (d-a*b)") + print n.__str__() \ No newline at end of file