From 3c4b91272b9e4426bbcd7786c157a1dac2363e73 Mon Sep 17 00:00:00 2001 From: LtWorf Date: Tue, 9 Jun 2009 10:36:33 +0000 Subject: [PATCH] Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@190 014f5005-505e-4b48-8d0a-63407b615a7c --- CHANGELOG | 3 +- relational/optimizer.py | 249 +++------------------------------------- relational/parser.py | 239 +++++++++++++++++++++++++++++++++++++- 3 files changed, 252 insertions(+), 239 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 0715b77..d366b8f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -85,4 +85,5 @@ - Added stub for converting SQL to relational algebra - Implemented futile_union_intersection_subtraction general optimization - Implemented swap_rename_projection general optimization -- Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188) \ No newline at end of file +- Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188) +- Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190) \ No newline at end of file diff --git a/relational/optimizer.py b/relational/optimizer.py index 629dd8f..c43420d 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -22,240 +22,20 @@ For now it is highly experimental, and it shouldn't be used in 3rd party applications.''' import optimizations +import parser -RELATION=0 -UNARY=1 -BINARY=2 -b_operators=('*','-','ᑌ','ᑎ','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ') -u_operators=('π','σ','ρ') -op_functions={'*':'product','-':'difference','ᑌ':'union','ᑎ':'intersection','ᐅᐊ':'join','ᐅLEFTᐊ':'outer_left','ᐅRIGHTᐊ':'outer_right','ᐅFULLᐊ':'outer','π':'projection','σ':'selection','ρ':'rename'} - -class node (object): - '''This class is a node of a relational expression. 
Leaves are relations and internal nodes are operations. - - The kind property says if the node is a binary operator, unary operator or relation. - Since relations are leaves, a relation node will have no attribute for children. - - If the node is a binary operator, it will have left and right properties. - - If the node is a unary operator, it will have a child, pointing to the child node and a prop containing - the string with the props of the operation.''' - kind=None - - def __init__(self,expression=None): - - if expression==None or len(expression)==0: - return - - '''Generates the tree from the tokenized expression''' - - #If the list contains only a list, it will consider the lower level list. - #This will allow things like ((((((a))))) to work - while len(expression)==1 and isinstance(expression[0],list): - expression=expression[0] - - #The list contains only 1 string. Means it is the name of a relation - if len(expression)==1 and isinstance(expression[0],str): - self.kind=RELATION - self.name=expression[0] - return - - '''Expression from right to left, searching for binary operators - this means that binary operators have lesser priority than - unary operators. - It find the operator with lesser priority, uses it as root of this - (sub)tree using everything on its left as left parameter (so building - a left subtree with the part of the list located on left) and doing - the same on right. 
- Since it searches for strings, and expressions into parenthesis are - within sub-lists, they won't be found here, ensuring that they will - have highest priority.''' - for i in range(len(expression)-1,-1,-1): - if expression[i] in b_operators: #Binary operator - self.kind=BINARY - self.name=expression[i] - self.left=node(expression[:i]) - self.right=node(expression[i+1:]) - return - '''Searches for unary operators, parsing from right to left''' - for i in range(len(expression)-1,-1,-1): - if expression[i] in u_operators: #Unary operator - self.kind=UNARY - self.name=expression[i] - self.prop=expression[1+i].strip() - self.child=node(expression[2+i]) - - return - pass - def toPython(self): - '''This method converts the expression into python code''' - if self.name in b_operators: - return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython()) - elif self.name in u_operators: - prop =self.prop - - #Converting parameters - if self.name=='π':#Projection - prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"') - elif self.name=="ρ": #Rename - prop='{\"%s\"}' % prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','') - else: #Selection - prop='\"%s\"' % prop - - return '%s.%s(%s)' % (self.child.toPython(),op_functions[self.name],prop) - else: - return self.name - pass - def result_format(self,rels): - '''This function returns a list containing the fields that the resulting relation will have. 
- Since it needs to know real instances of relations, it requires a dictionary where keys are - the names of the relations and the values are the relation objects.''' - if rels==None: - return - - if self.kind==RELATION: - return list(rels[self.name].header.attributes) - elif self.kind==BINARY and self.name in ('-','ᑌ','ᑎ'): - return self.left.result_format(rels) - elif self.name=='π': - l=[] - for i in self.prop.split(','): - l.append(i.strip()) - return l - elif self.name=='*': - return self.left.result_format(rels)+self.right.result_format(rels) - elif self.name=='σ' : - return self.child.result_format(rels) - elif self.name=='ρ': - _vars={} - for i in self.prop.split(','): - q=i.split('➡') - _vars[q[0].strip()]=q[1].strip() - - _fields=self.child.result_format(rels) - for i in range(len(_fields)): - if _fields[i] in _vars: - _fields[i]=_vars[_fields[i]] - return _fields - elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'): - return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels)))) - - - pass - - def __eq__(self,other): - if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind): - return False - - if self.kind==UNARY: - if other.prop!=self.prop: - return False - return self.child==other.child - if self.kind==BINARY: - return self.left==other.left and self.right==other.right - return True - def __str__(self): - if (self.kind==RELATION): - return self.name - elif (self.kind==UNARY): - return self.name + " "+ self.prop+ " (" + self.child.__str__() +")" - elif (self.kind==BINARY): - if self.left.kind==RELATION: - le=self.left.__str__() - else: - le="("+self.left.__str__()+")" - if self.right.kind==RELATION: - re=self.right.__str__() - else: - re="("+self.right.__str__()+")" - - return (le+ self.name +re) - -def tokenize(expression): - '''This function converts an expression into a list where - every token of the expression is an item of a list. 
Expressions into - parenthesis will be converted into sublists.''' - items=[] #List for the tokens - - '''This is a state machine. Initial status is determined by the starting of the - expression. There are the following statuses: - - relation: this is the status if the expressions begins with something else than an - operator or a parenthesis. - binary operator: this is the status when parsing a binary operator, nothing much to say - unary operator: this status is more complex, since it will be followed by a parameter AND a - sub-expression. - sub-expression: this status is entered when finding a '(' and will be exited when finding a ')'. - means that the others open must be counted to determine which close is the right one.''' - - expression=expression.strip() #Removes initial and endind spaces - state=0 - ''' - 0 initial and useless - 1 previous stuff was a relation - 2 previous stuff was a sub-expression - 3 previous stuff was a unary operator - 4 previous stuff was a binary operator - ''' - - while len(expression)>0: - if expression.startswith('('): #Parenthesis state - state=2 - par_count=0 #Count of parenthesis - end=0 - - for i in range(len(expression)): - if expression[i]=='(': - par_count+=1 - elif expression[i]==')': - par_count-=1 - if par_count==0: - end=i - break - #Appends the tokenization of the content of the parenthesis - items.append(tokenize(expression[1:end])) - #Removes the entire parentesis and content from the expression - expression=expression[end+1:].strip() - - elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes - items.append(expression[0:2]) #Adding operator in the top of the list - expression=expression[2:].strip() #Removing operator from the expression - par=expression.find('(') - - items.append(expression[:par]) #Inserting parameter of the operator - expression=expression[par:].strip() #Removing parameter from the expression - elif expression.startswith("*") or 
expression.startswith("-"): # Binary 1 byte - items.append(expression[0]) - expression=expression[1:].strip() #1 char from the expression - state=4 - elif expression.startswith("ᑎ") or expression.startswith("ᑌ"): #Binary short 3 bytes - items.append(expression[0:3]) #Adding operator in the top of the list - expression=expression[3:].strip() #Removing operator from the expression - - state=4 - elif expression.startswith("ᐅ"): #Binary long - i=expression.find("ᐊ") - items.append(expression[:i+3]) - expression=expression[i+3:].strip() - - state=4 - else: #Relation (hopefully) - if state==1: #Previous was a relation, appending to the last token - i=items.pop() - items.append(i+expression[0]) - expression=expression[1:].strip() #1 char from the expression - else: - state=1 - items.append(expression[0]) - expression=expression[1:].strip() #1 char from the expression - - return items - -def tree(expression): - '''This function parses a relational algebra expression into a tree and returns - the root node using the Node class defined in this module.''' - return node(tokenize(expression)) +#Stuff that was here before, keeping it for compatibility +RELATION=parser.RELATION +UNARY=parser.UNARY +BINARY=parser.BINARY +b_operators=parser.b_operators +u_operators=parser.u_operators +op_functions=parser.op_functions +node=parser.node +tokenize=parser.tokenize +tree=parser.tree +#End of the stuff def optimize_all(expression,rels): '''This function performs all the available optimizations''' @@ -281,7 +61,6 @@ def specific_optimize(expression,rels): total+=i(n,rels) #Performs the optimization return n.__str__() - def general_optimize(expression): '''This function performs general optimizations. 
# Kinds of node in an expression tree.
RELATION = 0
UNARY = 1
BINARY = 2

# Operator symbols recognised by the tokenizer and by the tree builder.
b_operators = ('*', '-', 'ᑌ', 'ᑎ', 'ᐅᐊ', 'ᐅLEFTᐊ', 'ᐅRIGHTᐊ', 'ᐅFULLᐊ')
u_operators = ('π', 'σ', 'ρ')

# Maps every operator symbol to the name of the method emitted by
# node.toPython().
op_functions = {'*': 'product', '-': 'difference', 'ᑌ': 'union', 'ᑎ': 'intersection', 'ᐅᐊ': 'join', 'ᐅLEFTᐊ': 'outer_left', 'ᐅRIGHTᐊ': 'outer_right', 'ᐅFULLᐊ': 'outer', 'π': 'projection', 'σ': 'selection', 'ρ': 'rename'}

# Binary operators made of a single symbol; the join family is delimited by
# _JOIN_OPEN ... _JOIN_CLOSE instead and is handled separately in tokenize().
_SYMBOL_BINARY = ('*', '-', 'ᑎ', 'ᑌ')
_JOIN_OPEN = 'ᐅ'
_JOIN_CLOSE = 'ᐊ'


class node(object):
    '''Node of a relational expression tree.

    Leaves are relations and internal nodes are operations.

    The kind attribute is RELATION, UNARY or BINARY (or None for an empty
    node created with no expression).

    A RELATION node only has a name.
    A BINARY node has name (the operator), left and right.
    A UNARY node has name (the operator), child and prop, the string holding
    the parameters of the operation.'''
    kind = None

    def __init__(self, expression=None):
        '''Generates the tree from a tokenized expression (see tokenize).

        With no expression, or an empty one, the node is left empty:
        kind stays None and no other attribute is set.'''
        if expression is None or len(expression) == 0:
            return

        # A list containing only a list is replaced by the inner list,
        # so that things like ((((a)))) work.
        while len(expression) == 1 and isinstance(expression[0], list):
            expression = expression[0]

        # A single string is the name of a relation.
        if len(expression) == 1 and isinstance(expression[0], str):
            self.kind = RELATION
            self.name = expression[0]
            return

        # Binary operators are searched first, from right to left, so they
        # bind less tightly than unary operators. The operator found becomes
        # the root of this (sub)tree; what is on its left becomes the left
        # subtree and what is on its right the right subtree.
        # Parenthesised sub-expressions are sub-lists, never strings, so they
        # are not matched here and keep the highest priority.
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in b_operators:
                self.kind = BINARY
                self.name = expression[i]
                self.left = node(expression[:i])
                self.right = node(expression[i + 1:])
                return

        # Then unary operators, again from right to left. The operator is
        # followed by its parameter string and by its sub-expression.
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in u_operators:
                self.kind = UNARY
                self.name = expression[i]
                self.prop = expression[1 + i].strip()
                self.child = node(expression[2 + i])
                return

    def toPython(self):
        '''Converts the expression into a string of python code.

        Binary operators become left.method(right), unary operators become
        child.method(parameters) and relations become their bare name.'''
        if self.name in b_operators:
            return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())

        if self.name in u_operators:
            prop = self.prop

            # Converting parameters
            if self.name == 'π':  # Projection: "a","b"
                prop = '"%s"' % prop.replace(' ', '').replace(',', '","')
            elif self.name == 'ρ':  # Rename: {"old":"new",...}
                prop = '{"%s"}' % prop.replace(',', '","').replace('➡', '":"').replace(' ', '')
            else:  # Selection: the condition as one string literal
                # Backslashes and double quotes are escaped so that the
                # literal evaluates back to the original condition text.
                # NOTE(review): the condition itself is inserted verbatim;
                # if callers evaluate the generated code, it must not be
                # built from untrusted input.
                prop = '"%s"' % prop.replace('\\', '\\\\').replace('"', '\\"')

            return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)

        return self.name

    def result_format(self, rels):
        '''Returns the list of fields that the resulting relation will have.

        rels is a dictionary where keys are the names of the relations and
        values are the relation objects; the fields of a relation are read
        from its header.attributes.

        Returns None when rels is None or the node is of an unknown kind.'''
        if rels is None:
            return

        if self.kind == RELATION:
            return list(rels[self.name].header.attributes)
        elif self.kind == BINARY and self.name in ('-', 'ᑌ', 'ᑎ'):
            return self.left.result_format(rels)
        elif self.name == 'π':
            return [field.strip() for field in self.prop.split(',')]
        elif self.name == '*':
            return self.left.result_format(rels) + self.right.result_format(rels)
        elif self.name == 'σ':
            return self.child.result_format(rels)
        elif self.name == 'ρ':
            renames = {}
            for pair in self.prop.split(','):
                old_new = pair.split('➡')
                renames[old_new[0].strip()] = old_new[1].strip()

            fields = self.child.result_format(rels)
            for i in range(len(fields)):
                if fields[i] in renames:
                    fields[i] = renames[fields[i]]
            return fields
        elif self.name in ('ᐅᐊ', 'ᐅLEFTᐊ', 'ᐅRIGHTᐊ', 'ᐅFULLᐊ'):
            # Union of the fields of both sides, without duplicates and in a
            # deterministic order (left fields first).
            fields = []
            for field in self.left.result_format(rels) + self.right.result_format(rels):
                if field not in fields:
                    fields.append(field)
            return fields

    def __eq__(self, other):
        '''Two nodes are equal when they have the same kind and name and
        their parameters and subtrees are equal too.'''
        if not isinstance(other, node):
            return False
        # getattr: empty nodes have no name attribute
        if getattr(self, 'name', None) != getattr(other, 'name', None) or self.kind != other.kind:
            return False

        if self.kind == UNARY:
            if other.prop != self.prop:
                return False
            return self.child == other.child
        if self.kind == BINARY:
            return self.left == other.left and self.right == other.right
        return True

    def __ne__(self, other):
        # Needed explicitly on Python 2, where != is not derived from ==.
        return not self.__eq__(other)

    def __str__(self):
        '''Returns the expression as a string of relational algebra.
        Operands of binary operators that are not plain relations are put
        between parenthesis.'''
        if self.kind == RELATION:
            return self.name
        elif self.kind == UNARY:
            return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
        elif self.kind == BINARY:
            if self.left.kind == RELATION:
                le = self.left.__str__()
            else:
                le = "(" + self.left.__str__() + ")"
            if self.right.kind == RELATION:
                re = self.right.__str__()
            else:
                re = "(" + self.right.__str__() + ")"

            return le + self.name + re


def _matching_parenthesis(expression):
    '''Returns the index of the ')' closing the '(' that opens expression.'''
    depth = 0
    for position, char in enumerate(expression):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return position
    raise ValueError('Unbalanced parenthesis in %r' % expression)


def tokenize(expression):
    '''Converts an expression into a list where every token of the
    expression is an item of the list. Expressions between parenthesis are
    converted into sublists.

    A unary operator produces two tokens: the operator and its parameter
    string (everything up to the following parenthesis, not stripped).

    Raises ValueError if a parenthesis is never closed, if a unary operator
    is not followed by a parenthesised sub-expression or if a join operator
    is never closed.'''
    items = []  # List for the tokens

    # True while the previous token was (part of) a relation name, so that
    # the following characters are appended to it instead of starting a new
    # token.
    in_relation = False

    expression = expression.strip()  # Removes initial and ending spaces

    while len(expression) > 0:
        if expression.startswith('('):  # Sub-expression
            in_relation = False
            end = _matching_parenthesis(expression)
            # Appends the tokenization of the content of the parenthesis
            items.append(tokenize(expression[1:end]))
            # Removes the parenthesis and their content from the expression
            expression = expression[end + 1:].strip()

        elif expression.startswith(u_operators):  # Unary operator
            in_relation = False
            # The width comes from the operator itself, so this works both
            # on byte strings and on unicode strings.
            width = [len(op) for op in u_operators if expression.startswith(op)][0]
            items.append(expression[:width])
            expression = expression[width:].strip()

            par = expression.find('(')
            if par == -1:
                raise ValueError('Unary operator %s must be followed by a sub-expression between parenthesis' % items[-1])
            items.append(expression[:par])  # Parameter of the operator
            expression = expression[par:].strip()

        elif expression.startswith(_SYMBOL_BINARY):  # One-symbol binary operator
            in_relation = False
            width = [len(op) for op in _SYMBOL_BINARY if expression.startswith(op)][0]
            items.append(expression[:width])
            expression = expression[width:].strip()

        elif expression.startswith(_JOIN_OPEN):  # Join family: ᐅ...ᐊ
            in_relation = False
            close = expression.find(_JOIN_CLOSE)
            if close == -1:
                raise ValueError('Join operator is never closed in %r' % expression)
            end = close + len(_JOIN_CLOSE)
            items.append(expression[:end])
            expression = expression[end:].strip()

        else:  # Relation (hopefully)
            if in_relation:  # Previous was a relation, appending to it
                items[-1] += expression[0]
            else:
                in_relation = True
                items.append(expression[0])
            expression = expression[1:].strip()  # 1 char from the expression

    return items


def tree(expression):
    '''Parses a relational algebra expression into a tree and returns the
    root node, using the node class defined in this module.'''
    return node(tokenize(expression))