diff --git a/relational/optimizations.py b/relational/optimizations.py index f374c2a..c9ca62c 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -452,7 +452,7 @@ def swap_rename_select(n): def select_union_intersect_subtract(n): '''This function locates things like σ i(a) ᑌ σ q(a) and replaces them with σ (i OR q) (a) - Removing a O² operation like the union''' + Removing an O(n²) operation like the union''' changes=0 if n.name in ('ᑌ', 'ᑎ', '-') and n.left.name=='σ' and n.right.name=='σ' and n.left.child==n.right.child: cahnges=1 diff --git a/relational/optimizer.py b/relational/optimizer.py index efaddf1..bd38a9f 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -18,8 +18,12 @@ # # author Salvo "LtWorf" Tomaselli -'''This module optimizes relational expressions into ones that require less time to be executed -For now it is highly experimental, and it shouldn't be used in 3rd party applications.''' +'''This module optimizes relational expressions into ones that require less time to be executed. + +expression: In all the functions expression can be either a UTF-8 encoded string, containing a valid +relational query, or it can be a parse tree for a relational expression (i.e. class parser.node). +The functions will always return a string with the optimized query, but if a parse tree was provided, +the parse tree itself will be modified accordingly.''' import optimizations import parser @@ -37,53 +41,64 @@ tokenize=parser.tokenize tree=parser.tree #End of the stuff -def optimize_all(expression,rels): - '''This function performs all the available optimizations +def optimize_all(expression,rels,specific=True,general=True,debug=None): + '''This function performs all the available optimizations. 
- Expression is the UTF-8 encoded string containing the expression to optimize - rels is a dictionary like {'name':relation} + expression : see documentation of this module + rels: dict with relation name as key, and relation instance as value + specific: True if it has to perform specific optimizations + general: True if it has to perform general optimizations + debug: if a list is provided here, after the end of the function, it + will contain the query repeated many times to show the performed + steps. Keep a reference to that list: with optimize_all(... ,debug=[]) + the recorded steps could not be read back Return value: this will return an optimized version of the expression''' - n=tree(expression) #Gets the tree + if isinstance(expression,str): + n=tree(expression) #Gets the tree + elif isinstance(expression,node): + n=expression + else: + raise (TypeError("expression must be a string or a node")) + + if isinstance(debug,list): + dbg=True + else: + dbg=False + total=1 while total!=0: total=0 - for i in optimizations.specific_optimizations: - total+=i(n,rels) #Performs the optimization - for i in optimizations.general_optimizations: - total+=i(n) #Performs the optimization + if specific: + for i in optimizations.specific_optimizations: + res=i(n,rels) #Performs the optimization + if res!=0 and dbg: debug.append(n.__str__()) + total+=res + if general: + for i in optimizations.general_optimizations: + res=i(n) #Performs the optimization + if res!=0 and dbg: debug.append(n.__str__()) + total+=res return n.__str__() def specific_optimize(expression,rels): '''This function performs specific optimizations. Means that it will need to - know the fields used by the relations + know the fields used by the relations. 
- Expression is the UTF-8 encoded string containing the expression to optimize - rels is a dictionary like {'name':relation} + expression : see documentation of this module + rels: dict with relation name as key, and relation instance as value Return value: this will return an optimized version of the expression''' - n=tree(expression) #Gets the tree - total=1 - while total!=0: - total=0 - for i in optimizations.specific_optimizations: - total+=i(n,rels) #Performs the optimization - return n.__str__() + return optimize_all(expression,rels,specific=True,general=False) def general_optimize(expression): '''This function performs general optimizations. Means that it will not need to know the fields used by the relations - Expression is the UTF-8 encoded string containing the expression to optimize + expression : see documentation of this module Return value: this will return an optimized version of the expression''' - n=tree(expression) #Gets the tree - total=1 - while total!=0: - total=0 - for i in optimizations.general_optimizations: - total+=i(n) #Performs the optimization - return n.__str__() + return optimize_all(expression,None,specific=False,general=True) if __name__=="__main__": #n=node(u"((a ᑌ b) - c ᑌ d) - b") diff --git a/test/people_join_select_args_on_both_tables.query b/test/people_join_select_args_on_both_tables.query new file mode 100644 index 0000000..2a10884 --- /dev/null +++ b/test/people_join_select_args_on_both_tables.query @@ -0,0 +1 @@ +σ skill=='C' and age<25 and skill!=name(people ᐅᐊ skills) diff --git a/test/people_join_select_args_on_both_tables.result b/test/people_join_select_args_on_both_tables.result new file mode 100644 index 0000000..57c4154 --- /dev/null +++ b/test/people_join_select_args_on_both_tables.result @@ -0,0 +1,2 @@ +id,name,chief,age,skill +0,jack,0,22,C diff --git a/test/redoundant_union_select.query b/test/redoundant_union_select.query new file mode 100644 index 0000000..c2d5963 --- /dev/null +++ 
b/test/redoundant_union_select.query @@ -0,0 +1 @@ +σ (id==2) (σ age>5 (people ᑌ people)) diff --git a/test/redoundant_union_select.result b/test/redoundant_union_select.result new file mode 100644 index 0000000..9408a8d --- /dev/null +++ b/test/redoundant_union_select.result @@ -0,0 +1,2 @@ +id,name,chief,age +2,john,1,30