optimization on product and select working

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@164 014f5005-505e-4b48-8d0a-63407b615a7c
master
LtWorf 2009-05-12 13:30:19 +07:00
parent ccc8a5ee0d
commit 004a6bb84e
2 changed files with 122 additions and 10 deletions

@ -213,6 +213,7 @@ def swap_rename_select(n):
them with ρ j(σ k(R)). Renaming the attributes used in the
selection, so the operation is still valid.'''
#TODO document into the wiki
#FIXME selection of date.day won't work.
changes=0
if n.name=='σ' and n.child.name=='ρ':
@ -248,4 +249,117 @@ def swap_rename_select(n):
changes+=swap_rename_select(n.left)
return changes
def _join_condition_groups(groups):
    '''Rebuilds a selection expression from a list of token groups.

    Every token is followed by a single space and the groups are separated
    by ' and ', e.g. [['a', '==', '1'], ['b', '==', '2']] becomes
    "a == 1  and b == 2 ".'''
    return ' and '.join(
        ''.join(token + ' ' for token in group) for group in groups
    )


def _selection_over(child, groups):
    '''Returns a new σ node whose child is `child` and whose condition is
    the conjunction of the given token groups.'''
    node = optimizer.node()
    node.name = 'σ'
    node.kind = optimizer.UNARY
    node.child = child
    node.prop = _join_condition_groups(groups)
    return node


def selection_and_product(n, rels):
    '''This function locates things like σ k (R*Q) and converts them into
    σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
    i contains attributes belonging to Q and l contains attributes belonging
    to both.

    The condition k is split on the 'and' tokens; each resulting group is
    moved to the left operand, to the right operand, or kept in the outer
    selection when it mentions attributes of both operands (or of neither).
    If nothing has to stay in the outer selection, the selection node is
    turned into the product node itself.

    n is the root of the expression tree (modified in place) and rels is
    the dictionary, relation name -> relation object, that is handed to
    result_format() to discover the attributes of each operand.

    Returns the number of sub-trees that were actually rewritten, so 0
    means that the tree was left as it was.'''
    # TODO document in the wiki
    changes = 0

    if n.name == 'σ' and n.child.name == '*':
        # result_format() gives up and returns None when it is not given the
        # relations; treat that as "no known attributes", so every condition
        # simply stays in the outer selection.
        l_attr = n.child.left.result_format(rels) or ()
        r_attr = n.child.right.result_format(rels) or ()

        # Split the condition into groups of tokens separated by 'and'
        groups = []
        current = []
        for token in tokenize_select(n.prop):
            if token == 'and':
                groups.append(current)
                current = []
            else:
                current.append(token)
        if current:
            groups.append(current)

        left = []
        right = []
        both = []
        for group in groups:
            l_fields = False  # has fields in left?
            r_fields = False  # has fields in right?
            for token in group:
                if token in l_attr:  # Field in left
                    l_fields = True
                if token in r_attr:  # Field in right
                    r_fields = True

            if l_fields and not r_fields:
                left.append(group)
            elif r_fields and not l_fields:
                right.append(group)
            else:  # Fields in both, or unknown: keep it in the outer select
                both.append(group)

        # Preparing left selection
        if left:
            n.child.left = _selection_over(n.child.left, left)
            changes = 1

        # Preparing right selection
        if right:
            n.child.right = _selection_over(n.child.right, right)
            changes = 1

        # Changing main selection
        if both:
            n.prop = _join_condition_groups(both)
        else:  # No need for general select: n becomes the product itself
            n.prop = ''
            n.name = n.child.name
            n.kind = n.child.kind
            n.left = n.child.left
            n.right = n.child.right
            changes = 1

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += selection_and_product(n.child, rels)
    elif n.kind == optimizer.BINARY:
        changes += selection_and_product(n.right, rels)
        changes += selection_and_product(n.left, rels)
    return changes
# Optimizations that work on the expression tree alone. swap_rename_select
# takes only the node; presumably the others listed here do too -- confirm.
general_optimizations=[
    duplicated_select,
    down_to_unions_subtractions_intersections,
    duplicated_projection,
    selection_inside_projection,
    subsequent_renames,
    swap_rename_select,
]
# Optimizations that also need the dictionary of relations (n, rels)
specific_optimizations=[
    selection_and_product,
]

@ -39,11 +39,7 @@ class node (object):
If the node is a binary operator, it will have left and right properties.
If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
the string with the props of the operation.
It can be helpful to know the fields returned by an operation. Providing a dictionary with names and instances
of relations in the constructor, the node is able to return the list of fields that the result will have.
'''
the string with the props of the operation.'''
kind=None
def __init__(self,expression=None):
@ -80,14 +76,12 @@ class node (object):
'''This function returns a list containing the fields that the resulting relation will have.
Since it needs to know real instances of relations, it requires a dictionary where keys are
the names of the relations and the values are the relation objects.'''
print "Rels========",rels
if rels==None:
return
if self.kind==RELATION:
return rels[self.name].header.attributes
elif self.kind==BINARY and self.name in ('-','',''):
print "OK"
return self.left.result_format(rels)
elif self.name=='π':
l=[]
@ -246,19 +240,23 @@ if __name__=="__main__":
#a= tokenize(u"π a,b (a*b)")
#a=tokenize("(a-b*c)*(b-c)")
import relation
import relation,optimizations
rels={}
rels["P1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv")
rels["P2"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv")
rels["R1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/person_room.csv")
rels["R2"]= relation.relation("/home/salvo/dev/relational/trunk/samples/person_room.csv")
rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv")
rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv")
print rels
#n=tree("π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
n=tree("P1 ᐅᐊ R2")
n=tree("σ id==3 and id==indice and indice==2 and name==5(P1 * S1)")
print optimizations.selection_and_product(n,rels)
print n
print n.result_format(rels)
#a=general_optimize("σ age==3 and qq<=2 or nome!='ciccio d\\'urso'(ρ ciccio➡age,nome➡nom(R-Q))")
#a=general_optimize("σ i==2 (σ b>5 (d))")
#print a