Selection can now accept expressions with parentheses

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@196 014f5005-505e-4b48-8d0a-63407b615a7c
master
LtWorf 2009-06-29 13:12:45 +07:00
parent 1bc29f8ed5
commit fc49ba5bbe
4 changed files with 99 additions and 45 deletions

@ -87,3 +87,4 @@
- Implemented swap_rename_projection general optimization - Implemented swap_rename_projection general optimization
- Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188) - Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188)
- Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190) - Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190)
- Selection can now accept expressions with parentheses

@ -31,6 +31,8 @@ A function will have to return the number of changes performed on the tree.
''' '''
import optimizer import optimizer
import parser
sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
def replace_node(replace,replacement): def replace_node(replace,replacement):
'''This function replaces "replace" node with the node "with", '''This function replaces "replace" node with the node "with",
@ -287,44 +289,82 @@ def subsequent_renames(n):
return changes+recoursive_scan(subsequent_renames,n) return changes+recoursive_scan(subsequent_renames,n)
class level_string(str):
    '''String that also carries a "level" attribute.
    tokenize_select stores in it the parenthesis nesting depth that was
    current when the token was emitted.'''
    level=0 #Default depth; tokenize_select overwrites it on every token it emits
def tokenize_select(expression):
    '''Returns the list of tokens found in a selection condition.
    The expression can contain parentheses.

    Every token is a level_string: a str whose "level" attribute holds the
    parenthesis nesting counter that was current when the token was emitted.
    An opening parenthesis raises the counter before being emitted and a
    closing one lowers it before being emitted.

    Operators are the entries of sel_op, quoted strings (single quotes,
    backslash escaped quotes allowed) are kept as one token, anything else
    is accumulated into operand tokens. Empty tokens are never returned.

    Raises ValueError if a quoted string is not terminated.'''

    def leveled(text,depth):
        #Wraps text into a level_string tagged with the given depth
        token=level_string(text)
        token.level=depth
        return token

    #Drops every pair of parentheses that wraps the whole expression.
    #An unbalanced leading parenthesis makes the helper return None; in
    #that case nothing is stripped instead of failing on None+1.
    while expression.startswith('('):
        closing=parser.find_matching_parenthesis(expression)
        if closing is None or closing+1!=len(expression):
            break
        expression=expression[1:-1]

    tokens=[]
    temp='' #Operand text accumulated since the last emitted token
    level=0
    while len(expression)!=0:
        #NOTE(review): stripping here means blanks inside an operand are
        #dropped and its pieces get glued together
        expression=expression.strip()
        head=expression[0:1]

        #The counter changes before anything is emitted.
        #NOTE(review): an operand still pending in temp when ')' is met is
        #therefore tagged with the outer level
        if head=='(': #Expression into parenthesis
            level+=1
        elif head==')':
            level-=1

        #Longest operator first, so that '**=' wins over '**' and '*'.
        #NOTE(review): no word boundary check is done, so 'and', 'or',
        #'not' and 'in' are also recognised inside longer names
        for width in (3,2,1):
            if expression[0:width] in sel_op:
                break
        else:
            width=0 #No operator at the current position

        if width!=0:
            #Pending operand first (possibly empty, filtered at the end)
            tokens.append(leveled(temp,level))
            temp=''
            tokens.append(leveled(expression[0:width],level))
            expression=expression[width:]
        elif head=="'": #String
            end=expression.index("'",1)
            while expression[end-1]=='\\': #Escaped quote, keep searching
                end=expression.index("'",end+1)
            #Add string to list
            #NOTE(review): a pending operand is not flushed before the string
            tokens.append(leveled(expression[0:end+1],level))
            expression=expression[end+1:]
        else:
            temp+=head
            expression=expression[1:]

    if len(temp)!=0:
        tokens.append(leveled(temp,level))

    #Empty operands were appended whenever two tokens were adjacent
    return [token for token in tokens if token!='']
def swap_rename_projection(n): def swap_rename_projection(n):
@ -425,7 +465,7 @@ def selection_and_product(n,rels):
temp=[] temp=[]
for i in tokens: for i in tokens:
if i=='and': if i=='and' and i.level==0:
groups.append(temp) groups.append(temp)
temp=[] temp=[]
else: else:
@ -442,7 +482,7 @@ def selection_and_product(n,rels):
l_fields=False #has fields in left? l_fields=False #has fields in left?
r_fields=False #has fields in left? r_fields=False #has fields in left?
for j in i: for j in set(i).difference(sel_op):
j=j.split('.')[0] j=j.split('.')[0]
if j in l_attr:#Field in left if j in l_attr:#Field in left
l_fields=True l_fields=True
@ -473,6 +513,8 @@ def selection_and_product(n,rels):
l_node.prop+=i+ ' ' l_node.prop+=i+ ' '
if len(left)>0: if len(left)>0:
l_node.prop+=' and ' l_node.prop+=' and '
if '(' in l_node.prop:
l_node.prop='(%s)' % l_node.prop
#Preparing right selection #Preparing right selection
if len(right)>0: if len(right)>0:
@ -489,7 +531,8 @@ def selection_and_product(n,rels):
r_node.prop+=i+ ' ' r_node.prop+=i+ ' '
if len(right)>0: if len(right)>0:
r_node.prop+=' and ' r_node.prop+=' and '
if '(' in r_node.prop:
r_node.prop='(%s)' % r_node.prop
#Changing main selection #Changing main selection
n.prop='' n.prop=''
if len(both)!=0: if len(both)!=0:
@ -499,6 +542,8 @@ def selection_and_product(n,rels):
n.prop+=i+ ' ' n.prop+=i+ ' '
if len(both)>0: if len(both)>0:
n.prop+=' and ' n.prop+=' and '
if '(' in n.prop:
n.prop='(%s)' % n.prop
else:#No need for general select else:#No need for general select
replace_node(n,n.child) replace_node(n,n.child)
@ -506,3 +551,4 @@ def selection_and_product(n,rels):
general_optimizations=[duplicated_select,down_to_unions_subtractions_intersections,duplicated_projection,selection_inside_projection,subsequent_renames,swap_rename_select,futile_union_intersection_subtraction,swap_union_renames,swap_rename_projection] general_optimizations=[duplicated_select,down_to_unions_subtractions_intersections,duplicated_projection,selection_inside_projection,subsequent_renames,swap_rename_select,futile_union_intersection_subtraction,swap_union_renames,swap_rename_projection]
specific_optimizations=[selection_and_product] specific_optimizations=[selection_and_product]

@ -36,16 +36,17 @@ class node (object):
If the node is a binary operator, it will have left and right properties. If the node is a binary operator, it will have left and right properties.
If the node is a unary operator, it will have a child, pointing to the child node and a prop containing If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
the string with the props of the operation.''' the string with the props of the operation.
This class is used to convert an expression into python code.'''
kind=None kind=None
def __init__(self,expression=None): def __init__(self,expression=None):
'''Generates the tree from the tokenized expression
If no expression is specified then it will create an empty node'''
if expression==None or len(expression)==0: if expression==None or len(expression)==0:
return return
'''Generates the tree from the tokenized expression'''
#If the list contains only a list, it will consider the lower level list. #If the list contains only a list, it will consider the lower level list.
#This will allow things like ((((((a))))) to work #This will allow things like ((((((a))))) to work
while len(expression)==1 and isinstance(expression[0],list): while len(expression)==1 and isinstance(expression[0],list):
@ -85,7 +86,8 @@ class node (object):
return return
pass pass
def toPython(self): def toPython(self):
'''This method converts the expression into python code''' '''This method converts the expression into python code, which will require the
relation module to be executed.'''
if self.name in b_operators: if self.name in b_operators:
return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython()) return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython())
elif self.name in u_operators: elif self.name in u_operators:
@ -136,10 +138,6 @@ class node (object):
return _fields return _fields
elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'): elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels)))) return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
pass
def __eq__(self,other): def __eq__(self,other):
if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind): if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind):
return False return False
@ -168,6 +166,19 @@ class node (object):
return (le+ self.name +re) return (le+ self.name +re)
def find_matching_parenthesis(expression,start=0):
    '''Returns the position of the close parenthesis matching the
    1st open parenthesis found starting from start (0 by default).

    expression: the string to scan
    start: index where the scan begins

    Returns None if there is no open parenthesis from start on, or if the
    one found is never closed. A close parenthesis met before any open
    one is ignored. Parentheses inside quoted text are counted like any
    other, since no quoting rule is applied here.'''
    depth=0 #Count of parentheses currently open
    for i in range(start,len(expression)):
        char=expression[i]
        if char=='(':
            depth+=1
        elif char==')' and depth>0: #A stray ')' must not unbalance the count
            depth-=1
            if depth==0:
                return i #Closing parenthesis of the first opened one
    return None
def tokenize(expression): def tokenize(expression):
'''This function converts an expression into a list where '''This function converts an expression into a list where
every token of the expression is an item of a list. Expressions into every token of the expression is an item of a list. Expressions into
@ -198,17 +209,7 @@ def tokenize(expression):
while len(expression)>0: while len(expression)>0:
if expression.startswith('('): #Parenthesis state if expression.startswith('('): #Parenthesis state
state=2 state=2
par_count=0 #Count of parenthesis end=find_matching_parenthesis(expression)
end=0
for i in range(len(expression)):
if expression[i]=='(':
par_count+=1
elif expression[i]==')':
par_count-=1
if par_count==0:
end=i
break
#Appends the tokenization of the content of the parenthesis #Appends the tokenization of the content of the parenthesis
items.append(tokenize(expression[1:end])) items.append(tokenize(expression[1:end]))
#Removes the entire parentesis and content from the expression #Removes the entire parentesis and content from the expression
@ -217,9 +218,13 @@ def tokenize(expression):
elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes
items.append(expression[0:2]) #Adding operator in the top of the list items.append(expression[0:2]) #Adding operator in the top of the list
expression=expression[2:].strip() #Removing operator from the expression expression=expression[2:].strip() #Removing operator from the expression
if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization
par=expression.find('(',find_matching_parenthesis(expression))
else: #Expression without parenthesis, so adding what's between start and parenthesis as whole
par=expression.find('(') par=expression.find('(')
items.append(expression[:par]) #Inserting parameter of the operator items.append(expression[:par].strip()) #Inserting parameter of the operator
expression=expression[par:].strip() #Removing parameter from the expression expression=expression[par:].strip() #Removing parameter from the expression
elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte
items.append(expression[0]) items.append(expression[0])
@ -291,4 +296,3 @@ if __name__=="__main__":
while True: while True:
e=raw_input("Expression: ") e=raw_input("Expression: ")
print parse(e) print parse(e)

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Relational # Relational
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli # Copyright (C) 2008 Salvo "LtWorf" Tomaselli
# #
@ -16,7 +17,9 @@
# #
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it> # author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
'''Custom types for relational algebra''' '''Custom types for relational algebra.
Purpose of this module is having the isFloat function and
implementing dates to use in selection.'''
import datetime import datetime