Selection can now accept expressions with parentheses

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@196 014f5005-505e-4b48-8d0a-63407b615a7c
2009-06-29 13:12:45 +07:00 · 2009-06-29 13:12:45 +07:00 · fc49ba5bbe
parent 1bc29f8ed5
commit fc49ba5bbe
4 changed files with 99 additions and 45 deletions
--- a/1
+++ b/1
@ -87,3 +87,4 @@
 - Implemented swap_rename_projection general optimization
 - Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188)
 - Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190)
 - Selection can now accept expressions with parentheses
--- a/relational/optimizations.py
+++ b/relational/optimizations.py
@ -31,6 +31,8 @@ A function will have to return the number of changes performed on the tree.
 '''
 import optimizer
 import parser
 #Operator and punctuation tokens recognised inside a selection condition.
 #tokenize_select tests the next 3, 2 and then 1 characters of the expression against this tuple.
 sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
 def replace_node(replace,replacement):
    '''This function replaces "replace" node with the node "with",
@ -287,44 +289,82 @@ def subsequent_renames(n):
    return changes+recoursive_scan(subsequent_renames,n)
 class level_string(str):
    '''String subclass that also carries the parenthesis nesting
    level of a token. tokenize_select sets level on every token it returns.'''
    #Nesting depth of the token; stays 0 unless overwritten on the instance
    level=0
 def tokenize_select(expression):
    '''This function returns the list of tokens present in a
-    selection. The expression can't contain parenthesis.'''
+    selection. The expression can contain parenthesis.
-    op=('//=','**=','and','not','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=')
+    It will use a subclass of str with the attribute level, which
    will specify the nesting level of the token into parenthesis.'''
    #Strip the parentheses that wrap the whole expression; repeats until the length stops changing
    #NOTE(review): find_matching_parenthesis appears to return None on unbalanced input, in which case the +1 below raises TypeError
    l=0
    while l!=len(expression):
        l=len(expression)
        if expression.startswith('(') and parser.find_matching_parenthesis(expression)+1==len(expression):
            expression= expression[1:-1]
    #tokens is the output list, temp accumulates the characters of the current operand,
    #level is the current parenthesis depth
    tokens=[]
    temp=''
    level=0
    while len(expression)!=0:
        expression=expression.strip()
-        if expression[0:3] in op:#3char op
+        
-            tokens.append(temp)
+        if expression[0:1]=='(': #Expression into parenthesis
            level+=1
        elif expression[0:1]==')':
            level-=1
        #level is updated before the token is built, so '(' carries the depth after
        #opening and ')' the depth after closing
        #Operators are tried longest first (3, 2, then 1 characters); the operand pending
        #in temp is flushed (even when empty) before every operator
        #NOTE(review): there is no word-boundary check, so 'and', 'not', 'in' and 'or'
        #are also matched inside longer identifiers
        if expression[0:3] in sel_op:#3char op
            t=level_string(temp)
            t.level=level
            tokens.append(t)
            temp=''
-            tokens.append(expression[0:3])
+            t=level_string(expression[0:3])
            t.level=level            
            tokens.append(t)
            expression=expression[3:]
-        elif expression[0:2] in op:#2char op
+        elif expression[0:2] in sel_op:#2char op
-            tokens.append(temp)
+            t=level_string(temp)
            t.level=level
            tokens.append(t)
            temp=''
-            tokens.append(expression[0:2])
+            t=level_string(expression[0:2])
            t.level=level
            tokens.append(t)
            expression=expression[2:]
-        elif expression[0:1] in op:#1char op
+        elif expression[0:1] in sel_op:#1char op
-            tokens.append(temp)
+            t=level_string(temp)
            t.level=level
            tokens.append(t)
            temp=''
-            tokens.append(expression[0:1])
+            t=level_string(expression[0:1])
            t.level=level
            tokens.append(t)
            expression=expression[1:]
        elif expression[0:1]=="'":#String
            end=expression.index("'",1)
            #A quote preceded by a backslash is escaped: keep looking for the real end
            while expression[end-1]=='\\':
                end=expression.index("'",end+1)
            #Add string to list
-            tokens.append(expression[0:end+1])
+            t=level_string(expression[0:end+1])
            t.level=level
            tokens.append(t)
            expression=expression[end+1:]
        else:
            #Any other character belongs to the current operand
            temp+=expression[0:1]
            expression=expression[1:]
            pass
    if len(temp)!=0:
-        tokens.append(temp)
+        t=level_string(temp)
        t.level=level
        tokens.append(t)
    #Drop the empty tokens appended when temp was empty; list.remove raises once none are left
    while True:
        try:
            tokens.remove('')
        except:
            break
    return tokens
 def swap_rename_projection(n):
@ -425,7 +465,7 @@ def selection_and_product(n,rels):
        temp=[]
        for i in tokens:
-            if i=='and':
+            if i=='and' and i.level==0:
                groups.append(temp)
                temp=[]
            else:
@ -442,7 +482,7 @@ def selection_and_product(n,rels):
            l_fields=False #has fields in left?
            r_fields=False #has fields in left?
-            for j in i:
+            for j in set(i).difference(sel_op):
                j=j.split('.')[0]
                if j in l_attr:#Field in left
                    l_fields=True
@ -473,6 +513,8 @@ def selection_and_product(n,rels):
                    l_node.prop+=i+ ' '
                if len(left)>0:
                    l_node.prop+=' and '
            if '(' in l_node.prop:
                l_node.prop='(%s)' % l_node.prop
        #Preparing right selection
        if len(right)>0:
@ -489,7 +531,8 @@ def selection_and_product(n,rels):
                    r_node.prop+=i+ ' '
                if len(right)>0:
                    r_node.prop+=' and '
-                    
+            if '(' in r_node.prop:
                r_node.prop='(%s)' % r_node.prop
        #Changing main selection
        n.prop=''
        if len(both)!=0:
@ -499,6 +542,8 @@ def selection_and_product(n,rels):
                    n.prop+=i+ ' '
                if len(both)>0:
                    n.prop+=' and '
            if '(' in n.prop:
                n.prop='(%s)' % n.prop
        else:#No need for general select
            replace_node(n,n.child)
@ -506,3 +551,4 @@ def selection_and_product(n,rels):
 general_optimizations=[duplicated_select,down_to_unions_subtractions_intersections,duplicated_projection,selection_inside_projection,subsequent_renames,swap_rename_select,futile_union_intersection_subtraction,swap_union_renames,swap_rename_projection]
 specific_optimizations=[selection_and_product]
--- a/relational/parser.py
+++ b/relational/parser.py
@ -36,16 +36,17 @@ class node (object):
    If the node is a binary operator, it will have left and right properties.
    If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
-    the string with the props of the operation.'''
+    the string with the props of the operation.
    This class is used to convert an expression into python code.'''
    kind=None
    def __init__(self,expression=None):
-        
+        '''Generates the tree from the tokenized expression
        If no expression is specified then it will create an empty node'''
        if expression==None or len(expression)==0:
            return
        '''Generates the tree from the tokenized expression'''
        #If the list contains only a list, it will consider the lower level list.
        #This will allow things like ((((((a))))) to work
        while len(expression)==1 and isinstance(expression[0],list): 
@ -85,7 +86,8 @@ class node (object):
                return       
        pass
    def toPython(self):
-        '''This method converts the expression into python code'''
+        '''This method converts the expression into python code, which will require the
        relation module to be executed.'''
        if self.name in b_operators:
            return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython())
        elif self.name in u_operators:
@ -136,10 +138,6 @@ class node (object):
            return _fields
        elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
            return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
        pass
    def __eq__(self,other):
        if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind):
            return False
@ -168,6 +166,19 @@ class node (object):
            return (le+ self.name +re)
 def find_matching_parenthesis(expression,start=0):
    '''This function returns the position of the close parenthesis
    matching the 1st open parenthesis found in expression,
    scanning from start (0 by default).

    Close parentheses met before any open parenthesis are ignored,
    so they can't hide the real match or make an inner one look like it.
    Returns None if no open parenthesis is found or if it is never closed.
    Parentheses inside quoted strings are counted like any other.'''
    par_count=0 #Open parentheses still waiting to be closed
    for i in range(start,len(expression)):
        if expression[i]=='(':
            par_count+=1
        elif expression[i]==')' and par_count>0: #A ')' with nothing open is skipped
            par_count-=1
            if par_count==0:
                return i #Closing parenthesis of the 1st open one
    return None #No match
 def tokenize(expression):
    '''This function converts an expression into a list where
    every token of the expression is an item of a list. Expressions into
@ -198,17 +209,7 @@ def tokenize(expression):
    while len(expression)>0:
        if expression.startswith('('): #Parenthesis state
            state=2
-            par_count=0 #Count of parenthesis
+            end=find_matching_parenthesis(expression)
            end=0
            for i in range(len(expression)):
                if expression[i]=='(':
                    par_count+=1
                elif expression[i]==')':
                    par_count-=1
                    if par_count==0:
                        end=i
                        break
            #Appends the tokenization of the content of the parenthesis
            items.append(tokenize(expression[1:end]))
            #Removes the entire parentesis and content from the expression
@ -217,9 +218,13 @@ def tokenize(expression):
        elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes
            items.append(expression[0:2]) #Adding operator in the top of the list
            expression=expression[2:].strip() #Removing operator from the expression
            if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization
                par=expression.find('(',find_matching_parenthesis(expression)) 
            else: #Expression without parenthesis, so adding what's between start and parenthesis as whole  
                par=expression.find('(')
-            items.append(expression[:par]) #Inserting parameter of the operator
+            items.append(expression[:par].strip()) #Inserting parameter of the operator
            expression=expression[par:].strip() #Removing parameter from the expression
        elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte
            items.append(expression[0])
@ -291,4 +296,3 @@ if __name__=="__main__":
    while True:
        e=raw_input("Expression: ")
        print parse(e)
--- a/relational/rtypes.py
+++ b/relational/rtypes.py
@ -1,3 +1,4 @@
 # -*- coding: utf-8 -*-
 # Relational
 # Copyright (C) 2008  Salvo "LtWorf" Tomaselli
 # 
@ -16,7 +17,9 @@
 # 
 # author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
-'''Custom types for relational algebra'''
+'''Custom types for relational algebra.
 Purpose of this module is having the isFloat function and
 implementing dates to use in selection.'''
 import datetime