Merge pull request #20 from ltworf/refactor_parser

Refactor parser
master
Salvo 'LtWorf' Tomaselli 2020-06-09 23:49:11 +07:00 committed by GitHub
commit 5d3823d0ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 436 additions and 492 deletions

@ -1,4 +1,7 @@
2.6
3.0
- Refactored parser to use better typing
- Refactored and fixed some optimizations
- Added more test cases
- Improved survey sending
- Prevent relation/field names from being reserved keywords
- Fixed issue in cli where loading an invalid file would lead to a crash

@ -1,5 +1,5 @@
# Relational
# Copyright (C) 2009-2018 Salvo "LtWorf" Tomaselli
# Copyright (C) 2009-2020 Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -30,8 +30,9 @@
from io import StringIO
from tokenize import generate_tokens
from typing import Tuple, Dict
from relational.relation import Relation
from relational import parser
sel_op = (
@ -98,36 +99,7 @@ def replace_node(replace, replacement):
replace.left = replacement.left
def recoursive_scan(function, node, rels=None):
'''Does a recoursive optimization on the tree.
This function will recoursively execute the function given
as "function" parameter starting from node to all the tree.
if rels is provided it will be passed as argument to the function.
Otherwise the function will be called just on the node.
Result value: function is supposed to return the amount of changes
it has performed on the tree.
The various result will be added up and this final value will be the
returned value.'''
changes = 0
# recoursive scan
if node.kind == parser.UNARY:
if rels != None:
changes += function(node.child, rels)
else:
changes += function(node.child)
elif node.kind == parser.BINARY:
if rels != None:
changes += function(node.right, rels)
changes += function(node.left, rels)
else:
changes += function(node.right)
changes += function(node.left)
return changes
def duplicated_select(n: parser.Node) -> int:
def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates and deletes things like
σ a ( σ a(C)) and the ones like σ a ( σ b(C))
replacing the 1st one with a single select and
@ -135,243 +107,189 @@ def duplicated_select(n: parser.Node) -> int:
in and
'''
changes = 0
if n.name == SELECTION and n.child.name == SELECTION:
while n.name == SELECTION and n.child.name == SELECTION:
changes += 1
prop = n.prop
if n.prop != n.child.prop: # Nested but different, joining them
n.prop = n.prop + " and " + n.child.prop
prop = n.prop + " and " + n.child.prop
# This adds parenthesis if they are needed
if n.child.prop.startswith('(') or n.prop.startswith('('):
n.prop = '(%s)' % n.prop
n.child = n.child.child
changes = 1
changes += duplicated_select(n)
return changes + recoursive_scan(duplicated_select, n)
prop = '(%s)' % prop
n = parser.Unary(
SELECTION,
prop,
n.child.child,
)
return n, changes
def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Simplifies binary operations whose operands are the same expression,
    or where one operand is a selection over the other one.

    R ∪ R --> R
    R ∩ R --> R
    R - R --> σ False (R)
    σ k (R) - R --> σ False (R)
    R - σ k (R) --> σ not k (R)
    σ k (R) ∪ R --> R
    σ k (R) ∩ R --> σ k (R)

    A join of an expression with itself is reduced like a union.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    # Union, intersection or join of the same thing
    if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
        return n.left, 1

    # Selection and union of the same thing: the unfiltered side wins
    if n.name == UNION:
        if n.left.name == SELECTION and n.left.child == n.right:
            return n.right, 1
        if n.right.name == SELECTION and n.right.child == n.left:
            return n.left, 1
        return n, 0

    # Selection and intersection of the same thing: the filtered side wins
    if n.name == INTERSECTION:
        if n.left.name == SELECTION and n.left.child == n.right:
            return n.left, 1
        if n.right.name == SELECTION and n.right.child == n.left:
            return n.right, 1
        return n, 0

    if n.name == DIFFERENCE:
        # Subtraction and selection of the same thing
        if n.right.name == SELECTION and n.right.child == n.left:
            negated = '(not (%s))' % n.right.prop
            return parser.Unary(SELECTION, negated, n.right.child), 1

        # Subtraction of the same thing or with selection on the left child
        if n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right):
            # The empty result must keep the header of the subtracted
            # expression. Selecting over the leftmost leaf relation would
            # change the fields whenever that expression is not a plain
            # relation (e.g. a projection or a product).
            return parser.Unary(SELECTION, 'False', n.right), 1

    return n, 0
def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Pushes a selection below a union, subtraction or intersection.

    σ i==2 (c ∪ d)
    becomes
    σ i==2 (c) ∪ σ i==2 (d)

    and the same for subtraction and intersection.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    set_operators = (UNION, DIFFERENCE, INTERSECTION)
    if n.name != SELECTION or n.child.name not in set_operators:
        return n, 0

    # The same condition gets applied to both operands
    filtered_left = parser.Unary(SELECTION, n.prop, n.child.left)
    filtered_right = parser.Unary(SELECTION, n.prop, n.child.right)
    return parser.Binary(n.child.name, filtered_left, filtered_right), 1
def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Collapses two nested projections into the outer one.

    π i ( π j (R))
    becomes
    π i (R)

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    nested = n.name == PROJECTION and n.child.name == PROJECTION
    if not nested:
        return n, 0

    # Keep the outer field list, skip the inner projection entirely
    grandchild = n.child.child
    return parser.Unary(PROJECTION, n.prop, grandchild), 1
def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Moves a selection inside the projection it is applied to.

    σ j (π k(R))
    becomes
    π k(σ j (R))

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name == SELECTION and n.child.name == PROJECTION:
        child = parser.Unary(SELECTION, n.prop, n.child.child)
        # The tree was rewritten, so this counts as one change, like in
        # all the other optimizations.
        return parser.Unary(PROJECTION, n.child.prop, child), 1
    return n, 0
def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Pulls identical renames out of a union.

    ρ a➡b(R) ∪ ρ a➡b(Q)
    becomes
    ρ a➡b(R ∪ Q)

    Does the same with subtraction and intersection.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name not in (DIFFERENCE, UNION, INTERSECTION):
        return n, 0
    if n.left.name != RENAME or n.right.name != RENAME:
        return n, 0

    # Compared as dictionaries, so the order of the pairs does not matter
    left_renames = n.left.get_rename_prop()
    right_renames = n.right.get_rename_prop()
    if right_renames != left_renames:
        return n, 0

    # The original operation now works on the children of the renames
    merged = parser.Binary(n.name, n.left.child, n.right.child)
    return parser.Unary(RENAME, n.left.prop, merged), 1
def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function purges renames like

    ρ id->id,a->q (A)
    into
    ρ a->q (A)

    or removes the operation entirely if they all get removed.

    Returns the (possibly new) node and the number of changes reported.
    Only the removal of the whole rename is reported as a change; dropping
    some pairs edits the node in place and reports 0.
    '''
    if n.name != RENAME:
        return n, 0

    renames = n.get_rename_prop()
    # Build a new dictionary rather than deleting while iterating: removing
    # keys from a dict inside a loop over its items() raises RuntimeError.
    useful = {k: v for k, v in renames.items() if k != v}

    if not useful:  # Nothing to rename, removing the rename
        return n.child, 1

    if len(useful) != len(renames):
        # Changing the node in place, no need to return to cause a recursive step
        n.set_rename_prop(useful)

    return n, 0
def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function removes redundant subsequent renames joining them into one

    ρ .. ρ .. (A)
    into
    ρ ... (A)

    The inner rename is applied first, then the outer one, so the two are
    composed in that order:

    ρ b->c (ρ a->b (A))   becomes   ρ a->c (A)
    ρ c->a (ρ a->b (A))   becomes   ρ a->b,c->a (A)
    ρ b->a (ρ a->b (A))   becomes   A

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name != RENAME or n.child.name != RENAME:
        return n, 0

    inner = n.child.get_rename_prop()
    outer = n.get_rename_prop()

    # Fields renamed by the inner operation end up with whatever name the
    # outer operation gives to their intermediate name, if any.
    composed = {old: outer.get(new, new) for old, new in inner.items()}

    # Outer renames of fields that were not produced by the inner rename
    # act directly on the fields of the relation. Outer pairs that were
    # already consumed above must not be added a second time.
    produced = set(inner.values())
    for old, new in outer.items():
        if old not in produced:
            composed.setdefault(old, new)

    # Renames that end up where they started (a->b then b->a) do nothing
    composed = {old: new for old, new in composed.items() if old != new}

    relation = n.child.child
    if not composed:  # Nothing to rename, removing the rename op
        return relation, 1

    merged = parser.Unary(RENAME, '', relation)
    merged.set_rename_prop(composed)
    return merged, 1
class level_string(str):
@ -411,101 +329,84 @@ def tokenize_select(expression):
return l
def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like

    π k(ρ j(R))
    and replaces them with
    ρ j(π k(R)).

    This will let rename work on a hopefully smaller set
    and more important, will hopefully allow further optimizations.

    Will also eliminate fields in the rename that are cut in the projection.
    If no rename is left after that, only the projection is returned.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name != PROJECTION or n.child.name != RENAME:
        return n, 0

    # π index,name(ρ id➡index(R))
    renames = n.child.get_rename_prop()

    # Use pre-rename names in the projection. Every field is translated
    # through the inverse mapping exactly once; replacing names one pair at
    # a time would lose a field when two fields swap names (a➡b,b➡a).
    to_old_name = {new: old for old, new in renames.items()}
    projections = list(dict.fromkeys(
        to_old_name.get(field, field) for field in n.get_projection_prop()
    ))

    # Eliminate fields
    kept = set(projections)
    renames = {old: new for old, new in renames.items() if old in kept}

    child = parser.Unary(PROJECTION, '', n.child.child)
    child.set_projection_prop(projections)

    if not renames:
        # Every renamed field was cut by the projection. A rename with an
        # empty property cannot be parsed back, and would do nothing anyway.
        return child, 1

    renamed = parser.Unary(RENAME, '', child)
    renamed.set_rename_prop(renames)
    return renamed, 1
def swap_rename_select(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like

    σ k(ρ j(R))
    and replaces them with
    ρ j(σ k(R)).

    Renaming the attributes used in the
    selection, so the operation is still valid.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name != SELECTION or n.child.name != RENAME:
        return n, 0

    # This is an inverse mapping for the rename
    renames = {v: k for k, v in n.child.get_rename_prop().items()}

    # Tokenizes the expression in the select, and gives back the old name
    # to every token that refers to a renamed field. Only the part before
    # the first dot is the field name; the rest (like a method call on the
    # field) is kept as it is.
    tokens = []
    for token in tokenize_select(n.prop):
        head, dot, tail = token.partition('.')
        if head in renames:
            token = renames[head] + dot + tail
        tokens.append(token)

    # Swapping operators
    child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child)
    return parser.Unary(RENAME, n.child.prop, child), 1
def select_union_intersect_subtract(n: parser.Node) -> int:
'''This function locates things like σ i(a) σ q(a)
and replaces them with σ (i OR q) (a)
'''This function locates things like
σ i(a) σ q(a)
and replaces them with
σ (i OR q) (a)
Removing a O() operation like the union'''
changes = 0
if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
n.left.name == SELECTION and \
n.right.name == SELECTION and \
n.left.child == n.right.child:
changes = 1
d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
op = d[n.name]
newnode = parser.Node()
if n.left.prop.startswith('(') or n.right.prop.startswith('('):
t_str = '('
if n.left.prop.startswith('('):
@ -519,54 +420,34 @@ def select_union_intersect_subtract(n: parser.Node) -> int:
t_str += '%s'
t_str += ')'
newnode.prop = t_str % (n.left.prop, op, n.right.prop)
prop = t_str % (n.left.prop, op, n.right.prop)
else:
newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
newnode.name = SELECTION
newnode.child = n.left.child
newnode.kind = parser.UNARY
replace_node(n, newnode)
return changes + recoursive_scan(select_union_intersect_subtract, n)
prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
return parser.Unary(SELECTION, prop, n.left.child), 1
return n, 0
def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
    '''
    A * B ∪ A * C = A * (B ∪ C)
    Same thing with inner join

    The common operand can be on either side of each product.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name != UNION or n.left.name not in {PRODUCT, JOIN} or n.left.name != n.right.name:
        return n, 0

    lhs, rhs = n.left, n.right

    # Try both operands of the left product as the common factor, and look
    # for it on both sides of the right product.
    for common, l_rest in ((lhs.left, lhs.right), (lhs.right, lhs.left)):
        if common == rhs.right:
            r_rest = rhs.left
        elif common == rhs.left:
            r_rest = rhs.right
        else:
            continue
        newchild = parser.Binary(UNION, l_rest, r_rest)
        return parser.Binary(lhs.name, common, newchild), 1

    return n, 0
def projection_and_union(n, rels):
def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
'''
Turns
π a,b,c(A) π a,b,c(B)
@ -581,28 +462,16 @@ def projection_and_union(n, rels):
n.left.name == PROJECTION and \
n.right.name == PROJECTION and \
set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
newchild = parser.Node()
newchild.kind = parser.BINARY
newchild.name = UNION
newchild.left = n.left.child
newchild.right = n.right.child
newnode = parser.Node()
newnode.child = newchild
newnode.kind = parser.UNARY
newnode.name = PROJECTION
newnode.prop = n.right.prop
replace_node(n, newnode)
changes = 1
return changes + recoursive_scan(projection_and_union, n, rels)
child = parser.Binary(UNION, n.left.child, n.right.child)
return parser.Unary(PROJECTION, n.right.prop, child), 0
return n, 0
def selection_and_product(n, rels):
def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node:
'''This function locates things like σ k (R*Q) and converts them into
σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
i contains attributes belonging to Q and l contains attributes belonging to both'''
changes = 0
if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
l_attr = n.child.left.result_format(rels)
@ -637,76 +506,71 @@ def selection_and_product(n, rels):
if j in r_attr: # Field in right
r_fields = True
if l_fields and r_fields: # Fields in both
both.append(i)
elif l_fields:
if l_fields and not r_fields:
left.append(i)
elif r_fields:
elif r_fields and not l_fields:
right.append(i)
else: # Unknown.. adding in both
both.append(i)
# Preparing left selection
if len(left) > 0:
changes = 1
l_node = parser.Node()
l_node.name = SELECTION
l_node.kind = parser.UNARY
l_node.child = n.child.left
l_node.prop = ''
n.child.left = l_node
if left:
l_prop = ''
while len(left) > 0:
c = left.pop(0)
for i in c:
l_node.prop += i + ' '
l_prop += i + ' '
if len(left) > 0:
l_node.prop += ' and '
if '(' in l_node.prop:
l_node.prop = '(%s)' % l_node.prop
l_prop += ' and '
if '(' in l_prop:
l_prop = '(%s)' % l_prop
l_node = parser.Unary(SELECTION, l_prop, n.child.left)
else:
l_node = n.child.left
# Preparing right selection
if len(right) > 0:
changes = 1
r_node = parser.Node()
r_node.name = SELECTION
r_node.prop = ''
r_node.kind = parser.UNARY
r_node.child = n.child.right
n.child.right = r_node
if right:
r_prop = ''
while len(right) > 0:
c = right.pop(0)
r_node.prop += ' '.join(c)
r_prop += ' '.join(c)
if len(right) > 0:
r_node.prop += ' and '
if '(' in r_node.prop:
r_node.prop = '(%s)' % r_node.prop
r_prop += ' and '
if '(' in r_prop:
r_prop = '(%s)' % r_prop
r_node = parser.Unary(SELECTION, r_prop, n.child.right)
else:
r_node = n.child.right
b_node = parser.Binary(n.child.name, l_node, r_node)
# Changing main selection
n.prop = ''
if len(both) != 0:
if both:
both_prop = ''
while len(both) > 0:
c = both.pop(0)
n.prop += ' '.join(c)
both_prop += ' '.join(c)
if len(both) > 0:
n.prop += ' and '
if '(' in n.prop:
n.prop = '(%s)' % n.prop
both_prop += ' and '
if '(' in both_prop:
both_prop = '(%s)' % both_prop
r = parser.Unary(SELECTION, both_prop, b_node)
return r, len(left) + len(right)
else: # No need for general select
replace_node(n, n.child)
return b_node, 1
return changes + recoursive_scan(selection_and_product, n, rels)
return n, 0
def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Removes projections that are over all the fields

    rels is passed to result_format() to find out the fields of the
    projected expression.

    Returns the (possibly new) node and the number of changes done (0 or 1).
    '''
    if n.name != PROJECTION:
        return n, 0

    available = set(n.child.result_format(rels))
    requested = {field.strip() for field in n.prop.split(',')}
    if available != requested:
        return n, 0

    # The projection keeps every field, so it can be skipped
    return n.child, 1
general_optimizations = [
duplicated_select,
@ -714,6 +578,7 @@ general_optimizations = [
duplicated_projection,
selection_inside_projection,
subsequent_renames,
futile_renames,
swap_rename_select,
futile_union_intersection_subtraction,
swap_union_renames,
@ -726,6 +591,3 @@ specific_optimizations = [
projection_and_union,
useless_projection,
]
if __name__ == "__main__":
print (tokenize_select("skill == 'C' and id % 2 == 0"))

@ -1,5 +1,5 @@
# Relational
# Copyright (C) 2008-2016 Salvo "LtWorf" Tomaselli
# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -22,23 +22,22 @@
# relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
# The functions will always return a string with the optimized query, but if a parse tree was provided,
# the parse tree itself will be modified accordingly.
from typing import Union, Optional, Dict, Any
from typing import Union, Optional, Dict, Any, Tuple
from relational.relation import Relation
from relational import optimizations
from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree
from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree
from relational import querysplit
from relational.maintenance import UserInterface
ContextDict = Dict[str,Any]
def optimize_program(code, rels: ContextDict):
def optimize_program(code, rels: Dict[str, Relation]):
'''
Optimize an entire program, composed by multiple expressions
and assignments.
'''
lines = code.split('\n')
context = {} # type: ContextDict
context = {}
for line in lines:
line = line.strip()
@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict):
return querysplit.split(node, rels)
def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
'''This function performs all the available optimizations.
expression : see documentation of this module
@ -82,23 +81,23 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool
total = 0
if specific:
for i in optimizations.specific_optimizations:
res = i(n, rels) # Performs the optimization
if res != 0 and dbg:
n, c = recursive_scan(i, n, rels)
if c != 0 and dbg:
debug.append(str(n))
total += res
total += c
if general:
for i in optimizations.general_optimizations:
res = i(n) # Performs the optimization
if res != 0 and dbg:
n, c = recursive_scan(i, n, None)
if c != 0 and dbg:
debug.append(str(n))
total += res
total += c
if tostr:
return str(n)
else:
return n
def specific_optimize(expression, rels: ContextDict):
def specific_optimize(expression, rels: Dict[str, Relation]):
'''This function performs specific optimizations. Means that it will need to
know the fields used by the relations.
@ -117,3 +116,35 @@ def general_optimize(expression):
Return value: this will return an optimized version of the expression'''
return optimize_all(expression, None, specific=False, general=True)
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
    '''Does a recursive optimization on the tree.

    This function will recursively execute the function given
    as "function" parameter starting from node to all the tree.
    If rels is not None it will be passed as second argument to the
    function. Otherwise the function will be called just on the node.

    function is supposed to return a tuple with the node that replaces
    the one it received (which can be the same one) and the amount of
    changes it has performed.

    Return value: the node that replaces the given one, and the sum of
    the changes reported for it and for all of its descendants.'''
    # An empty dictionary is still a context to pass on, so compare with
    # None instead of relying on truthiness.
    args = () if rels is None else (rels,)

    node, changes = function(node, *args)

    # The children are scanned on the node returned by the function, so a
    # freshly created subtree gets optimized too.
    if isinstance(node, Unary):
        node.child, c = recursive_scan(function, node.child, rels)
        changes += c
    elif isinstance(node, Binary):
        node.left, c = recursive_scan(function, node.left, rels)
        changes += c
        node.right, c = recursive_scan(function, node.right, rels)
        changes += c
    return node, changes

@ -1,5 +1,5 @@
# Relational
# Copyright (C) 2008-2017 Salvo "LtWorf" Tomaselli
# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -24,14 +24,11 @@
#
# Language definition here:
# http://ltworf.github.io/relational/grammar.html
from typing import Optional, Union, List, Any
from typing import Optional, Union, List, Any, Dict
from dataclasses import dataclass
from relational import rtypes
RELATION = 0
UNARY = 1
BINARY = 2
PRODUCT = '*'
DIFFERENCE = '-'
UNION = ''
@ -84,9 +81,8 @@ class CallableString(str):
'''
return eval(self, context)
@dataclass
class Node:
'''This class is a node of a relational expression. Leaves are relations
and internal nodes are operations.
@ -102,72 +98,12 @@ class Node:
operation.
This class is used to convert an expression into python code.'''
kind = None # type: Optional[int]
__hash__ = None # type: None
name: str
def __init__(self, expression: Optional[list] = None) -> None:
'''Generates the tree from the tokenized expression
If no expression is specified then it will create an empty node'''
if expression is None or len(expression) == 0:
return
def __init__(self, name: str) -> None:
raise NotImplementedError('This is supposed to be an abstract class')
# If the list contains only a list, it will consider the lower level list.
# This will allow things like ((((((a))))) to work
while len(expression) == 1 and isinstance(expression[0], list):
expression = expression[0]
# The list contains only 1 string. Means it is the name of a relation
if len(expression) == 1:
self.kind = RELATION
self.name = expression[0]
if not rtypes.is_valid_relation_name(self.name):
raise ParserException(
u"'%s' is not a valid relation name" % self.name)
return
# Expression from right to left, searching for binary operators
# this means that binary operators have lesser priority than
# unary operators.
# It finds the operator with lesser priority, uses it as root of this
# (sub)tree using everything on its left as left parameter (so building
# a left subtree with the part of the list located on left) and doing
# the same on right.
# Since it searches for strings, and expressions into parenthesis are
# within sub-lists, they won't be found here, ensuring that they will
# have highest priority.
for i in range(len(expression) - 1, -1, -1):
if expression[i] in b_operators: # Binary operator
self.kind = BINARY
self.name = expression[i]
if len(expression[:i]) == 0:
raise ParserException(
u"Expected left operand for '%s'" % self.name)
if len(expression[i + 1:]) == 0:
raise ParserException(
u"Expected right operand for '%s'" % self.name)
self.left = node(expression[:i])
self.right = node(expression[i + 1:])
return
'''Searches for unary operators, parsing from right to left'''
for i in range(len(expression) - 1, -1, -1):
if expression[i] in u_operators: # Unary operator
self.kind = UNARY
self.name = expression[i]
if len(expression) <= i + 2:
raise ParserException(
u"Expected more tokens in '%s'" % self.name)
self.prop = expression[1 + i].strip()
self.child = node(expression[2 + i])
return
raise ParserException("Expected operator in '%s'" % expression)
def toCode(self):
def toCode(self): #FIXME return type
'''This method converts the AST into a python code object'''
code = self._toPython()
return compile(code, '<relational_expression>', 'eval')
@ -181,25 +117,7 @@ class Node:
return CallableString(self._toPython())
def _toPython(self) -> str:
'''
Same as toPython but returns a regular string
'''
if self.name in b_operators:
return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
elif self.name in u_operators:
prop = self.prop
# Converting parameters
if self.name == PROJECTION:
prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
elif self.name == RENAME:
prop = '{\"%s\"}' % prop.replace(
',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
else: # Selection
prop = repr(prop)
return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
return self.name
raise NotImplementedError()
def printtree(self, level: int = 0) -> str:
'''returns a representation of the tree using indentation'''
@ -216,27 +134,20 @@ class Node:
return '\n' + r
def get_left_leaf(self) -> 'Node':
'''This function returns the leftmost leaf in the tree.'''
if self.kind == RELATION:
return self
elif self.kind == UNARY:
return self.child.get_left_leaf()
elif self.kind == BINARY:
return self.left.get_left_leaf()
raise ValueError('What kind of alien object is this?')
raise NotImplementedError()
def result_format(self, rels: dict) -> list:
def result_format(self, rels: dict) -> list: #FIXME types
'''This function returns a list containing the fields that the resulting relation will have.
It requires a dictionary where keys are the names of the relations and the values are
the relation objects.'''
if not isinstance(rels, dict):
raise TypeError('Can\'t be of None type')
if self.kind == RELATION:
if isinstance(self, Variable): #FIXME this is ugly
return list(rels[self.name].header)
elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
return self.left.result_format(rels)
elif self.kind == BINARY and self.name == DIVISION:
elif isinstance(self, Binary) and self.name == DIVISION:
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
elif self.name == PROJECTION:
return [i.strip() for i in self.prop.split(',')]
@ -259,7 +170,7 @@ class Node:
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
raise ValueError('What kind of alien object is this?')
def __eq__(self, other):
def __eq__(self, other): #FIXME
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
return False
@ -271,22 +182,151 @@ class Node:
return self.left == other.left and self.right == other.right
return True
@dataclass
class Variable(Node):
    '''A leaf of the expression tree: the name of a relation.'''

    def get_left_leaf(self) -> Node:
        '''A leaf is its own leftmost leaf.'''
        return self

    def __str__(self):
        return self.name

    def _toPython(self) -> str:
        '''The python code for a relation is just its name.'''
        return self.name
def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]:
@dataclass
class Binary(Node):
    '''Inner node for a binary operator and its two operand subtrees.'''
    # Subtree on the left of the operator.
    left: Node
    # Subtree on the right of the operator.
    right: Node

    def get_left_leaf(self) -> Node:
        '''Return the leftmost leaf, found by descending the left operand.'''
        return self.left.get_left_leaf()

    def _toPython(self) -> str:
        '''Return the Python expression for this node.

        The result is a method call on the left operand's expression, with
        the method name looked up in op_functions and the right operand's
        expression as the only argument.
        '''
        receiver = self.left._toPython()
        method = op_functions[self.name]
        argument = self.right._toPython()
        return f'{receiver}.{method}({argument})'

    def __str__(self):
        '''Return the expression text, parenthesising a binary right operand.'''
        lhs = str(self.left)
        rhs = str(self.right)
        if isinstance(self.right, Binary):
            # Keep the grouping of a nested binary expression on the right.
            rhs = f'({rhs})'
        return f'{lhs}{self.name}{rhs}'
@dataclass
class Unary(Node):
    '''Inner node for a unary operator applied to a single child subtree.'''
    # Raw text of the operator's argument.
    prop: str
    # Subtree the operator is applied to.
    child: Node

    def get_left_leaf(self) -> Node:
        '''Return the leftmost leaf, found by descending into the child.'''
        return self.child.get_left_leaf()

    def __str__(self):
        '''Return the expression text: operator, prop, parenthesised child.'''
        inner = str(self.child)
        return f'{self.name} {self.prop} ({inner})'

    def _toPython(self) -> str:
        '''Return the Python expression for this node.

        The result is a method call on the child's expression; the method
        name comes from op_functions and the argument is derived from prop
        according to the operator.
        '''
        if self.name == PROJECTION:
            # Drop blanks and quote every comma-separated field: a,b -> "a","b"
            fields = self.prop.replace(' ', '').replace(',', '","')
            argument = f'"{fields}"'
        elif self.name == RENAME:
            argument = repr(self.get_rename_prop())
        else:
            # Any other operator receives prop as a string literal
            argument = repr(self.prop)

        receiver = self.child._toPython()
        method = op_functions[self.name]
        return f'{receiver}.{method}({argument})'

    def get_projection_prop(self) -> List[str]:
        '''Return the comma-separated fields of prop, stripped of whitespace.

        Raises ValueError if this node is not a projection.
        '''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        return list(map(str.strip, self.prop.split(',')))

    def set_projection_prop(self, p: List[str]) -> None:
        '''Set prop to the given field names joined by commas.

        Raises ValueError if this node is not a projection.
        '''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        self.prop = ','.join(p)

    def get_rename_prop(self) -> Dict[str, str]:
        '''
        Returns prop parsed into a dictionary.

        Every comma-separated item is split on ARROW; the stripped left side
        is the key and the stripped right side is the value.

        Raises ValueError if this node is not a rename.
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        pairs = (item.split(ARROW) for item in self.prop.split(','))
        return {pair[0].strip(): pair[1].strip() for pair in pairs}

    def set_rename_prop(self, renames: Dict[str, str]) -> None:
        '''
        Sets prop from a dictionary, writing each entry as key, ARROW, value
        and joining the entries with commas.

        Raises ValueError if this node is not a rename.
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        entries = [f'{old}{ARROW}{new}' for old, new in renames.items()]
        self.prop = ','.join(entries)
def parse_tokens(expression: List[Union[list, str]]) -> Node:
    '''Generates the tree from the tokenized expression.

    expression is a list whose items are either strings (tokens) or nested
    lists (parenthesised sub-expressions).

    Returns the root of the tree: a Variable, Binary or Unary node.

    Raises ParserException when a relation name is not valid, when an
    operator is missing an operand, or when no rule matches the expression
    (which includes the empty expression).'''

    # If the list contains only a list, it will consider the lower level list.
    # This will allow things like ((((((a))))) to work
    while len(expression) == 1 and isinstance(expression[0], list):
        expression = expression[0]

    # The list contains only 1 string. Means it is the name of a relation
    if len(expression) == 1:
        if not rtypes.is_valid_relation_name(expression[0]):
            raise ParserException(
                u"'%s' is not a valid relation name" % expression[0])
        return Variable(expression[0])  # FIXME Move validation in the object

    # Expression from right to left, searching for binary operators
    # this means that binary operators have lesser priority than
    # unary operators.
    # It finds the operator with lesser priority, uses it as root of this
    # (sub)tree using everything on its left as left parameter (so building
    # a left subtree with the part of the list located on left) and doing
    # the same on right.
    # Since it searches for strings, and expressions into parenthesis are
    # within sub-lists, they won't be found here, ensuring that they will
    # have highest priority.
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in b_operators:  # Binary operator
            # There is no "self" in this function: the operator being
            # reported is the token itself.
            operator = expression[i]
            left_tokens = expression[:i]
            right_tokens = expression[i + 1:]

            if len(left_tokens) == 0:
                raise ParserException(
                    u"Expected left operand for '%s'" % operator)

            if len(right_tokens) == 0:
                raise ParserException(
                    u"Expected right operand for '%s'" % operator)
            return Binary(operator, parse_tokens(left_tokens), parse_tokens(right_tokens))

    # Searches for unary operators, parsing from right to left
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in u_operators:  # Unary operator
            # A unary operator needs two more tokens: its prop and its child
            if len(expression) <= i + 2:
                raise ParserException(
                    u"Expected more tokens in '%s'" % expression[i])

            return Unary(
                expression[i],
                prop=expression[1 + i].strip(),
                child=parse_tokens(expression[2 + i])
            )
    raise ParserException('Parse error')  # FIXME more details
def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
'''This function returns the position of the matching
close parenthesis to the 1st open parenthesis found
starting from start (0 by default)'''
@ -391,7 +431,7 @@ def tokenize(expression: str) -> list:
def tree(expression: str) -> Node:
    '''This function parses a relational algebra expression into an AST and
    returns its root node.

    The expression is tokenized first and the tokens are then handed to
    parse_tokens, which builds the tree.'''
    return parse_tokens(tokenize(expression))
def parse(expr: str) -> CallableString:
@ -400,11 +440,3 @@ def parse(expr: str) -> CallableString:
Python expression.
'''
return tree(expr).toPython()
# Backwards compatibility
# Bound before the interactive loop below: that loop only ends when the
# input does, so anything placed after it is not available while it runs.
node = Node

if __name__ == "__main__":
    # Minimal interactive prompt: read an expression, print its parsed form.
    while True:
        try:
            e = input("Expression: ")
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D) or Ctrl-C: leave without a traceback
            break
        print(parse(e))

@ -0,0 +1 @@
ρ name➡n,age➡a(σTrue(people)) ρ age➡a,name➡n(people)

@ -0,0 +1,9 @@
id,n,chief,a
0,jack,0,22
1,carl,0,20
2,john,1,30
3,dean,1,33
4,eve,0,25
5,duncan,4,30
6,paul,4,30
7,alia,1,28

@ -0,0 +1 @@
σ i%2==0 (ρ id➡i (people))

@ -0,0 +1,5 @@
i,name,chief,age
0,jack,0,22
2,john,1,30
4,eve,0,25
6,paul,4,30