|
|
@ -1,5 +1,5 @@
|
|
|
|
# Relational
|
|
|
|
# Relational
|
|
|
|
# Copyright (C) 2008-2017 Salvo "LtWorf" Tomaselli
|
|
|
|
# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# Relational is free software: you can redistribute it and/or modify
|
|
|
|
# Relational is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
@ -25,6 +25,7 @@
|
|
|
|
# Language definition here:
|
|
|
|
# Language definition here:
|
|
|
|
# http://ltworf.github.io/relational/grammar.html
|
|
|
|
# http://ltworf.github.io/relational/grammar.html
|
|
|
|
from typing import Optional, Union, List, Any
|
|
|
|
from typing import Optional, Union, List, Any
|
|
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
|
|
from relational import rtypes
|
|
|
|
from relational import rtypes
|
|
|
|
|
|
|
|
|
|
|
@ -84,9 +85,8 @@ class CallableString(str):
|
|
|
|
'''
|
|
|
|
'''
|
|
|
|
return eval(self, context)
|
|
|
|
return eval(self, context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
class Node:
|
|
|
|
class Node:
|
|
|
|
|
|
|
|
|
|
|
|
'''This class is a node of a relational expression. Leaves are relations
|
|
|
|
'''This class is a node of a relational expression. Leaves are relations
|
|
|
|
and internal nodes are operations.
|
|
|
|
and internal nodes are operations.
|
|
|
|
|
|
|
|
|
|
|
@ -102,72 +102,12 @@ class Node:
|
|
|
|
operation.
|
|
|
|
operation.
|
|
|
|
|
|
|
|
|
|
|
|
This class is used to convert an expression into python code.'''
|
|
|
|
This class is used to convert an expression into python code.'''
|
|
|
|
kind = None # type: Optional[int]
|
|
|
|
name: str
|
|
|
|
__hash__ = None # type: None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, expression: Optional[list] = None) -> None:
|
|
|
|
def __init__(self, name: str) -> None:
|
|
|
|
'''Generates the tree from the tokenized expression
|
|
|
|
raise NotImplementedError('This is supposed to be an abstract class')
|
|
|
|
If no expression is specified then it will create an empty node'''
|
|
|
|
|
|
|
|
if expression is None or len(expression) == 0:
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# If the list contains only a list, it will consider the lower level list.
|
|
|
|
def toCode(self): #FIXME return type
|
|
|
|
# This will allow things like ((((((a))))) to work
|
|
|
|
|
|
|
|
while len(expression) == 1 and isinstance(expression[0], list):
|
|
|
|
|
|
|
|
expression = expression[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The list contains only 1 string. Means it is the name of a relation
|
|
|
|
|
|
|
|
if len(expression) == 1:
|
|
|
|
|
|
|
|
self.kind = RELATION
|
|
|
|
|
|
|
|
self.name = expression[0]
|
|
|
|
|
|
|
|
if not rtypes.is_valid_relation_name(self.name):
|
|
|
|
|
|
|
|
raise ParserException(
|
|
|
|
|
|
|
|
u"'%s' is not a valid relation name" % self.name)
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Expression from right to left, searching for binary operators
|
|
|
|
|
|
|
|
# this means that binary operators have lesser priority than
|
|
|
|
|
|
|
|
# unary operators.
|
|
|
|
|
|
|
|
# It finds the operator with lesser priority, uses it as root of this
|
|
|
|
|
|
|
|
# (sub)tree using everything on its left as left parameter (so building
|
|
|
|
|
|
|
|
# a left subtree with the part of the list located on left) and doing
|
|
|
|
|
|
|
|
# the same on right.
|
|
|
|
|
|
|
|
# Since it searches for strings, and expressions into parenthesis are
|
|
|
|
|
|
|
|
# within sub-lists, they won't be found here, ensuring that they will
|
|
|
|
|
|
|
|
# have highest priority.
|
|
|
|
|
|
|
|
for i in range(len(expression) - 1, -1, -1):
|
|
|
|
|
|
|
|
if expression[i] in b_operators: # Binary operator
|
|
|
|
|
|
|
|
self.kind = BINARY
|
|
|
|
|
|
|
|
self.name = expression[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if len(expression[:i]) == 0:
|
|
|
|
|
|
|
|
raise ParserException(
|
|
|
|
|
|
|
|
u"Expected left operand for '%s'" % self.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if len(expression[i + 1:]) == 0:
|
|
|
|
|
|
|
|
raise ParserException(
|
|
|
|
|
|
|
|
u"Expected right operand for '%s'" % self.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.left = node(expression[:i])
|
|
|
|
|
|
|
|
self.right = node(expression[i + 1:])
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
'''Searches for unary operators, parsing from right to left'''
|
|
|
|
|
|
|
|
for i in range(len(expression) - 1, -1, -1):
|
|
|
|
|
|
|
|
if expression[i] in u_operators: # Unary operator
|
|
|
|
|
|
|
|
self.kind = UNARY
|
|
|
|
|
|
|
|
self.name = expression[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if len(expression) <= i + 2:
|
|
|
|
|
|
|
|
raise ParserException(
|
|
|
|
|
|
|
|
u"Expected more tokens in '%s'" % self.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.prop = expression[1 + i].strip()
|
|
|
|
|
|
|
|
self.child = node(expression[2 + i])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
raise ParserException("Expected operator in '%s'" % expression)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def toCode(self):
|
|
|
|
|
|
|
|
'''This method converts the AST into a python code object'''
|
|
|
|
'''This method converts the AST into a python code object'''
|
|
|
|
code = self._toPython()
|
|
|
|
code = self._toPython()
|
|
|
|
return compile(code, '<relational_expression>', 'eval')
|
|
|
|
return compile(code, '<relational_expression>', 'eval')
|
|
|
@ -181,25 +121,7 @@ class Node:
|
|
|
|
return CallableString(self._toPython())
|
|
|
|
return CallableString(self._toPython())
|
|
|
|
|
|
|
|
|
|
|
|
def _toPython(self) -> str:
|
|
|
|
def _toPython(self) -> str:
|
|
|
|
'''
|
|
|
|
raise NotImplementedError()
|
|
|
|
Same as toPython but returns a regular string
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
|
|
|
if self.name in b_operators:
|
|
|
|
|
|
|
|
return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
|
|
|
|
|
|
|
|
elif self.name in u_operators:
|
|
|
|
|
|
|
|
prop = self.prop
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Converting parameters
|
|
|
|
|
|
|
|
if self.name == PROJECTION:
|
|
|
|
|
|
|
|
prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
|
|
|
|
|
|
|
|
elif self.name == RENAME:
|
|
|
|
|
|
|
|
prop = '{\"%s\"}' % prop.replace(
|
|
|
|
|
|
|
|
',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
|
|
|
|
|
|
|
|
else: # Selection
|
|
|
|
|
|
|
|
prop = repr(prop)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
|
|
|
|
|
|
|
|
return self.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def printtree(self, level: int = 0) -> str:
|
|
|
|
def printtree(self, level: int = 0) -> str:
|
|
|
|
'''returns a representation of the tree using indentation'''
|
|
|
|
'''returns a representation of the tree using indentation'''
|
|
|
@ -216,27 +138,20 @@ class Node:
|
|
|
|
return '\n' + r
|
|
|
|
return '\n' + r
|
|
|
|
|
|
|
|
|
|
|
|
def get_left_leaf(self) -> 'Node':
|
|
|
|
def get_left_leaf(self) -> 'Node':
|
|
|
|
'''This function returns the leftmost leaf in the tree.'''
|
|
|
|
raise NotImplementedError()
|
|
|
|
if self.kind == RELATION:
|
|
|
|
|
|
|
|
return self
|
|
|
|
|
|
|
|
elif self.kind == UNARY:
|
|
|
|
|
|
|
|
return self.child.get_left_leaf()
|
|
|
|
|
|
|
|
elif self.kind == BINARY:
|
|
|
|
|
|
|
|
return self.left.get_left_leaf()
|
|
|
|
|
|
|
|
raise ValueError('What kind of alien object is this?')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def result_format(self, rels: dict) -> list:
|
|
|
|
def result_format(self, rels: dict) -> list: #FIXME types
|
|
|
|
'''This function returns a list containing the fields that the resulting relation will have.
|
|
|
|
'''This function returns a list containing the fields that the resulting relation will have.
|
|
|
|
It requires a dictionary where keys are the names of the relations and the values are
|
|
|
|
It requires a dictionary where keys are the names of the relations and the values are
|
|
|
|
the relation objects.'''
|
|
|
|
the relation objects.'''
|
|
|
|
if not isinstance(rels, dict):
|
|
|
|
if not isinstance(rels, dict):
|
|
|
|
raise TypeError('Can\'t be of None type')
|
|
|
|
raise TypeError('Can\'t be of None type')
|
|
|
|
|
|
|
|
|
|
|
|
if self.kind == RELATION:
|
|
|
|
if isinstance(self, Variable): #FIXME this is ugly
|
|
|
|
return list(rels[self.name].header)
|
|
|
|
return list(rels[self.name].header)
|
|
|
|
elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
|
|
|
|
elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
|
|
|
|
return self.left.result_format(rels)
|
|
|
|
return self.left.result_format(rels)
|
|
|
|
elif self.kind == BINARY and self.name == DIVISION:
|
|
|
|
elif isinstance(self, Binary) and self.name == DIVISION:
|
|
|
|
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
|
|
|
|
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
|
|
|
|
elif self.name == PROJECTION:
|
|
|
|
elif self.name == PROJECTION:
|
|
|
|
return [i.strip() for i in self.prop.split(',')]
|
|
|
|
return [i.strip() for i in self.prop.split(',')]
|
|
|
@ -259,7 +174,7 @@ class Node:
|
|
|
|
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
|
|
|
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
|
|
|
raise ValueError('What kind of alien object is this?')
|
|
|
|
raise ValueError('What kind of alien object is this?')
|
|
|
|
|
|
|
|
|
|
|
|
def __eq__(self, other):
|
|
|
|
def __eq__(self, other): #FIXME
|
|
|
|
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
|
|
|
|
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
|
|
|
|
return False
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
@ -271,22 +186,121 @@ class Node:
|
|
|
|
return self.left == other.left and self.right == other.right
|
|
|
|
return self.left == other.left and self.right == other.right
|
|
|
|
return True
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
|
|
|
|
if (self.kind == RELATION):
|
|
|
|
@dataclass
class Variable(Node):
    '''Leaf of the expression tree: a reference to a relation by its name.'''

    def get_left_leaf(self) -> Node:
        '''A leaf is its own leftmost leaf.'''
        return self

    def __str__(self):
        '''The textual form of a variable is just its name.'''
        return self.name

    def _toPython(self) -> str:
        '''The generated python code for a variable is its bare name.'''
        return self.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Binary(Node):
    '''Internal node for a binary operator, holding its two operand subtrees.'''
    left: Node
    right: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, found by descending into the left operand.'''
        leaf = self.left.get_left_leaf()
        return leaf

    def _toPython(self) -> str:
        '''Returns the python source for this operation as a regular string.

        The result has the form <left>.<method>(<right>) where the method
        name is looked up in op_functions using the operator name.'''
        lhs = self.left._toPython()
        method = op_functions[self.name]
        rhs = self.right._toPython()
        return f'{lhs}.{method}({rhs})'

    def __str__(self):
        '''Returns the expression as text.

        A right operand that is itself a binary operation is wrapped in
        parenthesis; any other right operand is emitted as it is.'''
        left_text = str(self.left)
        right_text = str(self.right)
        if isinstance(self.right, Binary):
            right_text = "(" + right_text + ")"
        return left_text + self.name + right_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]:
|
|
|
|
@dataclass
class Unary(Node):
    '''Internal node for a unary operator.

    prop is the textual parameter of the operator and child is the
    subtree the operator is applied to.'''
    prop: str
    child: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, found by descending into the child.'''
        leaf = self.child.get_left_leaf()
        return leaf

    def __str__(self):
        '''Returns the expression as text: name, parameter and the
        child expression within parenthesis.'''
        wrapped_child = "(" + str(self.child) + ")"
        return " ".join((self.name, self.prop, wrapped_child))

    def _toPython(self) -> str:
        '''Returns the python source for this operation as a regular string.

        The result has the form <child>.<method>(<argument>) where the
        method name is looked up in op_functions using the operator name
        and the argument is derived from prop.'''
        raw = self.prop

        # Converting parameters
        if self.name == PROJECTION:
            # Spaces are dropped and every comma separated item gets quoted
            items = raw.replace(' ', '').replace(',', '","')
            argument = '"' + items + '"'
        elif self.name == RENAME:
            # Builds a dict literal: commas split the entries, the arrow
            # splits key from value, spaces are dropped last
            entries = raw.replace(',', '","')
            entries = entries.replace(ARROW, '":"')
            entries = entries.replace(' ', '')
            argument = '{"' + entries + '"}'
        else:  # Selection
            argument = repr(raw)

        child_code = self.child._toPython()
        method = op_functions[self.name]
        return f'{child_code}.{method}({argument})'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_tokens(expression: List[Union[list, str]]) -> Node:
    '''Generates the tree from the tokenized expression.

    expression is a list of tokens; a nested list is a sub-expression
    that was within parenthesis.

    Returns a Variable when the expression is a single relation name,
    a Binary when a binary operator is found, a Unary when only a unary
    operator is found.

    Raises ParserException if a relation name is not valid, if an
    operator lacks its operands, or if no operator is found (this
    includes an empty expression).'''

    # If the list contains only a list, it will consider the lower level list.
    # This will allow things like ((((((a))))) to work
    while len(expression) == 1 and isinstance(expression[0], list):
        expression = expression[0]

    # The list contains only 1 string. Means it is the name of a relation
    if len(expression) == 1:
        if not rtypes.is_valid_relation_name(expression[0]):
            raise ParserException(
                u"'%s' is not a valid relation name" % expression[0])
        return Variable(expression[0])  # FIXME Move validation in the object

    # Expression from right to left, searching for binary operators
    # this means that binary operators have lesser priority than
    # unary operators.
    # It finds the operator with lesser priority, uses it as root of this
    # (sub)tree using everything on its left as left parameter (so building
    # a left subtree with the part of the list located on left) and doing
    # the same on right.
    # Since it searches for strings, and expressions into parenthesis are
    # within sub-lists, they won't be found here, ensuring that they will
    # have highest priority.
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in b_operators:  # Binary operator
            # This is a plain function, there is no self here: the operator
            # is named in the messages using the token itself.
            if i == 0:
                raise ParserException(
                    u"Expected left operand for '%s'" % expression[i])

            if i == len(expression) - 1:
                raise ParserException(
                    u"Expected right operand for '%s'" % expression[i])
            return Binary(
                expression[i],
                parse_tokens(expression[:i]),
                parse_tokens(expression[i + 1:]),
            )

    # Searches for unary operators, parsing from right to left
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in u_operators:  # Unary operator
            # The operator must be followed by its parameter and its operand
            if len(expression) <= i + 2:
                raise ParserException(
                    u"Expected more tokens in '%s'" % expression[i])

            return Unary(
                expression[i],
                prop=expression[1 + i].strip(),
                child=parse_tokens(expression[2 + i])
            )
    raise ParserException('Parse error')  # FIXME more details
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
|
|
|
|
'''This function returns the position of the matching
|
|
|
|
'''This function returns the position of the matching
|
|
|
|
close parenthesis to the 1st open parenthesis found
|
|
|
|
close parenthesis to the 1st open parenthesis found
|
|
|
|
starting from start (0 by default)'''
|
|
|
|
starting from start (0 by default)'''
|
|
|
@ -391,7 +405,7 @@ def tokenize(expression: str) -> list:
|
|
|
|
def tree(expression: str) -> Node:
    '''Parses a relational algebra expression and returns the root
    node of the resulting AST.

    The expression is first split into tokens, then the tokens are
    turned into the tree.'''
    tokens = tokenize(expression)
    return parse_tokens(tokens)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse(expr: str) -> CallableString:
|
|
|
|
def parse(expr: str) -> CallableString:
|
|
|
|