Merge pull request #35 from ltworf/multiline_optimization

Fix and test multiline optimizer
master
Salvo 'LtWorf' Tomaselli 2020-08-26 17:39:53 +07:00 committed by GitHub
commit ea22350463
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 51 deletions

@ -31,28 +31,51 @@ from relational import querysplit
from relational.maintenance import UserInterface from relational.maintenance import UserInterface
def optimize_program(code: str, rels: Dict[str, Relation]) -> str:
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.

    code: the program text, one expression or assignment per line.
          Empty lines and lines starting with ';' are ignored.
    rels: dict with relation names as keys; it is passed on to
          optimize_all and querysplit.split.

    Names assigned on earlier lines are inlined into the later
    expressions that use them, and only the result of the last
    expression is optimized.

    Return value: the program produced by querysplit.split for the
    optimized expression.

    Raises ValueError if code contains no expression at all.
    '''
    context: Dict[str, Node] = {}

    for line in code.split('\n'):
        # skip comments or empty lines
        line = line.strip()
        if line.startswith(';') or not line:
            continue

        res, query = UserInterface.split_query(line)
        last_res = res
        parsed = tree(query)
        _replace_leaves(parsed, context)
        # _replace_leaves only rewrites the children of a node, so a line
        # consisting of a single name (e.g. "b = a") has to be resolved
        # here, otherwise "b" would point to a dangling variable.
        if isinstance(parsed, Variable) and parsed.name in context:
            parsed = context[parsed.name]
        context[res] = parsed

    # Every processed line adds an entry, so an empty context means that
    # there was nothing to optimize and last_res was never assigned.
    if not context:
        raise ValueError('The program contains no expressions')

    node = optimize_all(context[last_res], rels, tostr=False)
    return querysplit.split(node, rels)
def _replace_leaves(node: Node, context: Dict[str, Node]) -> None:
    '''
    Inline known names into a parse tree, in place.

    Every operand of node (at any depth) that is a Variable whose
    name is a key of context gets replaced by the subtree stored
    in context under that name. The node passed in is never
    replaced itself, only its descendants are.
    '''
    if isinstance(node, Unary):
        operands = ('child',)
    elif isinstance(node, Binary):
        operands = ('left', 'right')
    else:
        # Nothing below this node
        return

    # Descend into all the operands first...
    for attr in operands:
        _replace_leaves(getattr(node, attr), context)

    # ...then swap the ones that are known names
    for attr in operands:
        operand = getattr(node, attr)
        if isinstance(operand, Variable) and operand.name in context:
            setattr(node, attr, context[operand.name])
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
'''This function performs all the available optimizations. '''This function performs all the available optimizations.
@ -70,20 +93,20 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
elif isinstance(expression, Node): elif isinstance(expression, Node):
n = expression n = expression
else: else:
raise (TypeError("expression must be a string or a node")) raise TypeError('expression must be a string or a node')
total = 1 total = 1
while total != 0: while total != 0:
total = 0 total = 0
if specific: if specific:
for i in optimizations.specific_optimizations: for i in optimizations.specific_optimizations:
n, c = recursive_scan(i, n, rels) n, c = _recursive_scan(i, n, rels)
if c != 0 and isinstance(debug, list): if c != 0 and isinstance(debug, list):
debug.append(str(n)) debug.append(str(n))
total += c total += c
if general: if general:
for j in optimizations.general_optimizations: for j in optimizations.general_optimizations:
n, c = recursive_scan(j, n, None) n, c = _recursive_scan(j, n, None)
if c != 0 and isinstance(debug, list): if c != 0 and isinstance(debug, list):
debug.append(str(n)) debug.append(str(n))
total += c total += c
@ -93,28 +116,7 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
return n return n
def specific_optimize(expression, rels: Dict[str, Relation]): def _recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]:
'''This function performs specific optimizations. Means that it will need to
know the fields used by the relations.
expression : see documentation of this module
rels: dic with relation name as key, and relation istance as value
Return value: this will return an optimized version of the expression'''
return optimize_all(expression, rels, specific=True, general=False)
def general_optimize(expression):
'''This function performs general optimizations. Means that it will not need to
know the fields used by the relations
expression : see documentation of this module
Return value: this will return an optimized version of the expression'''
return optimize_all(expression, None, specific=False, general=True)
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
'''Does a recursive optimization on the tree. '''Does a recursive optimization on the tree.
This function will recursively execute the function given This function will recursively execute the function given
@ -128,7 +130,7 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
returned value.''' returned value.'''
args = [] args = []
if rels: if rels is not None:
args.append(rels) args.append(rels)
changes = 0 changes = 0
@ -136,11 +138,11 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
changes += c changes += c
if isinstance(node, Unary): if isinstance(node, Unary):
node.child, c = recursive_scan(function, node.child, rels) node.child, c = _recursive_scan(function, node.child, rels)
changes += c changes += c
elif isinstance(node, Binary): elif isinstance(node, Binary):
node.left, c = recursive_scan(function, node.left, rels) node.left, c = _recursive_scan(function, node.left, rels)
changes += c changes += c
node.right, c = recursive_scan(function, node.right, rels) node.right, c = _recursive_scan(function, node.right, rels)
changes += c changes += c
return node, changes return node, changes

@ -1,5 +1,5 @@
# Relational # Relational
# Copyright (C) 2016 Salvo "LtWorf" Tomaselli # Copyright (C) 2016-2020 Salvo "LtWorf" Tomaselli
# #
# Relational is free software: you can redistribute it and/or modify # Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -18,15 +18,19 @@
# #
# This module splits a query into a program. # This module splits a query into a program.
from typing import List, Dict, Tuple
from relational import parser from relational.parser import Node, Binary, Unary, Variable
__all__ = ['split']
class Program: class Program:
def __init__(self, rels): def __init__(self, rels) -> None:
self.queries = [] self.queries: List[Tuple[str, Node]] = []
self.dictionary = {} # Key is the query, value is the relation self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation
self.vgen = vargen(rels, 'optm_') self.vgen = _vargen(rels, 'optm_')
def __str__(self): def __str__(self):
r = '' r = ''
@ -34,7 +38,7 @@ class Program:
r += '%s = %s' % (q[0], q[1]) + '\n' r += '%s = %s' % (q[0], q[1]) + '\n'
return r.rstrip() return r.rstrip()
def append_query(self, node): def append_query(self, node: Node) -> Node:
strnode = str(node) strnode = str(node)
rel = self.dictionary.get(strnode) rel = self.dictionary.get(strnode)
@ -43,29 +47,29 @@ class Program:
qname = next(self.vgen) qname = next(self.vgen)
self.queries.append((qname, node)) self.queries.append((qname, node))
n = parser.Node() n = Variable(qname)
n.kind = parser.RELATION
n.name = qname
self.dictionary[strnode] = n self.dictionary[strnode] = n
return n return n
def _separate(node: Node, program: Program) -> None:
    '''
    Split the tree rooted in node into the queries of program.

    Every operand of node that is not already a plain Variable is
    separated recursively, registered with program.append_query and
    replaced, in place, by the variable that append_query returns.
    Finally node itself is registered as a query.
    '''
    # Only operands that are expressions need to be moved into their own
    # query; a Variable is already a name and is left where it is.
    if isinstance(node, Unary) and not isinstance(node.child, Variable):
        _separate(node.child, program)
        rel = program.append_query(node.child)
        node.child = rel
    elif isinstance(node, Binary):
        if not isinstance(node.left, Variable):
            _separate(node.left, program)
            rel = program.append_query(node.left)
            node.left = rel
        if not isinstance(node.right, Variable):
            _separate(node.right, program)
            rel = program.append_query(node.right)
            node.right = rel
    program.append_query(node)
def vargen(avoid, prefix=''):
def _vargen(avoid: str, prefix: str=''):
''' '''
Generates temp variables. Generates temp variables.
@ -87,12 +91,15 @@ def vargen(avoid, prefix=''):
yield r yield r
count += 1 count += 1
def split(node, rels):
def split(node, rels) -> str:
''' '''
Split a query into a program. Split a query into a program.
The idea is that if there are duplicated subdtrees they The idea is that if there are duplicated subtrees they
get executed only once. get executed only once.
This is used by the optimizer module.
''' '''
p = Program(rels) p = Program(rels)
_separate(node, p) _separate(node, p)

@ -0,0 +1,8 @@
# Test for the multi-line optimizer: a three line program is passed to
# optimize_program together with an empty dict of relations, and the
# returned program must be exactly the two assignments below.
# NOTE(review): some operators between the operands seem to be missing in
# this copy of the strings (e.g. "ppl_skills (people skills)"); confirm
# them against the original test file before running it.
from relational.optimizer import optimize_program
a = optimize_program('''ppl_skills = people ⧓skills
ppl_skills1 = ppl_skills (people skills)
ppl_skills ppl_skills1 dates''', {})
assert a == '''optm_a = people⧓skills
optm_b = optm_adates'''