From 7228c86d72b50c55f70cf52beb3fd1b9e3fcebb0 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Wed, 26 Aug 2020 17:14:22 +0200 Subject: [PATCH 1/5] I had completely forgotten about multiline optimizations --- relational/optimizer.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/relational/optimizer.py b/relational/optimizer.py index b3b04ee..178da5f 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -31,28 +31,51 @@ from relational import querysplit from relational.maintenance import UserInterface -def optimize_program(code, rels: Dict[str, Relation]): +def optimize_program(code: str, rels: Dict[str, Relation]) -> str: ''' Optimize an entire program, composed by multiple expressions and assignments. ''' - raise NotImplementedError() lines = code.split('\n') - context = {} + context: Dict[str, Node] = {} for line in lines: + # skip comments or empty lines line = line.strip() if line.startswith(';') or not line: continue + + res, query = UserInterface.split_query(line) last_res = res parsed = tree(query) - optimizations.replace_leaves(parsed, context) + replace_leaves(parsed, context) context[res] = parsed node = optimize_all(context[last_res], rels, tostr=False) return querysplit.split(node, rels) +def replace_leaves(node: Node, context: Dict[str, Node]) -> None: + ''' + If a name appearing in node appears + also in context, the parse tree is + modified to replace the node with the + subtree found in context. + ''' + if isinstance(node, Unary): + replace_leaves(node.child, context) + + if isinstance(node.child, Variable) and node.child.name in context: + node.child = context[node.child.name] + elif isinstance(node, Binary): + replace_leaves(node.left, context) + replace_leaves(node.right, context) + if isinstance(node.left, Variable) and node.left.name in context: + node.left = context[node.left.name] + if isinstance(node.right, Variable) and node.right.name in context: + node.right = context[node.right.name] + + def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: '''This function performs all the available optimizations. From 0af82fb225ae2405e8c96b3523a991b7210cb39c Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Wed, 26 Aug 2020 17:15:24 +0200 Subject: [PATCH 2/5] Python test for multiline --- tests_dir/multiline_optimization.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests_dir/multiline_optimization.py diff --git a/tests_dir/multiline_optimization.py b/tests_dir/multiline_optimization.py new file mode 100644 index 0000000..b02c77f --- /dev/null +++ b/tests_dir/multiline_optimization.py @@ -0,0 +1,8 @@ +from relational.optimizer import optimize_program + +a = optimize_program('''ppl_skills = people ⧓skills +ppl_skills1 = ppl_skills ∪ (people ⧓skills) +ppl_skills ∩ ppl_skills1 ⧓ dates''', {}) + +assert a == '''optm_a = people⧓skills +optm_b = optm_a⧓dates''' From 1181042ee09720a6c50a97e493d10b07e45b3911 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Wed, 26 Aug 2020 17:15:51 +0200 Subject: [PATCH 3/5] Handle empty context --- relational/optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/relational/optimizer.py b/relational/optimizer.py index 178da5f..ea2599d 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -137,7 +137,7 @@ def general_optimize(expression): return optimize_all(expression, None, specific=False, general=True) -def recursive_scan(function, node, rels) -> Tuple[Node, int]: +def recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]: '''Does a recursive optimization on the tree. This function will recursively execute the function given @@ -151,7 +151,7 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]: returned value.''' args = [] - if rels: + if rels is not None: args.append(rels) changes = 0 From a727c51e751459750155919a52d319d39d7f2d65 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Wed, 26 Aug 2020 17:16:14 +0200 Subject: [PATCH 4/5] Handle new parser nodes --- relational/querysplit.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/relational/querysplit.py b/relational/querysplit.py index d6cd699..94310c2 100644 --- a/relational/querysplit.py +++ b/relational/querysplit.py @@ -1,5 +1,5 @@ # Relational -# Copyright (C) 2016 Salvo "LtWorf" Tomaselli +# Copyright (C) 2016-2020 Salvo "LtWorf" Tomaselli # # Relational is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,14 +18,15 @@ # # This module splits a query into a program. +from typing import List, Dict, Tuple -from relational import parser +from relational.parser import Node, Binary, Unary, Variable class Program: - def __init__(self, rels): - self.queries = [] - self.dictionary = {} # Key is the query, value is the relation + def __init__(self, rels) -> None: + self.queries: List[Tuple[str, Node]] = [] + self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation self.vgen = vargen(rels, 'optm_') def __str__(self): @@ -34,7 +35,7 @@ class Program: r += '%s = %s' % (q[0], q[1]) + '\n' return r.rstrip() - def append_query(self, node): + def append_query(self, node: Node) -> Node: strnode = str(node) rel = self.dictionary.get(strnode) @@ -43,29 +44,27 @@ class Program: qname = next(self.vgen) self.queries.append((qname, node)) - n = parser.Node() - n.kind = parser.RELATION - n.name = qname + n = Variable(qname) self.dictionary[strnode] = n return n -def _separate(node, program): - if node.kind == parser.UNARY and node.child.kind != parser.RELATION: +def _separate(node: Node, program: Program) -> None: + if isinstance(node, Unary) and isinstance(node.child, Variable): _separate(node.child, program) rel = program.append_query(node.child) node.child = rel - elif node.kind == parser.BINARY: - if node.left.kind != parser.RELATION: + elif isinstance(node, Binary): + if not isinstance(node.left, Variable): _separate(node.left, program) rel = program.append_query(node.left) node.left = rel - if node.right.kind != parser.RELATION: + if not isinstance(node.right, Variable): _separate(node.right, program) rel = program.append_query(node.right) node.right = rel program.append_query(node) -def vargen(avoid, prefix=''): +def vargen(avoid: str, prefix: str=''): ''' Generates temp variables. @@ -87,7 +86,7 @@ def vargen(avoid, prefix=''): yield r count += 1 -def split(node, rels): +def split(node, rels) -> str: ''' Split a query into a program. From 9d6402b48c89cd7dd5c5f0bea400125dea469f9f Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Wed, 26 Aug 2020 17:26:55 +0200 Subject: [PATCH 5/5] Make a bunch of stuff private Only export the entry points, not all the inner functions. --- relational/optimizer.py | 45 +++++++++++----------------------------- relational/querysplit.py | 14 ++++++++++--- 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/relational/optimizer.py b/relational/optimizer.py index ea2599d..9de4444 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -49,13 +49,13 @@ def optimize_program(code: str, rels: Dict[str, Relation]) -> str: res, query = UserInterface.split_query(line) last_res = res parsed = tree(query) - replace_leaves(parsed, context) + _replace_leaves(parsed, context) context[res] = parsed node = optimize_all(context[last_res], rels, tostr=False) return querysplit.split(node, rels) -def replace_leaves(node: Node, context: Dict[str, Node]) -> None: +def _replace_leaves(node: Node, context: Dict[str, Node]) -> None: ''' If a name appearing in node appears also in context, the parse tree is @@ -63,13 +63,13 @@ def replace_leaves(node: Node, context: Dict[str, Node]) -> None: subtree found in context. ''' if isinstance(node, Unary): - replace_leaves(node.child, context) + _replace_leaves(node.child, context) if isinstance(node.child, Variable) and node.child.name in context: node.child = context[node.child.name] elif isinstance(node, Binary): - replace_leaves(node.left, context) - replace_leaves(node.right, context) + _replace_leaves(node.left, context) + _replace_leaves(node.right, context) if isinstance(node.left, Variable) and node.left.name in context: node.left = context[node.left.name] if isinstance(node.right, Variable) and node.right.name in context: @@ -93,20 +93,20 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif elif isinstance(expression, Node): n = expression else: - raise (TypeError("expression must be a string or a node")) + raise TypeError('expression must be a string or a node') total = 1 while total != 0: total = 0 if specific: for i in optimizations.specific_optimizations: - n, c = recursive_scan(i, n, rels) + n, c = _recursive_scan(i, n, rels) if c != 0 and isinstance(debug, list): debug.append(str(n)) total += c if general: for j in optimizations.general_optimizations: - n, c = recursive_scan(j, n, None) + n, c = _recursive_scan(j, n, None) if c != 0 and isinstance(debug, list): debug.append(str(n)) total += c @@ -116,28 +116,7 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif return n -def specific_optimize(expression, rels: Dict[str, Relation]): - '''This function performs specific optimizations. Means that it will need to - know the fields used by the relations. - - expression : see documentation of this module - rels: dic with relation name as key, and relation istance as value - - Return value: this will return an optimized version of the expression''' - return optimize_all(expression, rels, specific=True, general=False) - - -def general_optimize(expression): - '''This function performs general optimizations. Means that it will not need to - know the fields used by the relations - - expression : see documentation of this module - - Return value: this will return an optimized version of the expression''' - return optimize_all(expression, None, specific=False, general=True) - - -def recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]: +def _recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]: '''Does a recursive optimization on the tree. This function will recursively execute the function given @@ -159,11 +138,11 @@ def recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tupl changes += c if isinstance(node, Unary): - node.child, c = recursive_scan(function, node.child, rels) + node.child, c = _recursive_scan(function, node.child, rels) changes += c elif isinstance(node, Binary): - node.left, c = recursive_scan(function, node.left, rels) + node.left, c = _recursive_scan(function, node.left, rels) changes += c - node.right, c = recursive_scan(function, node.right, rels) + node.right, c = _recursive_scan(function, node.right, rels) changes += c return node, changes diff --git a/relational/querysplit.py b/relational/querysplit.py index 94310c2..27140b9 100644 --- a/relational/querysplit.py +++ b/relational/querysplit.py @@ -23,11 +23,14 @@ from typing import List, Dict, Tuple from relational.parser import Node, Binary, Unary, Variable +__all__ = ['split'] + + class Program: def __init__(self, rels) -> None: self.queries: List[Tuple[str, Node]] = [] self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation - self.vgen = vargen(rels, 'optm_') + self.vgen = _vargen(rels, 'optm_') def __str__(self): r = '' @@ -48,6 +51,7 @@ class Program: self.dictionary[strnode] = n return n + def _separate(node: Node, program: Program) -> None: if isinstance(node, Unary) and isinstance(node.child, Variable): _separate(node.child, program) @@ -64,7 +68,8 @@ def _separate(node: Node, program: Program) -> None: node.right = rel program.append_query(node) -def vargen(avoid: str, prefix: str=''): + +def _vargen(avoid: str, prefix: str=''): ''' Generates temp variables. @@ -86,12 +91,15 @@ def vargen(avoid: str, prefix: str=''): yield r count += 1 + def split(node, rels) -> str: ''' Split a query into a program. - The idea is that if there are duplicated subdtrees they + The idea is that if there are duplicated subtrees they get executed only once. + + This is used by the optimizer module. ''' p = Program(rels) _separate(node, p)