From be9fdd2e84e862a87725432a3a5aa61306513c1b Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Thu, 13 Aug 2020 10:44:39 +0200 Subject: [PATCH 1/9] Make relation immutable --- CHANGELOG | 1 + relational/relation.py | 43 +++++++++--------------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5168f1d..8e2454e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,5 @@ 3.0 +- Relations now use frozenset internally and are immutable - Refactored parser to use better typing - Refactored and fixed some optimizations - Added more test cases diff --git a/relational/relation.py b/relational/relation.py index c6589aa..a1396b7 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -34,8 +34,7 @@ __all__ = [ ] -class Relation: - +class Relation(NamedTuple): ''' This object defines a relation (as a group of consistent tuples) and operations. @@ -58,41 +57,17 @@ class Relation: An empty relation needs a header, and can be filled using the insert() method. ''' - def __hash__(self): - raise NotImplementedError() + header: 'Header' + content: FrozenSet[tuple] - def __init__(self, filename: Optional[Union[str, Path]] = None) -> None: - self._readonly = False - self.content: Set[tuple] = set() - - if filename is None: # Empty relation - self.header = Header([]) - return + @staticmethod + def load(filename: Union[str, Path]) -> 'Relation': with open(filename) as fp: reader = csv.reader(fp) # Creating a csv reader - self.header = Header(next(reader)) # read 1st line - iterator = ((self.insert(i) for i in reader)) - deque(iterator, maxlen=0) - - def _make_duplicate(self, copy: 'Relation') -> None: - '''Flag that the relation "copy" is pointing - to the same set as this relation.''' - - self._readonly = True - copy._readonly = True - - def _make_writable(self, copy_content: bool = True) -> None: - '''If this relation is marked as readonly, this - method will copy the content to make it writable too - - if copy_content is set to false, the caller must - separately copy the content.''' - - if self._readonly: - self._readonly = False - - if copy_content: - self.content = set(self.content) + header = Header(next(reader)) # read 1st line + #FIXME load properly + content = frozenset((tuple(i) for i in reader)) + return Relation(header, content) def __iter__(self): return iter(self.content) From 87ec732d240d501813bfe03353432e040d9db486 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Thu, 13 Aug 2020 11:03:28 +0200 Subject: [PATCH 2/9] Use the new API --- driver.py | 7 ++++--- relational/maintenance.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/driver.py b/driver.py index d9805e1..df92aa2 100755 --- a/driver.py +++ b/driver.py @@ -57,7 +57,7 @@ def load_relations(): print ("Loading relation %s with name %s..." % (i, relname)) - rels[relname] = relation.Relation('%s%s' % (examples_path, i)) + rels[relname] = relation.Relation.load('%s%s' % (examples_path, i)) print('done') @@ -163,7 +163,8 @@ def run_py_test(testname): '''Runs a python test, which evaluates expressions directly rather than queries''' print ("Running expression python test: " + colorize(testname, COLOR_MAGENTA)) - + exp_result = None + result = None try: expr = readfile('%s%s.python' % (tests_path, testname)) @@ -238,7 +239,7 @@ def run_test(testname): o_result = None try: - result_rel = relation.Relation('%s%s.result' % (tests_path, testname)) + result_rel = relation.Relation.load('%s%s.result' % (tests_path, testname)) query = readfile('%s%s.query' % (tests_path, testname)).strip() o_query = optimizer.optimize_all(query, rels) diff --git a/relational/maintenance.py b/relational/maintenance.py index 01a2374..5afa527 100644 --- a/relational/maintenance.py +++ b/relational/maintenance.py @@ -91,7 +91,7 @@ class UserInterface: def load(self, filename: str, name: str) -> None: '''Loads a relation from file, and gives it a name to be used in subsequent queries.''' - rel = Relation(filename) + rel = Relation.load(filename) self.set_relation(name, rel) def unload(self, name: str) -> None: @@ -204,7 +204,7 @@ class UserInterface: [varname =] query to assign the result to a new relation ''' - r = Relation() + r = None queries = query.split('\n') for query in queries: if query.strip() == '': @@ -219,4 +219,6 @@ class UserInterface: query, str(e) )) + if r is None: + raise Exception('No query executed') return r From 4722c9b0e880b96ffa9df0c4b2d80dd1c330c027 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Thu, 13 Aug 2020 11:04:22 +0200 Subject: [PATCH 3/9] Relations are now frozen --- relational/relation.py | 83 ++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 56 deletions(-) diff --git a/relational/relation.py b/relational/relation.py index a1396b7..c8a5f1e 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -20,7 +20,7 @@ # relational operations on them. import csv -from itertools import chain, repeat +from itertools import chain, repeat, product as iproduct from collections import deque from typing import * from pathlib import Path @@ -66,7 +66,7 @@ class Relation(NamedTuple): reader = csv.reader(fp) # Creating a csv reader header = Header(next(reader)) # read 1st line #FIXME load properly - content = frozenset((tuple(i) for i in reader)) + content = frozenset((tuple(Rstring(s) for s in i) for i in reader)) return Relation(header, content) def __iter__(self): @@ -111,14 +111,14 @@ class Relation(NamedTuple): ''' Selection, expr must be a valid Python expression; can contain field names. ''' - newt = Relation() - newt.header = Header(self.header) + header = Header(self.header) try: c_expr = compile(expr, 'selection', 'eval') except: raise Exception('Failed to compile expression: %s' % expr) + content = set() for i in self.content: # Fills the attributes dictionary with the values of the tuple attributes = {attr: i[j].autocast() @@ -127,11 +127,11 @@ class Relation(NamedTuple): try: if eval(c_expr, attributes): - newt.content.add(i) + content.add(i) except Exception as e: raise Exception( "Failed to evaluate %s\n%s" % (expr, e.__str__())) - return newt + return Relation(header, frozenset(content)) def product(self, other: 'Relation') -> 'Relation': ''' @@ -144,13 +144,10 @@ class Relation(NamedTuple): raise Exception( 'Unable to perform product on relations with colliding attributes' ) - newt = Relation() - newt.header = Header(self.header + other.header) + header = Header(self.header + other.header) - for i in self.content: - for j in other.content: - newt.content.add(i + j) - return newt + content = frozenset(i+j for i, j in iproduct(self.content, other.content)) + return Relation(header, content) def projection(self, *attributes) -> 'Relation': ''' @@ -172,16 +169,11 @@ class Relation(NamedTuple): if len(ids) == 0: raise Exception('Invalid attributes for projection') - newt = Relation() - # Create the header - h = (self.header[i] for i in ids) - newt.header = Header(h) + header = Header((self.header[i] for i in ids)) - # Create the body - for i in self.content: - row = (i[j] for j in ids) - newt.content.add(tuple(row)) - return newt + content = frozenset(tuple((i[j] for j in ids)) for i in self.content) + + return Relation(header, content) def rename(self, params: Dict[str, str]) -> 'Relation': ''' @@ -192,12 +184,8 @@ class Relation(NamedTuple): For example if you want to rename a to b, call rel.rename({'a':'b'}) ''' - newt = Relation() - newt.header = self.header.rename(params) - - newt.content = self.content - self._make_duplicate(newt) - return newt + header = self.header.rename(params) + return Relation(header, self.content) def intersection(self, other: 'Relation') -> 'Relation': ''' @@ -206,22 +194,14 @@ class Relation(NamedTuple): Will return an empty one if there are no common items. ''' other = self._rearrange(other) # Rearranges attributes' order - newt = Relation() - newt.header = Header(self.header) - - newt.content = self.content.intersection(other.content) - return newt + return Relation(self.header, self.content.intersection(other.content)) def difference(self, other: 'Relation') -> 'Relation': '''Difference operation. The result will contain items present in first operand but not in second one. ''' other = self._rearrange(other) # Rearranges attributes' order - newt = Relation() - newt.header = Header(self.header) - - newt.content = self.content.difference(other.content) - return newt + return Relation(self.header, self.content.difference(other.content)) def division(self, other: 'Relation') -> 'Relation': '''Division operator @@ -254,11 +234,7 @@ class Relation(NamedTuple): and second operands. ''' other = self._rearrange(other) # Rearranges attributes' order - newt = Relation() - newt.header = Header(self.header) - - newt.content = self.content.union(other.content) - return newt + return Relation(self.header, self.content.union(other.content)) def thetajoin(self, other: 'Relation', expr: str) -> 'Relation': '''Defined as product and then selection with the given expression.''' @@ -288,11 +264,10 @@ class Relation(NamedTuple): shared = self.header.intersection(other.header) - newt = Relation() # Creates the new relation # Creating the header with all the fields, done like that because order is # needed h = (i for i in other.header if i not in shared) - newt.header = Header(chain(self.header, h)) + header = Header(chain(self.header, h)) # Shared ids of self sid = self.header.getAttributesId(shared) @@ -302,6 +277,7 @@ class Relation(NamedTuple): # Non shared ids of the other relation noid = [i for i in range(len(other.header)) if i not in oid] + content = set() for i in self.content: # Tuple partecipated to the join? added = False @@ -313,14 +289,14 @@ class Relation(NamedTuple): if match: item = chain(i, (j[l] for l in noid)) - newt.content.add(tuple(item)) + content.add(tuple(item)) added = True # If it didn't partecipate, adds it if not added: item = chain(i, repeat(Rstring('---'), len(noid))) - newt.content.add(tuple(item)) + content.add(tuple(item)) - return newt + return Relation(header, frozenset(content)) def join(self, other: 'Relation') -> 'Relation': ''' @@ -331,12 +307,10 @@ class Relation(NamedTuple): # List of attributes in common between the relations shared = self.header.intersection(other.header) - newt = Relation() # Creates the new relation - # Creating the header with all the fields, done like that because order is # needed h = (i for i in other.header if i not in shared) - newt.header = Header(chain(self.header, h)) + header = Header(chain(self.header, h)) # Shared ids of self sid = self.header.getAttributesId(shared) @@ -346,6 +320,7 @@ class Relation(NamedTuple): # Non shared ids of the other relation noid = [i for i in range(len(other.header)) if i not in oid] + content = set() for i in self.content: for j in other.content: match = True @@ -354,9 +329,9 @@ class Relation(NamedTuple): if match: item = chain(i, (j[l] for l in noid)) - newt.content.add(tuple(item)) + content.add(tuple(item)) - return newt + return Relation(header, frozenset(content)) def __eq__(self, other): if not isinstance(other, Relation): @@ -410,7 +385,6 @@ class Relation(NamedTuple): Returns the number of affected rows. ''' - self._make_writable(copy_content=False) affected = self.selection(expr) not_affected = self.difference(affected) @@ -446,8 +420,6 @@ class Relation(NamedTuple): ) ) - self._make_writable() - prevlen = len(self.content) self.content.add(tuple(map(Rstring, values))) return len(self.content) - prevlen @@ -462,7 +434,6 @@ class Relation(NamedTuple): Returns the number of affected rows.''' l = len(self.content) - self._make_writable(copy_content=False) self.content = self.difference(self.selection(expr)).content return len(self.content) - l From 08e5131479dd0f015b4326fd3d68444cb5440a86 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Thu, 13 Aug 2020 14:37:55 +0200 Subject: [PATCH 4/9] Removed update/insert/delete I don't use them and AFAIK this module has no other users --- relational/relation.py | 66 ------------------------------------- tests_dir/rename_insert1.py | 4 --- tests_dir/rename_insert2.py | 4 --- tests_dir/update.py | 16 --------- 4 files changed, 90 deletions(-) delete mode 100644 tests_dir/rename_insert1.py delete mode 100644 tests_dir/rename_insert2.py delete mode 100644 tests_dir/update.py diff --git a/relational/relation.py b/relational/relation.py index c8a5f1e..9527aef 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -371,72 +371,6 @@ class Relation(NamedTuple): return res - def update(self, expr: str, dic: dict) -> int: - ''' - Updates certain values of a relation. - - expr must be a valid Python expression that can contain field names. - - This operation will change the relation itself instead of generating a new one, - updating all the tuples where expr evaluates as True. - - Dic must be a dictionary that has the form "field name":"new value". Every kind of value - will be converted into a string. - - Returns the number of affected rows. - ''' - affected = self.selection(expr) - not_affected = self.difference(affected) - - new_values = tuple( - zip(self.header.getAttributesId(dic.keys()), dic.values()) - ) - - for i in set(affected.content): - li = list(i) - - for column, value in new_values: - li[column] = value - not_affected.insert(li) - - self.content = not_affected.content - return len(affected) - - def insert(self, values: Union[list,tuple]) -> int: - ''' - Inserts a tuple in the relation. - This function will not insert duplicate tuples. - All the values will be converted in string. - Will return the number of inserted rows. - - Will fail if the tuple has the wrong amount of items. - ''' - - if len(self.header) != len(values): - raise Exception( - 'Tuple has the wrong size. Expected %d, got %d' % ( - len(self.header), - len(values) - ) - ) - - prevlen = len(self.content) - self.content.add(tuple(map(Rstring, values))) - return len(self.content) - prevlen - - def delete(self, expr: str) -> int: - ''' - Delete, expr must be a valid Python expression; can contain field names. - - This operation will change the relation itself instead of generating a new one, - deleting all the tuples where expr evaluates as True. - - Returns the number of affected rows.''' - - l = len(self.content) - self.content = self.difference(self.selection(expr)).content - return len(self.content) - l - class Header(tuple): diff --git a/tests_dir/rename_insert1.py b/tests_dir/rename_insert1.py deleted file mode 100644 index b2ccad6..0000000 --- a/tests_dir/rename_insert1.py +++ /dev/null @@ -1,4 +0,0 @@ -p1=people.rename({"id":"ido"}) -people.insert((123,"lala",0,31)) -assert people!=p1 -people.delete("id==123") diff --git a/tests_dir/rename_insert2.py b/tests_dir/rename_insert2.py deleted file mode 100644 index 8e271af..0000000 --- a/tests_dir/rename_insert2.py +++ /dev/null @@ -1,4 +0,0 @@ -p1=people.rename({"id":"ido"}) -p1.insert((123,"lala",0,31)) -assert people!=p1 -people.delete("id==123") diff --git a/tests_dir/update.py b/tests_dir/update.py deleted file mode 100644 index b0d0b6a..0000000 --- a/tests_dir/update.py +++ /dev/null @@ -1,16 +0,0 @@ -p1=people -p2=p1.rename({'id':'i'}) -p2=p2.rename({'i':'id'}) -assert p1==p2 -assert p1._readonly -assert p2._readonly -# It is VERY important to not change the original relations -# or other tests might fail randomly, since the relations are -# only loaded once - -p2.update('age==20', {'age':50}) -assert p2._readonly == False -assert p1!=p2 -p3 = p2.selection('age!=50') -p4 = p1.selection('age!=20') -assert p3==p4 From 683ff6f26d7025206b9992b1b3b96abfa448a644 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Thu, 13 Aug 2020 14:38:42 +0200 Subject: [PATCH 5/9] Better typing --- relational/relation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relational/relation.py b/relational/relation.py index 9527aef..71cdb37 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -58,7 +58,7 @@ class Relation(NamedTuple): method. ''' header: 'Header' - content: FrozenSet[tuple] + content: FrozenSet[Tuple[Rstring, ...]] @staticmethod def load(filename: Union[str, Path]) -> 'Relation': From 519cc35b183e167c1e31cd30e8c1740765f2400f Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 15 Aug 2020 09:17:23 +0200 Subject: [PATCH 6/9] Use lists for partial content So conversion to set is done only once. --- relational/relation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/relational/relation.py b/relational/relation.py index 71cdb37..692db90 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -118,7 +118,7 @@ class Relation(NamedTuple): except: raise Exception('Failed to compile expression: %s' % expr) - content = set() + content = [] for i in self.content: # Fills the attributes dictionary with the values of the tuple attributes = {attr: i[j].autocast() @@ -127,7 +127,7 @@ class Relation(NamedTuple): try: if eval(c_expr, attributes): - content.add(i) + content.append(i) except Exception as e: raise Exception( "Failed to evaluate %s\n%s" % (expr, e.__str__())) @@ -277,7 +277,7 @@ class Relation(NamedTuple): # Non shared ids of the other relation noid = [i for i in range(len(other.header)) if i not in oid] - content = set() + content = [] for i in self.content: # Tuple partecipated to the join? added = False @@ -289,12 +289,12 @@ class Relation(NamedTuple): if match: item = chain(i, (j[l] for l in noid)) - content.add(tuple(item)) + content.append(tuple(item)) added = True # If it didn't partecipate, adds it if not added: item = chain(i, repeat(Rstring('---'), len(noid))) - content.add(tuple(item)) + content.append(tuple(item)) return Relation(header, frozenset(content)) @@ -320,7 +320,7 @@ class Relation(NamedTuple): # Non shared ids of the other relation noid = [i for i in range(len(other.header)) if i not in oid] - content = set() + content = [] for i in self.content: for j in other.content: match = True @@ -329,7 +329,7 @@ class Relation(NamedTuple): if match: item = chain(i, (j[l] for l in noid)) - content.add(tuple(item)) + content.append(tuple(item)) return Relation(header, frozenset(content)) From 94404f60f9c3e88f1844d236cf53d4ec19f8af72 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 15 Aug 2020 20:53:46 +0200 Subject: [PATCH 7/9] Test the load function --- tests_dir/size.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests_dir/size.py diff --git a/tests_dir/size.py b/tests_dir/size.py new file mode 100644 index 0000000..a6f3f07 --- /dev/null +++ b/tests_dir/size.py @@ -0,0 +1,2 @@ +assert len(people) == 8 +assert len(people.header) == 4 From 1bbef1f1b8aaaa449971d06990258b3070abc000 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 15 Aug 2020 20:54:58 +0200 Subject: [PATCH 8/9] function to load a relation from iterators --- relational/relation.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/relational/relation.py b/relational/relation.py index 692db90..d9f1703 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -62,12 +62,30 @@ class Relation(NamedTuple): @staticmethod def load(filename: Union[str, Path]) -> 'Relation': + ''' + Load a relation object from a csv file. + + The 1st row is the header and the other rows are the content. + ''' with open(filename) as fp: reader = csv.reader(fp) # Creating a csv reader header = Header(next(reader)) # read 1st line - #FIXME load properly - content = frozenset((tuple(Rstring(s) for s in i) for i in reader)) - return Relation(header, content) + return Relation.create_from(header, reader) + + @staticmethod + def create_from(header: Iterable[str], content: Iterable[Iterable[str]]) -> 'Relation': + ''' + Iterator for the header, and iterator for the content. + ''' + header = Header(header) + r_content: List[Tuple[Rstring, ...]] = [] + for row in content: + content_row: Tuple[Rstring, ...] = tuple(Rstring(i) for i in row) + if len(content_row) != len(header): + raise ValueError(f'Line {row} contains an incorrect amount of values') + r_content.append(content_row) + return Relation(header, frozenset(r_content)) + def __iter__(self): return iter(self.content) From 200c748ee5134797268a15137cd79d265d4b0c02 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 15 Aug 2020 21:06:57 +0200 Subject: [PATCH 9/9] Use new API to edit relations --- relational_gui/creator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/relational_gui/creator.py b/relational_gui/creator.py index 1da2e78..412cd22 100644 --- a/relational_gui/creator.py +++ b/relational_gui/creator.py @@ -84,14 +84,13 @@ class creatorForm(QtWidgets.QDialog): for i in range(self.table.columnCount())) try: - header = relation.header(h) + header = relation.Header(h) except Exception as e: QtWidgets.QMessageBox.information(None, QtWidgets.QApplication.translate("Form", "Error"), "%s\n%s" % ( QtWidgets.QApplication.translate("Form", "Header error!"), e.__str__())) return None - r = relation.relation() - r.header = header + content = [] for i in range(1, self.table.rowCount()): hlist = [] for j in range(self.table.columnCount()): @@ -101,11 +100,10 @@ class creatorForm(QtWidgets.QDialog): QtWidgets.QMessageBox.information(None, QtWidgets.QApplication.translate( "Form", "Error"), QtWidgets.QApplication.translate("Form", "Unset value in %d,%d!" % (i + 1, j + 1))) return None - r.insert(hlist) - return r + content.append(hlist) + return relation.Relation.create_from(header, content) def accept(self): - self.result_relation = self.create_relation() # Doesn't close the window in case of errors