2008-07-24 10:08:55 +07:00
|
|
|
# Relational
|
2008-07-14 07:18:35 +07:00
|
|
|
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
|
|
|
#
|
|
|
|
# Relation is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
|
|
|
|
2008-08-08 09:28:47 +07:00
|
|
|
from rtypes import *
|
2008-08-08 07:41:16 +07:00
|
|
|
|
2008-07-14 07:18:35 +07:00
|
|
|
class relation (object):
    '''This object defines a relation (as a group of consistent tuples) and
    the operations that can be applied to it.

    A relation can be represented using a table: "header" holds the names of
    the attributes and "content" is a list of rows, each row being a list of
    strings with one value per attribute.

    Calling an operation and providing a non relation parameter when a
    relation is expected will result in a None value.'''

    def __init__(self, filename=""):
        '''Creates a relation, accepts a filename and then it will load the
        relation from that file: the first line holds the space separated
        names of the attributes, every following line holds one space
        separated tuple. Blank lines are skipped.
        If no parameter is supplied an empty relation is created. Empty
        relations are used in internal operations.'''
        self.content = []
        if len(filename) == 0:  #Empty relation
            self.header = header([])
            return

        #"with" closes the file even if reading fails half way through
        with open(filename) as fp:
            self.header = header(fp.readline().strip().split(" "))
            for row in fp:  #Reads the content of the relation
                row = row.strip()
                if row:  #A blank line is not a tuple
                    self.content.append(row.split(" "))

    def save(self, filename):
        '''Saves the relation in a file, using the format read by the
        constructor. Every value is followed by a single space and rows are
        separated by a newline; no newline is written after the last row.'''
        lines = ["".join(["%s " % (f,) for f in self.header.attributes])]
        for r in self.content:
            lines.append("".join(["%s " % (i,) for i in r]))

        with open(filename, 'w') as fp:
            fp.write("\n".join(lines))

    def rearrange(self, other):
        '''If two relations share the same attributes in a different order,
        this method will use projection to make them have the same
        attributes' order.
        It is not exactly related to relational algebra. Just a method used
        internally.
        Returns the rearranged copy of other (projection also removes its
        duplicated tuples).
        Will return None if they don't share the same attributes, or if other
        is not a relation.'''
        if self.__class__ != other.__class__:
            return None

        shared = self.header.sharedAttributes(other.header)
        if shared == len(self.header.attributes) == len(other.header.attributes):
            return other.projection(list(self.header.attributes))
        return None

    def _typed(self, value):
        '''Converts the string stored in a tuple into the value used while
        evaluating a selection: int, float, rdate or the unchanged string.
        rstring, isDate and rdate are provided by the rtypes module.'''
        if value.isdigit():
            return int(value)
        if rstring(value).isFloat():
            return float(value)
        if isDate(value):
            return rdate(value)
        return value

    def selection(self, expr):
        '''Selection, expr must be a valid boolean expression, can contain
        field names, constants, math operations and boolean ones.
        Returns a new relation holding a copy of every tuple for which the
        expression is true.'''
        newt = relation()
        newt.header = header(list(self.header.attributes))

        names = self.header.attributes
        attributes = {}  #Namespace used to evaluate the expression
        for i in self.content:
            for j in range(len(names)):
                attributes[names[j]] = self._typed(i[j])

            #NOTE: expr is executed with eval(), so it must never come from
            #an untrusted source.
            if eval(expr, attributes):
                newt.content.append(list(i))
        return newt

    def product(self, other):
        '''Cartesian product, attributes must be different to avoid collisions.
        Doing this operation on relations with colliding attributes will
        cause the return of a None value.
        It is possible to use rename on attributes and then use the product.'''
        if (self.__class__ != other.__class__) or (self.header.sharedAttributes(other.header) != 0):
            return None

        newt = relation()
        newt.header = header(self.header.attributes + other.header.attributes)
        newt.content = [i + j for i in self.content for j in other.content]
        return newt

    def projection(self, *attributes):
        '''Projection operator, takes many parameters, one for each field to
        use.
        Can also use a single parameter with a list (or a tuple).
        Names that are not attributes of the relation are ignored.
        Will delete duplicate items.
        If an empty list or no parameters are provided, returns None.'''
        if len(attributes) == 0:
            return None

        #Parameters are supplied in a list, instead with multiple parameters
        if isinstance(attributes[0], (list, tuple)):
            attributes = attributes[0]

        #Avoiding duplicated attributes, keeping the requested order
        unique = []
        for i in attributes:
            if i not in unique:
                unique.append(i)

        ids = self.header.getAttributesId(unique)
        if len(ids) == 0:
            return None

        newt = relation()
        #Create the header
        newt.header = header([self.header.attributes[i] for i in ids])

        #Create the body
        seen = set()  #Rows already inserted, as tuples to allow fast lookups
        for i in self.content:
            row = [i[j] for j in ids]
            key = tuple(row)
            if key not in seen:  #Avoids duplicated items
                seen.add(key)
                newt.content.append(row)
        return newt

    def rename(self, *params):
        '''Operation rename. Takes an even number of parameters:
        (old,new,old,new....)
        Will replace the 1st parameter with the 2nd, the 3rd with 4th, and so
        on... Pairs are applied one after the other, in the given order.
        If an "old" field doesn't exist, None will be returned.
        An odd number of parameters causes an IndexError.'''
        newt = relation()
        newt.header = header(list(self.header.attributes))

        for i in range(0, len(params), 2):
            if not newt.header.rename(params[i], params[i + 1]):
                return None

        newt.content = [list(r) for r in self.content]
        return newt

    def _aligned(self, other):
        '''Returns a copy of other having its attributes in the same order
        used by self, or None if other is not a relation with the same
        attributes. Used by intersection, difference and union.'''
        other = self.rearrange(other)  #Rearranges attributes' order
        if (self.__class__ != other.__class__) or (self.header != other.header):
            return None
        return other

    def intersection(self, other):
        '''Intersection operation. The result will contain items present in
        both operands.
        Will return an empty one if there are no common items.
        Will return None if headers are different.
        It is possible to use projection and rename to make headers match.'''
        other = self._aligned(other)
        if other is None:
            return None

        newt = relation()
        newt.header = header(list(self.header.attributes))

        #Adds only elements that are also in other, duplicating them
        present = set(tuple(e) for e in other.content)
        newt.content = [list(e) for e in self.content if tuple(e) in present]
        return newt

    def difference(self, other):
        '''Difference operation. The result will contain items present in
        first operand but not in second one.
        Will return an empty one if the second is a superset of first.
        Will return None if headers are different.
        It is possible to use projection and rename to make headers match.'''
        other = self._aligned(other)
        if other is None:
            return None

        newt = relation()
        newt.header = header(list(self.header.attributes))

        #Adds only elements not in other, duplicating them
        present = set(tuple(e) for e in other.content)
        newt.content = [list(e) for e in self.content if tuple(e) not in present]
        return newt

    def union(self, other):
        '''Union operation. The result will contain items present in first
        and second operands.
        Will return an empty one if both are empty.
        Will not insert items of the second operand that are already present.
        Will return None if headers are different.
        It is possible to use projection and rename to make headers match.'''
        other = self._aligned(other)
        if other is None:
            return None

        newt = relation()
        newt.header = header(list(self.header.attributes))

        #Adds elements from self, duplicating them all
        newt.content = [list(e) for e in self.content]

        #Adds elements from other, when they are not already in the result
        seen = set(tuple(e) for e in self.content)
        for e in other.content:
            key = tuple(e)
            if key not in seen:
                seen.add(key)
                newt.content.append(list(e))
        return newt

    def thetajoin(self, other, expr):
        '''Defined as product and then selection with the given expression.
        Returns None when the product is not possible (see product).'''
        prod = self.product(other)
        if prod is None:
            return None
        return prod.selection(expr)

    def outer(self, other):
        '''Does a left and a right outer join and returns their union.'''
        a = self.outer_right(other)
        b = self.outer_left(other)
        return a.union(b)

    def outer_right(self, other):
        '''Same as left join, with swapped parameters'''
        return self.outer_left(other, True)

    def outer_left(self, other, swap=False):
        '''Outer left join. Considers self as left and param as right. If the
        tuple has no correspondence, empty attributes are filled with a "---"
        string. This is due to the fact that empty string or a space would
        cause problems when saving the relation.
        Just like natural join, it works considering shared attributes.
        If swap is True, it will behave as a right join'''
        if swap:
            return other._natural_join(self, True)
        return self._natural_join(other, True)

    def join(self, other):
        '''Natural join, joins on shared attributes (one or more). If there
        are no shared attributes, it will behave as cartesian product.'''
        return self._natural_join(other, False)

    def _natural_join(self, other, keep_unmatched):
        '''Implementation shared by join and outer_left. Joins self (left)
        and other (right) on the attributes they have in common.
        If keep_unmatched is True, the tuples of self that have no
        correspondence in other are kept too, filling the attributes coming
        from other with the "---" string.'''
        shared = [i for i in self.header.attributes if i in other.header.attributes]

        newt = relation()  #Creates the new relation

        #Adds all the attributes of the 1st relation
        newt.header = header(list(self.header.attributes))

        #Adds all the attributes of the 2nd, when non shared
        for i in other.header.attributes:
            if i not in shared:
                newt.header.attributes.append(i)

        #Shared ids of self
        sid = self.header.getAttributesId(shared)
        #Shared ids of the other relation
        oid = other.header.getAttributesId(shared)
        #Non shared ids of the other relation
        noid = [i for i in range(len(other.header.attributes)) if i not in oid]

        for i in self.content:
            added = False  #Did the tuple participate in the join?
            for j in other.content:
                #With no shared attributes every couple of tuples matches
                if all(i[s] == j[o] for s, o in zip(sid, oid)):
                    newt.content.append(list(i) + [j[l] for l in noid])
                    added = True

            #If it didn't participate, adds it anyway when requested
            if keep_unmatched and not added:
                newt.content.append(list(i) + ["---"] * len(noid))
        return newt

    def __str__(self):
        '''Returns a string representation of the relation, can be printed
        with monospaced fonts. Every column is as wide as its longest value
        plus two spaces.'''
        #Maximum length of the strings of every column
        m_len = [len(f) for f in self.header.attributes]
        for f in self.content:
            for col, i in enumerate(f):
                if len(i) > m_len[col]:
                    m_len[col] = len(i)

        names = self.header.attributes
        lines = ["".join([names[f].ljust(2 + m_len[f]) for f in range(len(names))])]
        for r in self.content:
            lines.append("".join([i.ljust(2 + m_len[col]) for col, i in enumerate(r)]))
        return "\n".join(lines)
|
2008-07-16 08:42:02 +07:00
|
|
|
|
2008-07-14 07:18:35 +07:00
|
|
|
class header (object):
    '''This class defines the header of a relation.
    It is used within relations to know if requested operations are accepted'''

    def __init__(self, attributes):
        '''Accepts a list with attributes' names. Names MUST be unique.
        The list is stored as it is, without copying it.'''
        self.attributes = attributes

    def __repr__(self):
        '''Returns a representation like header(['a', 'b'])'''
        return "header(%s)" % (repr(self.attributes),)

    def rename(self, old, new):
        '''Renames a field. Doesn't check if it is a duplicate.
        Returns True if the field was renamed, False otherwise'''
        for i, name in enumerate(self.attributes):
            if name == old:
                self.attributes[i] = new
                return True
        return False  #Requested field was not found

    def sharedAttributes(self, other):
        '''Returns how many attributes this header has in common with a given one'''
        return len([i for i in self.attributes if i in other.attributes])

    def __str__(self):
        '''Returns String representation of the field's list'''
        return str(self.attributes)

    def __eq__(self, other):
        '''Two headers are equal when they have the same attributes, in the
        same order. Objects without attributes (like None) are never equal
        to a header.'''
        if not hasattr(other, "attributes"):
            #Lets Python fall back on its default comparison, instead of
            #failing with an AttributeError
            return NotImplemented
        return self.attributes == other.attributes

    def __ne__(self, other):
        '''Opposite of __eq__'''
        if not hasattr(other, "attributes"):
            return NotImplemented
        return self.attributes != other.attributes

    def getAttributesId(self, param):
        '''Returns a list with numeric index corresponding to field's name.
        Indexes follow the order of the names in param; names that are not in
        the header are ignored.'''
        return [j
                for i in param
                for j, name in enumerate(self.attributes)
                if i == name]
|
|
|
|
|
2008-08-08 07:41:16 +07:00
|
|
|
|
2008-07-14 07:18:35 +07:00
|
|
|
if __name__=="__main__":
    #Small manual check of the header class, executed only when this file is
    #run as a script. The print calls use a single argument, so they give the
    #same output on both Python 2 and Python 3.
    a=["id","nome","cognome"]

    b=header(a)
    print("b= %r" % (b,))
    b.rename("nome","nick")

    #A new list is created, so c keeps the original names
    a=["id","nome","cognome"]
    c=header(a)
    print("%s %s" % (b,c))
    print(b==c)
|
|
|
|
|