- migrating to unicode

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@329 014f5005-505e-4b48-8d0a-63407b615a7c
master
LtWorf 2011-10-15 21:45:33 +07:00
parent 24021ddeb0
commit 17452f1e0b
7 changed files with 112 additions and 75 deletions

@ -7,6 +7,7 @@
- Can use both PySide and PyQt
- Removed buttons for adding and deleting tuples
- Can edit relations within the GUI
- Migrating to unicode strings (instead of byte strings)
1.0
- Adds history in the GUI

@ -36,6 +36,21 @@ from tokenize import generate_tokens
sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
PRODUCT=parser.PRODUCT
DIFFERENCE=parser.DIFFERENCE
UNION=parser.UNION
INTERSECTION=parser.INTERSECTION
DIVISION=parser.DIVISION
JOIN=parser.JOIN
JOIN_LEFT=parser.JOIN_LEFT
JOIN_RIGHT=parser.JOIN_RIGHT
JOIN_FULL=parser.JOIN_FULL
PROJECTION=parser.PROJECTION
SELECTION=parser.SELECTION
RENAME=parser.RENAME
ARROW=parser.ARROW
def replace_node(replace,replacement):
'''This function replaces "replace" node with the node "with",
the father of the node will now point to the with node'''
@ -86,7 +101,7 @@ def duplicated_select(n):
in and
'''
changes=0
if n.name=='σ' and n.child.name=='σ':
if n.name==SELECTION and n.child.name==SELECTION:
if n.prop != n.child.prop: #Nested but different, joining them
n.prop = n.prop + " and " + n.child.prop
@ -114,30 +129,30 @@ def futile_union_intersection_subtraction(n):
changes=0
#Union and intersection of the same thing
if n.name in ('','') and n.left==n.right:
if n.name in (UNION,INTERSECTION) and n.left==n.right:
changes=1
replace_node(n,n.left)
#selection and union of the same thing
elif (n.name == ''):
if n.left.name=='σ' and n.left.child==n.right:
elif (n.name == UNION):
if n.left.name==SELECTION and n.left.child==n.right:
changes=1
replace_node(n,n.right)
elif n.right.name=='σ' and n.right.child==n.left:
elif n.right.name==SELECTION and n.right.child==n.left:
changes=1
replace_node(n,n.left)
#selection and intersection of the same thing
elif n.name == '':
if n.left.name=='σ' and n.left.child==n.right:
elif n.name == INTERSECTION:
if n.left.name==SELECTION and n.left.child==n.right:
changes=1
replace_node(n,n.left)
elif n.right.name=='σ' and n.right.child==n.left:
elif n.right.name==SELECTION and n.right.child==n.left:
changes=1
replace_node(n,n.right)
#Subtraction and selection of the same thing
elif (n.name == '-' and (n.right.name=='σ' and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection
elif (n.name == DIFFERENCE and (n.right.name==SELECTION and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection
n.name=n.right.name
n.kind=n.right.kind
n.child=n.right.child
@ -145,10 +160,10 @@ def futile_union_intersection_subtraction(n):
n.left=n.right=None
#Subtraction of the same thing or with selection on the left child
elif (n.name=='-' and ((n.left==n.right) or (n.left.name=='σ' and n.left.child==n.right)) ):#Empty relation
elif (n.name==DIFFERENCE and ((n.left==n.right) or (n.left.name==SELECTION and n.left.child==n.right)) ):#Empty relation
changes=1
n.kind=parser.UNARY
n.name='σ'
n.name=SELECTION
n.prop='False'
n.child=n.left.get_left_leaf()
#n.left=n.right=None
@ -161,8 +176,8 @@ def down_to_unions_subtractions_intersections(n):
σ i==2 (c) σ i==2(d).
'''
changes=0
_o=('','-','')
if n.name=='σ' and n.child.name in _o:
_o=(UNION,DIFFERENCE,INTERSECTION)
if n.name==SELECTION and n.child.name in _o:
left=parser.node()
left.prop=n.prop
@ -190,7 +205,7 @@ def duplicated_projection(n):
them with π i (R)'''
changes=0
if n.name=='π' and n.child.name=='π':
if n.name==PROJECTION and n.child.name==PROJECTION:
n.child=n.child.child
changes+=1
@ -201,13 +216,13 @@ def selection_inside_projection(n):
converts them into π k(σ j (R))'''
changes=0
if n.name=='σ' and n.child.name=='π':
if n.name==SELECTION and n.child.name==PROJECTION:
changes=1
temp=n.prop
n.prop=n.child.prop
n.child.prop=temp
n.name='π'
n.child.name='σ'
n.name=PROJECTION
n.child.name=SELECTION
return changes+recoursive_scan(selection_inside_projection,n)
@ -219,15 +234,15 @@ def swap_union_renames(n):
Does the same with subtraction and intersection'''
changes=0
if n.name in ('-','','') and n.left.name==n.right.name and n.left.name=='ρ':
if n.name in (DIFFERENCE,UNION,INTERSECTION) and n.left.name==n.right.name and n.left.name==RENAME:
l_vars={}
for i in n.left.prop.split(','):
q=i.split('')
q=i.split(ARROW)
l_vars[q[0].strip()]=q[1].strip()
r_vars={}
for i in n.right.prop.split(','):
q=i.split('')
q=i.split(ARROW)
r_vars[q[0].strip()]=q[1].strip()
if r_vars==l_vars:
@ -240,7 +255,7 @@ def swap_union_renames(n):
q.left=n.left.child
q.right=n.right.child
n.name='ρ'
n.name=RENAME
n.kind=parser.UNARY
n.child=q
n.prop=n.left.prop
@ -252,14 +267,14 @@ def futile_renames(n):
'''This function purges renames like id->id'''
changes=0
if n.name=='ρ':
if n.name==RENAME:
#Located two nested renames.
changes=1
#Creating a dictionary with the attributes
_vars={}
for i in n.prop.split(','):
q=i.split('')
q=i.split(ARROW)
_vars[q[0].strip()]=q[1].strip()
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
for key in list(_vars.keys()):
@ -290,7 +305,7 @@ def subsequent_renames(n):
futile_renames(n)
changes=0
if n.name=='ρ' and n.child.name==n.name:
if n.name==RENAME and n.child.name==n.name:
#Located two nested renames.
changes=1
#Joining the attribute into one
@ -300,7 +315,7 @@ def subsequent_renames(n):
#Creating a dictionary with the attributes
_vars={}
for i in n.prop.split(','):
q=i.split('')
q=i.split(ARROW)
_vars[q[0].strip()]=q[1].strip()
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
for key in list(_vars.keys()):
@ -338,7 +353,7 @@ def tokenize_select(expression):
selection. The expression can contain parenthesis.
It will use a subclass of str with the attribute level, which
will specify the nesting level of the token into parenthesis.'''
g=generate_tokens(StringIO(expression).readline)
g=generate_tokens(StringIO(str(expression)).readline)
l=list(token[1] for token in g)
l.remove('')
@ -375,13 +390,13 @@ def swap_rename_projection(n):
'''
changes=0
if n.name=='π' and n.child.name=='ρ':
if n.name==PROJECTION and n.child.name==RENAME:
changes=1
#π index,name(ρ id➡index(R))
_vars={}
for i in n.child.prop.split(','):
q=i.split('')
q=i.split(ARROW)
_vars[q[1].strip()]=q[0].strip()
_pr=n.prop.split(',')
@ -401,7 +416,7 @@ def swap_rename_projection(n):
n.prop+='%s%s,' % (_vars[i],i)
n.prop=n.prop[:-1]
n.child.name='π'
n.child.name=PROJECTION
n.child.prop=''
for i in _pr:
n.child.prop+=i+','
@ -416,12 +431,12 @@ def swap_rename_select(n):
selection, so the operation is still valid.'''
changes=0
if n.name=='σ' and n.child.name=='ρ':
if n.name==SELECTION and n.child.name==RENAME:
changes=1
#Dictionary containing attributes of rename
_vars={}
for i in n.child.prop.split(','):
q=i.split('')
q=i.split(ARROW)
_vars[q[1].strip()]=q[0].strip()
#tokenizes expression in select
@ -437,8 +452,8 @@ def swap_rename_select(n):
_tokens[i]=_vars[_tokens[i].split('.')[0]]+'.'+splitted[1]
#Swapping operators
n.name='ρ'
n.child.name='σ'
n.name=RENAME
n.child.name=SELECTION
n.prop=n.child.prop
n.child.prop=''
@ -452,16 +467,16 @@ def select_union_intersect_subtract(n):
and replaces them with σ (i OR q) (a)
Removing a O() operation like the union'''
changes=0
if n.name in ('', '', '-') and n.left.name=='σ' and n.right.name=='σ' and n.left.child==n.right.child:
if n.name in (UNION, INTERSECTION, DIFFERENCE) and n.left.name==SELECTION and n.right.name==SELECTION and n.left.child==n.right.child:
cahnges=1
d={'':'or', '':'and', '-':'and not'}
d={UNION:'or', INTERSECTION:'and', DIFFERENCE:'and not'}
op=d[n.name]
newnode=parser.node()
newnode.prop='((%s) %s (%s))' % (n.left.prop,op,n.right.prop)
newnode.name='σ'
newnode.name=SELECTION
newnode.child=n.left.child
newnode.kind=parser.UNARY
replace_node(n,newnode)
@ -474,12 +489,11 @@ def selection_and_product(n,rels):
i contains attributes belonging to Q and l contains attributes belonging to both'''
changes=0
if n.name=='σ' and n.child.name in ('*','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
if n.name==SELECTION and n.child.name in (PRODUCT,JOIN,JOIN_LEFT,JOIN_RIGHT,JOIN_FULL):
l_attr=n.child.left.result_format(rels)
r_attr=n.child.right.result_format(rels)
tokens=tokenize_select(n.prop)
groups=[]
temp=[]
@ -521,7 +535,7 @@ def selection_and_product(n,rels):
if len(left)>0:
changes=1
l_node=parser.node()
l_node.name='σ'
l_node.name=SELECTION
l_node.kind=parser.UNARY
l_node.child=n.child.left
l_node.prop=''
@ -539,7 +553,7 @@ def selection_and_product(n,rels):
if len(right)>0:
changes=1
r_node=parser.node()
r_node.name='σ'
r_node.name=SELECTION
r_node.prop=''
r_node.kind=parser.UNARY
r_node.child=n.child.right

@ -33,6 +33,8 @@ import parser
RELATION=parser.RELATION
UNARY=parser.UNARY
BINARY=parser.BINARY
b_operators=parser.b_operators
u_operators=parser.u_operators
op_functions=parser.op_functions
@ -53,12 +55,12 @@ def optimize_all(expression,rels,specific=True,general=True,debug=None):
steps.
Return value: this will return an optimized version of the expression'''
if isinstance(expression,str):
if isinstance(expression,unicode):
n=tree(expression) #Gets the tree
elif isinstance(expression,node):
n=expression
else:
raise (TypeError("expression must be a string or a node"))
raise (TypeError("expression must be a unicode string or a node"))
if isinstance(debug,list):
dbg=True
@ -118,7 +120,7 @@ if __name__=="__main__":
rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv")
rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv")
print rels'''
n=tree("π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
n=tree(u"π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
#n=tree("σ id==3 and indice==2 and name==5 or name<2(P1 * S1)")
print n
print n.toPython()

@ -45,10 +45,25 @@
RELATION=0
UNARY=1
BINARY=2
b_operators=('*','-','','','÷','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ') # List of binary operators
u_operators=('π','σ','ρ') # List of unary operators
op_functions={'*':'product','-':'difference','':'union','':'intersection','÷':'division','ᐅᐊ':'join','ᐅLEFTᐊ':'outer_left','ᐅRIGHTᐊ':'outer_right','ᐅFULLᐊ':'outer','π':'projection','σ':'selection','ρ':'rename'} # Associates operator with python method
PRODUCT=u'*'
DIFFERENCE=u'-'
UNION=u''
INTERSECTION=u''
DIVISION=u'÷'
JOIN=u'ᐅᐊ'
JOIN_LEFT=u'ᐅLEFTᐊ'
JOIN_RIGHT=u'ᐅRIGHTᐊ'
JOIN_FULL=u'ᐅFULLᐊ'
PROJECTION=u'π'
SELECTION=u'σ'
RENAME=u'ρ'
ARROW=u''
b_operators=(u'*',u'-',u'',u'',u'÷',u'ᐅᐊ',u'ᐅLEFTᐊ',u'ᐅRIGHTᐊ',u'ᐅFULLᐊ') # List of binary operators
u_operators=(u'π',u'σ',u'ρ') # List of unary operators
op_functions={u'*':'product',u'-':'difference',u'':'union',u'':'intersection',u'÷':'division',u'ᐅᐊ':'join',u'ᐅLEFTᐊ':'outer_left',u'ᐅRIGHTᐊ':'outer_right',u'ᐅFULLᐊ':'outer',u'π':'projection',u'σ':'selection',u'ρ':'rename'} # Associates operator with python method
class node (object):
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.
@ -77,11 +92,11 @@ class node (object):
expression=expression[0]
#The list contains only 1 string. Means it is the name of a relation
if len(expression)==1 and isinstance(expression[0],str):
if len(expression)==1 and isinstance(expression[0],unicode):
self.kind=RELATION
self.name=expression[0]
return
'''Expression from right to left, searching for binary operators
this means that binary operators have lesser priority than
unary operators.
@ -93,7 +108,7 @@ class node (object):
within sub-lists, they won't be found here, ensuring that they will
have highest priority.'''
for i in range(len(expression)-1,-1,-1):
if expression[i] in b_operators: #Binary operator
if expression[i] in b_operators: #Binary operator
self.kind=BINARY
self.name=expression[i]
self.left=node(expression[:i])
@ -118,9 +133,9 @@ class node (object):
prop =self.prop
#Converting parameters
if self.name=='π':#Projection
if self.name==u'π':#Projection
prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"')
elif self.name=="ρ": #Rename
elif self.name==u"ρ": #Rename
prop='{\"%s\"}' % prop.replace(',','\",\"').replace('','\":\"').replace(' ','')
else: #Selection
prop='\"%s\"' % prop
@ -234,6 +249,9 @@ def tokenize(expression):
'''This function converts an expression into a list where
every token of the expression is an item of a list. Expressions into
parenthesis will be converted into sublists.'''
if not isinstance(expression,unicode):
raise Exception('expected unicode')
items=[] #List for the tokens
'''This is a state machine. Initial status is determined by the starting of the
@ -266,9 +284,9 @@ def tokenize(expression):
#Removes the entire parentesis and content from the expression
expression=expression[end+1:].strip()
elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes
items.append(expression[0:2]) #Adding operator in the top of the list
expression=expression[2:].strip() #Removing operator from the expression
elif expression.startswith(u"σ") or expression.startswith(u"π") or expression.startswith(u"ρ"): #Unary 2 bytes
items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression
if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization
par=expression.find('(',_find_matching_parenthesis(expression))
@ -281,18 +299,18 @@ def tokenize(expression):
items.append(expression[0])
expression=expression[1:].strip() #1 char from the expression
state=4
elif expression.startswith("") or expression.startswith(""): #Binary short 3 bytes
items.append(expression[0:3]) #Adding operator in the top of the list
expression=expression[3:].strip() #Removing operator from the expression
elif expression.startswith(u"") or expression.startswith(u""): #Binary short 3 bytes
items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression
state=4
elif expression.startswith("÷"): #Binary short 2 bytes
items.append(expression[0:2]) #Adding operator in the top of the list
expression=expression[2:].strip() #Removing operator from the expression
elif expression.startswith(u"÷"): #Binary short 2 bytes
items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression
state=4
elif expression.startswith(""): #Binary long
i=expression.find("")
items.append(expression[:i+3])
expression=expression[i+3:].strip()
elif expression.startswith(u""): #Binary long
i=expression.find(u"")
items.append(expression[:i+1])
expression=expression[i+1:].strip()
state=4
else: #Relation (hopefully)
@ -347,6 +365,10 @@ def parse(expr):
return tree(expr).toPython()
if __name__=="__main__":
while True:
e=raw_input("Expression: ")
print parse(e)
#while True:
# e=raw_input("Expression: ")
# print parse(e)
b=u"σ age>1 and skill=='C' (peopleᐅᐊskills)"
print b[0]
parse(b)

@ -136,10 +136,7 @@ class Ui_Dialog(object):
self.label_3.setText(QtGui.QApplication.translate("Dialog", "Version "+version, None, QtGui.QApplication.UnicodeUTF8))
self.label_3.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse)
self.groupBox_3.setTitle(QtGui.QApplication.translate("Dialog", "Author", None, QtGui.QApplication.UnicodeUTF8))
if os.name=='nt': #Differentiates acknowledgements depending on the system
self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli &lt;<a href=\"mailto:tiposchi@tiscali.it\">tiposchi@tiscali.it</a>&gt;<br>Emilio Di Prima &lt;emiliodiprima [at] msn [dot] com&gt; (For the windows version)", None, QtGui.QApplication.UnicodeUTF8))
else:
self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli &lt;<a href=\"mailto:tiposchi@tiscali.it\">tiposchi@tiscali.it</a>&gt;", None, QtGui.QApplication.UnicodeUTF8))
self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli &lt;<a href=\"mailto:tiposchi@tiscali.it\">tiposchi@tiscali.it</a>&gt;<br>Emilio Di Prima &lt;emiliodiprima[at]msn[dot]com&gt; (For the windows version)", None, QtGui.QApplication.UnicodeUTF8))
self.label_2.setOpenExternalLinks (True)
self.label_2.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse)
self.groupBox_2.setTitle(QtGui.QApplication.translate("Dialog", "Links", None, QtGui.QApplication.UnicodeUTF8))

@ -31,8 +31,8 @@ except:
def get_py_str(a):
    '''Returns a python unicode string out of a Qt text value.

    a: a QString when running on PyQt; on PySide the value is already
       a python string, so there is no QString to convert.

    Return value: a unicode object.'''
    if pyqt:
        #QString -> UTF-8 encoded bytes -> unicode
        return unicode(a.toUtf8(),'utf-8')
    if isinstance(a,unicode):
        #Already unicode: encoding it to UTF-8 and handing the bytes to
        #unicode() would decode them as ASCII and fail on operators like σ
        return a
    return unicode(a,'utf-8') #Byte string, assumed to be UTF-8 encoded
def set_utf8_text(component,text):
if not pyqt:

@ -81,13 +81,14 @@ class relForm(QtGui.QMainWindow):
'''Executes the query'''
query=compatibility.get_py_str(self.ui.txtQuery.text())
print query.__class__
res_rel=compatibility.get_py_str(self.ui.txtResult.text())#result relation's name
if not rtypes.is_valid_relation_name(res_rel):
QtGui.QMessageBox.information(self,QtGui.QApplication.translate("Form", "Error"),QtGui.QApplication.translate("Form", "Wrong name for destination relation."))
return
expr=parser.parse(query)#Converting expression to python code
try:
#Converting string to utf8 and then from qstring to normal string
expr=parser.parse(query)#Converting expression to python code
@ -105,7 +106,7 @@ class relForm(QtGui.QMainWindow):
#Adds to history
item='%s = %s' % (compatibility.get_py_str(self.ui.txtResult.text()),compatibility.get_py_str(self.ui.txtQuery.text()))
item=unicode(item.decode('utf-8'))
#item=item.decode('utf-8'))
compatibility.add_list_item(self.ui.lstHistory,item)
self.qcounter+=1