From 1b049e13f0529d5bc78b2374f3715c09952ce0c3 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 9 Apr 2016 11:37:28 +0200 Subject: [PATCH] Better handling of parenthesis inside string literals When scanning for the matching parenthesis, string literals were not taken into account, which would inevitably lead to errors if one wanted to do a selection on a string containing a parenthesis. Now it uses a state-machine to skip the ones that are located inside literals. --- CHANGELOG | 4 ++-- relational/parser.py | 40 +++++++++++++++++++++++++++++++++++++++- test/par1.query | 1 + test/par1.result | 1 + test/par2.query | 1 + test/par2.result | 1 + test/par3.query | 1 + test/par3.result | 1 + test/par4.query | 1 + test/par4.result | 1 + 10 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 test/par1.query create mode 100644 test/par1.result create mode 100644 test/par2.query create mode 100644 test/par2.result create mode 100644 test/par3.query create mode 100644 test/par3.result create mode 100644 test/par4.query create mode 100644 test/par4.result diff --git a/CHANGELOG b/CHANGELOG index bd139c1..9719834 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,8 +2,8 @@ - Improve error reporting - Release is now signed with PGP - Doesn't crash on network errors -- Fixed optimization introduced in 2.2 - that did not hold in all cases. +- Fixed optimization introduced in 2.2 that did not hold in all cases +- Better handling of parenthesis inside string literals 2.3 - Very small release. The windows setup now installs the C++ library diff --git a/relational/parser.py b/relational/parser.py index e351d8c..187cb8d 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -291,7 +291,21 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')') close parenthesis to the 1st open parenthesis found starting from start (0 by default)''' par_count = 0 # Count of parenthesis + + string = False + escape = False + for i in range(start, len(expression)): + if expression[i] == '\'' and not escape: + string = not string + if expression[i] == '\\' and not escape: + escape = True + else: + escape = False + if string: + continue + + if expression[i] == openpar: par_count += 1 elif expression[i] == closepar: @@ -299,6 +313,30 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')') if par_count == 0: return i # Closing parenthesis of the parameter +def _find_token(haystack, needle): + ''' + Like the string function find, but + ignores tokens that are within a string + literal. + ''' + r = -1 + string = False + escape = False + + for i in range(len(haystack)): + if haystack[i] == '\'' and not escape: + string = not string + if haystack[i] == '\\' and not escape: + escape = True + else: + escape = False + if string: + continue + + if haystack[i:].startswith(needle): + return i + return r + def tokenize(expression): '''This function converts a relational expression into a list where @@ -331,7 +369,7 @@ def tokenize(expression): par = expression.find( '(', _find_matching_parenthesis(expression)) else: # Expression without parenthesis, so adding what's between start and parenthesis as whole - par = expression.find('(') + par = _find_token(expression, '(') items.append(expression[:par].strip()) # Inserting parameter of the operator diff --git a/test/par1.query b/test/par1.query new file mode 100644 index 0000000..c0aca14 --- /dev/null +++ b/test/par1.query @@ -0,0 +1 @@ +σ name=='(' (people) diff --git a/test/par1.result b/test/par1.result new file mode 100644 index 0000000..9d637aa --- /dev/null +++ b/test/par1.result @@ -0,0 +1 @@ +id,name,chief,age diff --git a/test/par2.query b/test/par2.query new file mode 100644 index 0000000..5570f40 --- /dev/null +++ b/test/par2.query @@ -0,0 +1 @@ +σ (name=='(') (people) diff --git a/test/par2.result b/test/par2.result new file mode 100644 index 0000000..9d637aa --- /dev/null +++ b/test/par2.result @@ -0,0 +1 @@ +id,name,chief,age diff --git a/test/par3.query b/test/par3.query new file mode 100644 index 0000000..117f61f --- /dev/null +++ b/test/par3.query @@ -0,0 +1 @@ +σ (name==')') (people) diff --git a/test/par3.result b/test/par3.result new file mode 100644 index 0000000..9d637aa --- /dev/null +++ b/test/par3.result @@ -0,0 +1 @@ +id,name,chief,age diff --git a/test/par4.query b/test/par4.query new file mode 100644 index 0000000..6d8045f --- /dev/null +++ b/test/par4.query @@ -0,0 +1 @@ +σ name==')' (people) diff --git a/test/par4.result b/test/par4.result new file mode 100644 index 0000000..9d637aa --- /dev/null +++ b/test/par4.result @@ -0,0 +1 @@ +id,name,chief,age