Mercurial > hg > expressionparser
comparison expr.py @ 0:ae57e69e4b15
simple expression parser
| author | Ted Mielczarek <ted.mielczarek@gmail.com> |
|---|---|
| date | Wed, 01 Jun 2011 19:58:56 -0400 |
| parents | |
| children | c45135ec8c13 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ae57e69e4b15 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import re, unittest | |
| 4 | |
| 5 # ideas taken from http://effbot.org/zone/simple-top-down-parsing.htm | |
| 6 # token classes | |
class ident_token:
    """
    Token for an identifier. Its value is looked up in the value mapping
    held by the parser when the token is evaluated.
    """
    def __init__(self, value):
        # |value| is the identifier's name as matched by the lexer
        self.value = value

    def nud(self, parser):
        """
        Return the value that |parser| maps this identifier's name to.
        """
        name = self.value
        return parser.value(name)
| 14 | |
class int_token:
    """
    Token for an integer literal.
    """
    def __init__(self, value):
        # convert the matched text to a number once, at construction time
        number = int(value)
        self.value = number

    def nud(self, parser):
        """
        Return the literal's integer value. |parser| is not used.
        """
        return self.value
| 20 | |
class bool_token:
    """
    Token for the boolean literals "true" and "false".
    """
    def __init__(self, value):
        # any text other than the two literals raises KeyError
        literals = {'false': False, 'true': True}
        self.value = literals[value]

    def nud(self, parser):
        """
        Return the literal's boolean value. |parser| is not used.
        """
        return self.value
| 26 | |
class eq_op_token:
    """
    Token for the "==" operator.
    """
    lbp = 20

    def led(self, parser, left):
        """
        Parse the right-hand operand at this operator's binding power and
        return whether |left| equals it.
        """
        right = parser.expression(self.lbp)
        return left == right
| 32 | |
class neq_op_token:
    """
    Token for the "!=" operator.
    """
    lbp = 20

    def led(self, parser, left):
        """
        Parse the right-hand operand at this operator's binding power and
        return whether |left| differs from it.
        """
        right = parser.expression(self.lbp)
        return left != right
| 38 | |
class and_op_token:
    """
    Token for the "&&" operator.
    """
    lbp = 11

    def led(self, parser, left):
        """
        Parse the right-hand operand and return |left| if it is falsy,
        otherwise the right-hand value.
        """
        # The right-hand side is always parsed, even when |left| already
        # decides the result, so that its tokens are consumed.
        rhs = parser.expression(self.lbp)
        if left:
            return rhs
        return left
| 45 | |
class or_op_token:
    """
    Token for the "||" operator.
    """
    lbp = 10

    def led(self, parser, left):
        """
        Parse the right-hand operand and return |left| if it is truthy,
        otherwise the right-hand value.
        """
        # The right-hand side is always parsed, even when |left| already
        # decides the result, so that its tokens are consumed.
        rhs = parser.expression(self.lbp)
        if left:
            return left
        return rhs
| 52 | |
class lparen_token:
    """
    Token for "(", which opens a parenthesized sub-expression.
    """
    lbp = 50

    def nud(self, parser):
        """
        Parse the expression inside the parentheses, require the closing
        ")" and return the inner expression's value.
        """
        inner = parser.expression()
        # the token after the inner expression must be the matching ")"
        parser.advance(rparen_token)
        return inner
| 60 | |
class rparen_token:
    """
    Token for ")". Its left binding power of 0 stops the expression loop,
    which only continues past tokens whose lbp exceeds the current rbp.
    """
    lbp = 0
| 64 | |
class string_token:
    """
    Token for a string literal.
    """
    def __init__(self, value):
        # |value| is stored exactly as given by the caller
        self.value = value

    def nud(self, parser):
        """
        Return the literal's text. |parser| is not used.
        """
        text = self.value
        return text
| 70 | |
class end_token:
    """
    Marker token for the end of the input.
    """
    # lowest left binding power, so it always ends parsing
    lbp = 0
| 74 | |
class ExpressionParser(object):
    """
    Evaluate a simple expression using top-down operator precedence
    parsing.

    Tokens provide nud() (their value at the start of an expression),
    led() (how they combine with the value to their left) and lbp (left
    binding power). Values are computed while parsing, so parse() returns
    the expression's value rather than a syntax tree.
    """
    def __init__(self, text, valuemapping):
        """
        Initialize the parser with input |text|, and |valuemapping| as
        a dict mapping identifier names to values.
        """
        self.text = text
        self.valuemapping = valuemapping

    def _tokenize(self):
        """
        Lex the input text into tokens and yield them in sequence,
        followed by a final end_token.

        Raises Exception if part of the text cannot be lexed. Because this
        is a generator, the error surfaces when the first token is
        requested.
        """
        # scanner callbacks
        def bool_(scanner, t): return bool_token(t)
        def identifier(scanner, t): return ident_token(t)
        def integer(scanner, t): return int_token(t)
        def eq(scanner, t): return eq_op_token()
        def neq(scanner, t): return neq_op_token()
        def or_(scanner, t): return or_op_token()
        def and_(scanner, t): return and_op_token()
        def lparen(scanner, t): return lparen_token()
        def rparen(scanner, t): return rparen_token()
        # strip the surrounding double quotes
        def string_(scanner, t): return string_token(t[1:-1])

        scanner = re.Scanner([
            # The first matching pattern wins, so the \b is needed to keep
            # identifiers such as "trueish" from being split into a boolean
            # literal followed by an identifier.
            (r"(?:true|false)\b", bool_),
            (r"[a-zA-Z_]\w*", identifier),
            (r"[0-9]+", integer),
            (r'"[^"]*"', string_),
            (r"==", eq),
            (r"!=", neq),
            (r"\|\|", or_),
            (r"&&", and_),
            (r"\(", lparen),
            (r"\)", rparen),
            (r"\s+", None), # skip whitespace
        ])
        tokens, remainder = scanner.scan(self.text)
        if remainder:
            # scan() stops at the first character no pattern matches;
            # report it instead of silently evaluating a truncated input.
            raise Exception("Unexpected input: %r" % remainder)
        for t in tokens:
            yield t
        yield end_token()

    def _next_token(self):
        """
        Return the next token from the token stream.

        Raises Exception rather than leaking StopIteration when the stream
        is exhausted, which happens when the input ends where an operand
        was expected (for example "" or "1 ==").
        """
        try:
            return next(self.iter)
        except StopIteration:
            raise Exception("Unexpected end of input!")

    def value(self, ident):
        """
        Look up the value of |ident| in the value mapping passed in the
        constructor. Raises KeyError if |ident| is not in the mapping.
        """
        return self.valuemapping[ident]

    def advance(self, expected):
        """
        Assert that the current token is an instance of |expected|, and
        advance to the next token. Raises Exception otherwise.
        """
        if not isinstance(self.token, expected):
            raise Exception("Unexpected token!")
        self.token = self._next_token()

    def expression(self, rbp=0):
        """
        Parse and return the value of an expression, consuming operators
        for as long as the upcoming token's left binding power is greater
        than |rbp|.
        """
        t = self.token
        self.token = self._next_token()
        left = t.nud(self)
        while rbp < self.token.lbp:
            t = self.token
            self.token = self._next_token()
            left = t.led(self, left)
        return left

    def parse(self):
        """
        Parse and return the value of the expression in the text
        passed to the constructor.

        Raises Exception if the text cannot be lexed, ends prematurely, or
        has tokens left over after a complete expression.
        """
        self.iter = self._tokenize()
        self.token = self._next_token()
        result = self.expression()
        # A top-level expression also stops at a stray ")", whose lbp is 0;
        # only the end marker is acceptable here.
        if not isinstance(self.token, end_token):
            raise Exception("Unexpected token!")
        return result
| 156 | |
class ExpressionParserUnittest(unittest.TestCase):
    """
    Unit tests for ExpressionParser: literal and identifier values,
    equality operators, conjunctions, and operator precedence/grouping.
    """
    def parse(self, text, values):
        """
        Evaluate |text| with |values| as the identifier mapping.
        """
        return ExpressionParser(text, values).parse()

    def test_BasicValues(self):
        self.assertEqual(1, self.parse("1", {}))
        self.assertEqual(100, self.parse("100", {}))
        self.assertEqual(True, self.parse("true", {}))
        self.assertEqual(False, self.parse("false", {}))
        self.assertEqual("", self.parse('""', {}))
        self.assertEqual("foo bar", self.parse('"foo bar"', {}))
        self.assertEqual(1, self.parse("foo", {'foo':1}))
        self.assertEqual(True, self.parse("bar", {'bar':True}))
        self.assertEqual("xyz", self.parse("abc123", {'abc123':"xyz"}))

    def test_Equality(self):
        self.assertTrue(self.parse("true == true", {}))
        self.assertTrue(self.parse("false == false", {}))
        self.assertTrue(self.parse("1 == 1", {}))
        self.assertTrue(self.parse("100 == 100", {}))
        self.assertTrue(self.parse('"some text" == "some text"', {}))
        self.assertTrue(self.parse("true != false", {}))
        self.assertTrue(self.parse("1 != 2", {}))
        self.assertTrue(self.parse('"text" != "other text"', {}))
        self.assertTrue(self.parse("foo == true", {'foo': True}))
        self.assertTrue(self.parse("foo == 1", {'foo': 1}))
        self.assertTrue(self.parse('foo == "bar"', {'foo': 'bar'}))
        self.assertTrue(self.parse("foo == bar", {'foo': True, 'bar': True}))
        self.assertTrue(self.parse("true == foo", {'foo': True}))
        self.assertTrue(self.parse("foo != true", {'foo': False}))
        self.assertTrue(self.parse("foo != 2", {'foo': 1}))
        self.assertTrue(self.parse('foo != "bar"', {'foo': 'abc'}))
        self.assertTrue(self.parse("foo != bar", {'foo': True, 'bar': False}))
        self.assertTrue(self.parse("true != foo", {'foo': False}))

    def test_Conjunctions(self):
        self.assertTrue(self.parse("true && true", {}))
        self.assertTrue(self.parse("true || false", {}))
        self.assertFalse(self.parse("false || false", {}))
        self.assertFalse(self.parse("true && false", {}))

    def test_Precedence(self):
        # && binds tighter than ||, and parentheses override that
        self.assertTrue(self.parse("true || false && false", {}))
        self.assertTrue(self.parse("false && false || true", {}))
        self.assertFalse(self.parse("(true || false) && false", {}))
        self.assertFalse(self.parse("false && (false || true)", {}))
        # == and != bind tighter than && and ||
        self.assertTrue(self.parse("1 == 1 && 2 != 3", {}))
        self.assertTrue(self.parse("foo == 1 || bar == 2",
                                   {'foo': 0, 'bar': 2}))
| 198 | |
if __name__ == '__main__':
    # Executing this file directly runs the unit tests defined in it.
    unittest.main()

# Disabled sketch of a command-line driver: the expression comes from the
# first argument and NAME=INT bindings from the remaining ones. It is kept
# for reference only; sys is not imported by this file, and the print
# statement is Python 2 syntax.
#parser = ExpressionParser(sys.argv[1], dict((a,int(b)) for a,b in (x.split('=') for x in sys.argv[2:])))
#print "%s: %s" % (sys.argv[1],parser.parse())
