An extended JSON parser implemented as a recursive descent parser
Python
#!/usr/bin/env python3
class ParseError(Exception):
    """Signal that the input text does not match the grammar.

    Attributes:
        pos: Index in the input text at which the problem was detected.
        msg: A ``%``-style message template.
        args: Values interpolated into ``msg`` when the error is rendered.
    """

    def __init__(self, pos, msg, *args):
        # Stored as plain attributes; ``args`` replaces the tuple that
        # ``Exception`` would otherwise keep for the constructor arguments.
        self.pos, self.msg, self.args = pos, msg, args

    def __str__(self):
        # Interpolate lazily so the template and its values stay inspectable.
        description = self.msg % self.args
        return '{} at position {}'.format(description, self.pos)
class Parser:
    """Base class for hand-written recursive descent parsers.

    A subclass provides a ``start`` method plus one method per grammar rule.
    Rules consume input through ``char``, ``keyword`` and ``match`` (or their
    ``maybe_*`` variants, which return ``None`` instead of raising).

    Position bookkeeping: ``self.pos`` is the index of the last character
    consumed (``-1`` before anything is read) and ``self.len`` is the index
    of the last character of the text (``len(text) - 1``).
    """

    def __init__(self):
        # Memo of character-class specs already split by split_char_ranges.
        self.cache = {}

    def parse(self, text):
        """Parse ``text`` from its beginning and return the ``start`` result.

        Raises:
            ParseError: if the text does not match the grammar or has
                unconsumed trailing input.
        """
        self.text = text
        self.pos = -1
        self.len = len(text) - 1
        rv = self.start()
        self.assert_end()
        return rv

    def assert_end(self):
        """Raise ``ParseError`` unless all of the input has been consumed."""
        if self.pos < self.len:
            raise ParseError(
                self.pos + 1,
                'Expected end of string but got %s',
                self.text[self.pos + 1]
            )

    def eat_whitespace(self):
        """Advance past any run of whitespace that follows the position."""
        while self.pos < self.len and self.text[self.pos + 1] in " \f\v\r\t\n":
            self.pos += 1

    def split_char_ranges(self, chars):
        """Split a character-class spec into single chars and ``a-z`` ranges.

        For example ``'a-z0'`` becomes ``['a-z', '0']``. Results are memoised
        in ``self.cache`` keyed by the spec string.

        Raises:
            ValueError: if a range's start is not strictly below its end.
        """
        try:
            return self.cache[chars]
        except KeyError:
            pass

        rv = []
        index = 0
        length = len(chars)
        while index < length:
            # A '-' counts as a range operator only when a character follows
            # it; a trailing '-' is therefore taken literally.
            if index + 2 < length and chars[index + 1] == '-':
                if chars[index] >= chars[index + 2]:
                    raise ValueError('Bad character range')
                rv.append(chars[index:index + 3])
                index += 3
            else:
                rv.append(chars[index])
                index += 1

        self.cache[chars] = rv
        return rv

    def char(self, chars=None):
        """Consume and return the next character.

        Args:
            chars: Optional character-class spec (see ``split_char_ranges``).
                When ``None`` any character is accepted.

        Raises:
            ParseError: at end of input, or if the next character is not in
                the requested class.
        """
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                'character' if chars is None else '[%s]' % chars
            )

        next_char = self.text[self.pos + 1]
        if chars is None:
            self.pos += 1
            return next_char

        for char_range in self.split_char_ranges(chars):
            if len(char_range) == 1:
                if next_char == char_range:
                    self.pos += 1
                    return next_char
            elif char_range[0] <= next_char <= char_range[2]:
                self.pos += 1
                return next_char

        # ``chars`` cannot be None here: that case returned above.
        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            '[%s]' % chars,
            next_char
        )

    def keyword(self, *keywords):
        """Consume the first of ``keywords`` found at the current position.

        Whitespace is skipped before and after the keyword. Keywords are
        tried in the order given.

        Returns:
            The keyword that matched.

        Raises:
            ParseError: at end of input, or if none of the keywords match.
        """
        self.eat_whitespace()
        if self.pos >= self.len:
            raise ParseError(
                self.pos + 1,
                'Expected %s but got end of string',
                ','.join(keywords)
            )

        for keyword in keywords:
            low = self.pos + 1
            high = low + len(keyword)
            if self.text[low:high] == keyword:
                self.pos += len(keyword)
                self.eat_whitespace()
                return keyword

        raise ParseError(
            self.pos + 1,
            'Expected %s but got %s',
            ','.join(keywords),
            self.text[self.pos + 1],
        )

    def match(self, *rules):
        """Try each named rule in order and return the first success.

        Each rule is looked up as a method on ``self``. When a rule fails the
        position is rewound before the next one is tried. If every rule
        fails, the error that got furthest into the input is reported; when
        several rules fail at that same furthest position their names are
        combined into a single error.

        Raises:
            ParseError: if no rule matches.
        """
        self.eat_whitespace()
        last_error_pos = -1
        last_exception = None
        last_error_rules = []

        for rule in rules:
            initial_pos = self.pos
            try:
                rv = getattr(self, rule)()
                self.eat_whitespace()
                return rv
            except ParseError as e:
                self.pos = initial_pos
                if e.pos > last_error_pos:
                    last_exception = e
                    last_error_pos = e.pos
                    last_error_rules.clear()
                    last_error_rules.append(rule)
                elif e.pos == last_error_pos:
                    last_error_rules.append(rule)

        if len(last_error_rules) == 1:
            raise last_exception

        expected = ','.join(last_error_rules)
        if last_error_pos > self.len:
            # The furthest failure is at end of input, so there is no
            # character to quote; indexing the text here would raise
            # IndexError instead of a ParseError.
            raise ParseError(
                last_error_pos,
                'Expected %s but got end of string',
                expected
            )
        raise ParseError(
            last_error_pos,
            'Expected %s but got %s',
            expected,
            self.text[last_error_pos]
        )

    def maybe_char(self, chars=None):
        """Like ``char`` but return ``None`` instead of raising."""
        try:
            return self.char(chars)
        except ParseError:
            return None

    def maybe_match(self, *rules):
        """Like ``match`` but return ``None`` instead of raising.

        A rule that legitimately produces ``None`` cannot be told apart from
        a failed match through this method.
        """
        try:
            return self.match(*rules)
        except ParseError:
            return None

    def maybe_keyword(self, *keywords):
        """Like ``keyword`` but return ``None`` instead of raising."""
        try:
            return self.keyword(*keywords)
        except ParseError:
            return None
class JSONParser(Parser):
    """Parser for a relaxed JSON dialect.

    Extensions over strict JSON that this grammar accepts:

    * ``#`` starts a comment that runs to the end of the line;
    * strings may be single- or double-quoted;
    * keys and string values may be unquoted;
    * a trailing comma is allowed in lists and maps.
    """

    def eat_whitespace(self):
        """Skip whitespace and ``#`` line comments after the position."""
        is_processing_comment = False

        while self.pos < self.len:
            char = self.text[self.pos + 1]
            if is_processing_comment:
                # A comment ends at the newline, which is itself consumed.
                if char == '\n':
                    is_processing_comment = False
            else:
                if char == '#':
                    is_processing_comment = True
                elif char not in ' \f\v\r\t\n':
                    break

            self.pos += 1

    def start(self):
        """Grammar entry point: a single value of any type."""
        return self.match('any_type')

    def any_type(self):
        """Match a list, a map, or a primitive value."""
        return self.match('complex_type', 'primitive_type')

    def primitive_type(self):
        """Match ``null``, a boolean, a quoted string or an unquoted token."""
        return self.match('null', 'boolean', 'quoted_string', 'unquoted')

    def complex_type(self):
        """Match a list or a map."""
        return self.match('list', 'map')

    def list(self):
        """Match ``[ item, item, ... ]`` and return a Python list."""
        rv = []

        self.keyword('[')
        while True:
            # Catch the failure explicitly rather than testing for a None
            # result: ``null`` parses to None and is a valid list item.
            try:
                item = self.match('any_type')
            except ParseError:
                break

            rv.append(item)

            if not self.maybe_keyword(','):
                break

        self.keyword(']')
        return rv

    def map(self):
        """Match ``{ key: value, ... }`` and return a Python dict."""
        rv = {}

        self.keyword('{')
        while True:
            # ``pair`` always yields a tuple, so None only means "no match".
            item = self.maybe_match('pair')
            if item is None:
                break

            rv[item[0]] = item[1]

            if not self.maybe_keyword(','):
                break

        self.keyword('}')
        return rv

    def pair(self):
        """Match ``key: value`` and return it as a ``(key, value)`` tuple.

        Raises:
            ParseError: if the key is a number rather than a string, or if
                the colon or the value is missing.
        """
        key = self.match('quoted_string', 'unquoted')

        if not isinstance(key, str):
            # ``unquoted`` may have produced an int or float. The template
            # has no placeholder, so no interpolation argument is passed,
            # and the text is not indexed (the key may end the input).
            raise ParseError(
                self.pos + 1,
                'Expected string but got number'
            )

        self.keyword(':')
        value = self.match('any_type')

        return key, value

    def null(self):
        """Match the literal ``null`` and return ``None``."""
        self.keyword('null')
        return None

    def boolean(self):
        """Match ``true`` or ``false`` and return the Python bool."""
        boolean = self.keyword('true', 'false')
        return boolean[0] == 't'

    def unquoted(self):
        """Match an unquoted token and return it as int, float or str.

        The token is a run of the permitted characters with trailing spaces
        and tabs removed. If it contains ``.``, ``e`` or ``E`` it is tried as
        a float, otherwise as an int; when that conversion fails the token
        is returned unchanged as a string.
        """
        acceptable_chars = '0-9A-Za-z \t!$%&()*+./;<=>?^_`|~-'

        chars = [self.char(acceptable_chars)]
        while True:
            char = self.maybe_char(acceptable_chars)
            if char is None:
                break
            chars.append(char)

        rv = ''.join(chars).rstrip(' \t')

        # Inspect the whole token, first character included, so that '.5'
        # is treated as a float just like '-.5' or '0.5'.
        number_type = float if any(c in 'Ee.' for c in rv) else int
        try:
            return number_type(rv)
        except ValueError:
            return rv

    def quoted_string(self):
        """Match a single- or double-quoted string and return its contents.

        Handles ``\\b \\f \\n \\r \\t`` and ``\\uXXXX`` escapes; any other
        escaped character stands for itself.

        Raises:
            ParseError: if the string is unterminated or a ``\\u`` escape is
                not followed by four hex digits.
        """
        quote = self.char('"\'')
        chars = []

        escape_sequences = {
            'b': '\b',
            'f': '\f',
            'n': '\n',
            'r': '\r',
            't': '\t',
        }

        while True:
            char = self.char()
            if char == quote:
                break
            elif char == '\\':
                escape = self.char()
                if escape == 'u':
                    code_point = []
                    for _ in range(4):
                        code_point.append(self.char('0-9a-fA-F'))

                    chars.append(chr(int(''.join(code_point), 16)))
                else:
                    # Look up the character after the backslash, not the
                    # backslash itself.
                    chars.append(escape_sequences.get(escape, escape))
            else:
                chars.append(char)

        return ''.join(chars)
import sys
from pprint import pprint
# Script entry: read one document from standard input, then either
# pretty-print the parsed value or report where parsing failed.
parser = JSONParser()
try:
    parsed = parser.parse(sys.stdin.read())
except ParseError as err:
    print('Error: ' + str(err))
else:
    pprint(parsed)
Output
Embed on website
To embed this program on your website, copy the following code and paste it into your website's HTML:
Comments
This comment belongs to a banned user and is only visible to admins.
This comment belongs to a deleted user and is only visible to admins.