def sanitize_json(mixed_content):
    """Extract the JSON object embedded in *mixed_content* and scrub it.

    The object text is located with ``extract_json`` and then cleaned in
    two passes:

    1. Backslashes that do not start a valid JSON escape (a backslash
       followed by one of ``" / b f n r t``, another backslash, or ``u``
       plus four hex digits) are dropped.
    2. Control characters in the ranges U+0000-U+001F and U+007F-U+009F
       are dropped. This covers raw newlines, carriage returns and tabs;
       ordinary spaces around a removed newline are left in place.

    Returns the cleaned text, which is empty when ``extract_json`` finds
    no object.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    json_string = extract_json(mixed_content)

    # Valid escapes are matched first and kept as a whole unit, so the second
    # half of an escaped backslash (e.g. in "C:\\x") is never mistaken for a
    # stray backslash. Only a backslash that starts no valid escape falls
    # through to the second alternative and is removed.
    json_string = re.sub(
        r'(\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))|\\',
        lambda match: match.group(1) or '',
        json_string,
    )

    # Raw control characters are not allowed in JSON text. Because this range
    # contains "\n", no separate newline-removal step is needed afterwards.
    json_string = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', json_string)
    return json_string
def extract_json(mixed_content):
    """Return the last balanced ``{...}`` span found in *mixed_content*.

    The text is scanned once. Every ``}`` that closes an open ``{`` makes
    that span the current candidate, so the result is the last brace pair
    to close: the outermost object for a single well-formed nested
    object, or the final one when several objects appear in the text.

    Braces inside double-quoted strings are ignored while an object is
    open, so a value such as ``"}"`` does not end the object early. The
    character following any backslash is skipped, which covers escaped
    quotes inside strings.

    Returns an empty string when no balanced pair is found.
    """
    json_str = ''
    stack = []          # indices of '{' characters that are still open
    escaped = False     # previous character was a backslash
    in_string = False   # inside a double-quoted string within an object

    for i, char in enumerate(mixed_content):
        if escaped:
            # Character after a backslash is never structural.
            escaped = False
            continue
        if char == '\\':
            escaped = True
        elif in_string:
            if char == '"':
                in_string = False
        elif char == '"':
            # Quotes only matter inside an object; quotes in the surrounding
            # prose must not hide a later '{'.
            in_string = bool(stack)
        elif char == '{':
            stack.append(i)
        elif char == '}' and stack:
            json_str = mixed_content[stack.pop():i + 1]
    return json_str