mirror of
https://github.com/arendst/Tasmota.git
synced 2025-07-23 10:46:31 +00:00
Berry vulnerability in JSON parsing for unicode (#23603)
This commit is contained in:
parent
e9b62811c7
commit
decdfc6b51
@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
### Fixed
|
||||
- LVGL restore `lv_chart.set_range` removed in LVGL 9.3.0 in favor of `lv_chart.set_axis_range` (#23567)
|
||||
- Berry vulnerability in JSON parsing for Unicode (#23603)
|
||||
|
||||
### Removed
|
||||
|
||||
|
@ -259,6 +259,7 @@
|
||||
#undef BE_USE_SOLIDIFY_MODULE
|
||||
#define BE_USE_DEBUG_MODULE 1
|
||||
#define BE_USE_SOLIDIFY_MODULE 1
|
||||
#define BE_MAPPING_ENABLE_INPUT_VALIDATION 1 // input validation for lv_mapping
|
||||
#endif // USE_BERRY_DEBUG
|
||||
|
||||
/* Macro: BE_EXPLICIT_XXX
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "be_lexer.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if BE_USE_JSON_MODULE
|
||||
|
||||
@ -20,6 +21,9 @@
|
||||
#define INDENT_WIDTH 2
|
||||
#define INDENT_CHAR ' '
|
||||
|
||||
/* Security: Maximum JSON string length to prevent memory exhaustion attacks */
|
||||
#define MAX_JSON_STRING_LEN (1024 * 1024) /* 1MB limit */
|
||||
|
||||
static const char* parser_value(bvm *vm, const char *json);
|
||||
static void value_dump(bvm *vm, int *indent, int idx, int fmt);
|
||||
|
||||
@ -62,21 +66,66 @@ static int is_object(bvm *vm, const char *class, int idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int json_strlen(const char *json)
|
||||
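/* Compute an upper bound on the buffer size needed to decode a JSON string,
|
||||
* budgeting 3 UTF-8 bytes per Unicode escape; stores the character count (each escape counted once) in *actual_len and returns SIZE_MAX if the string is malformed or exceeds MAX_JSON_STRING_LEN */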
|
||||
static size_t json_strlen_safe(const char *json, size_t *actual_len)
|
||||
{
|
||||
int ch;
|
||||
const char *s = json + 1; /* skip '"' */
|
||||
/* get string length "(\\.|[^"])*" */
|
||||
size_t char_count = 0;
|
||||
size_t byte_count = 0;
|
||||
|
||||
while ((ch = *s) != '\0' && ch != '"') {
|
||||
char_count++;
|
||||
if (char_count > MAX_JSON_STRING_LEN) {
|
||||
return SIZE_MAX; /* String too long */
|
||||
}
|
||||
|
||||
++s;
|
||||
if (ch == '\\') {
|
||||
ch = *s++;
|
||||
if (ch == '\0') {
|
||||
return -1;
|
||||
return SIZE_MAX; /* Malformed string */
|
||||
}
|
||||
|
||||
switch (ch) {
|
||||
case '"': case '\\': case '/':
|
||||
case 'b': case 'f': case 'n': case 'r': case 't':
|
||||
byte_count += 1;
|
||||
break;
|
||||
case 'u':
|
||||
/* Unicode can expand to 1-3 UTF-8 bytes
|
||||
* We conservatively assume 3 bytes for safety */
|
||||
byte_count += 3;
|
||||
/* Verify we have 4 hex digits following */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (!s[i] || !isxdigit((unsigned char)s[i])) {
|
||||
return SIZE_MAX; /* Invalid unicode sequence */
|
||||
}
|
||||
}
|
||||
s += 4; /* Skip the 4 hex digits */
|
||||
break;
|
||||
default:
|
||||
return SIZE_MAX; /* Invalid escape sequence */
|
||||
}
|
||||
} else if (ch >= 0 && ch <= 0x1f) {
|
||||
return SIZE_MAX; /* Unescaped control character */
|
||||
} else {
|
||||
byte_count += 1;
|
||||
}
|
||||
|
||||
/* Check for potential overflow */
|
||||
if (byte_count > MAX_JSON_STRING_LEN) {
|
||||
return SIZE_MAX;
|
||||
}
|
||||
}
|
||||
return ch ? cast_int(s - json - 1) : -1;
|
||||
|
||||
if (ch != '"') {
|
||||
return SIZE_MAX; /* Unterminated string */
|
||||
}
|
||||
|
||||
*actual_len = char_count;
|
||||
return byte_count;
|
||||
}
|
||||
|
||||
static void json2berry(bvm *vm, const char *class)
|
||||
@ -117,55 +166,94 @@ static const char* parser_null(bvm *vm, const char *json)
|
||||
|
||||
static const char* parser_string(bvm *vm, const char *json)
|
||||
{
|
||||
if (*json == '"') {
|
||||
int len = json_strlen(json++);
|
||||
if (len > -1) {
|
||||
int ch;
|
||||
char *buf, *dst = buf = be_malloc(vm, len);
|
||||
while ((ch = *json) != '\0' && ch != '"') {
|
||||
++json;
|
||||
if (ch == '\\') {
|
||||
ch = *json++; /* skip '\' */
|
||||
switch (ch) {
|
||||
case '"': *dst++ = '"'; break;
|
||||
case '\\': *dst++ = '\\'; break;
|
||||
case '/': *dst++ = '/'; break;
|
||||
case 'b': *dst++ = '\b'; break;
|
||||
case 'f': *dst++ = '\f'; break;
|
||||
case 'n': *dst++ = '\n'; break;
|
||||
case 'r': *dst++ = '\r'; break;
|
||||
case 't': *dst++ = '\t'; break;
|
||||
case 'u': { /* load unicode */
|
||||
dst = be_load_unicode(dst, json);
|
||||
if (dst == NULL) {
|
||||
be_free(vm, buf, len);
|
||||
return NULL;
|
||||
}
|
||||
json += 4;
|
||||
break;
|
||||
}
|
||||
default: be_free(vm, buf, len); return NULL; /* error */
|
||||
}
|
||||
} else if(ch >= 0 && ch <= 0x1f) {
|
||||
/* control characters must be escaped
|
||||
as per https://www.rfc-editor.org/rfc/rfc7159#section-7 */
|
||||
be_free(vm, buf, len);
|
||||
if (*json != '"') {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t char_len;
|
||||
size_t byte_len = json_strlen_safe(json, &char_len);
|
||||
|
||||
if (byte_len == SIZE_MAX) {
|
||||
return NULL; /* Invalid or too long string */
|
||||
}
|
||||
|
||||
if (byte_len == 0) {
|
||||
/* Empty string */
|
||||
be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
|
||||
be_pushstring(vm, "");
|
||||
return json + 2; /* Skip opening and closing quotes */
|
||||
}
|
||||
|
||||
/* Allocate buffer - size is correctly calculated by json_strlen_safe */
|
||||
char *buf = be_malloc(vm, byte_len + 1);
|
||||
if (!buf) {
|
||||
return NULL; /* Out of memory */
|
||||
}
|
||||
|
||||
char *dst = buf;
|
||||
const char *src = json + 1; /* Skip opening quote */
|
||||
int ch;
|
||||
|
||||
while ((ch = *src) != '\0' && ch != '"') {
|
||||
++src;
|
||||
if (ch == '\\') {
|
||||
ch = *src++;
|
||||
switch (ch) {
|
||||
case '"':
|
||||
*dst++ = '"';
|
||||
break;
|
||||
case '\\':
|
||||
*dst++ = '\\';
|
||||
break;
|
||||
case '/':
|
||||
*dst++ = '/';
|
||||
break;
|
||||
case 'b':
|
||||
*dst++ = '\b';
|
||||
break;
|
||||
case 'f':
|
||||
*dst++ = '\f';
|
||||
break;
|
||||
case 'n':
|
||||
*dst++ = '\n';
|
||||
break;
|
||||
case 'r':
|
||||
*dst++ = '\r';
|
||||
break;
|
||||
case 't':
|
||||
*dst++ = '\t';
|
||||
break;
|
||||
case 'u': {
|
||||
dst = be_load_unicode(dst, src);
|
||||
if (dst == NULL) {
|
||||
be_free(vm, buf, byte_len + 1);
|
||||
return NULL;
|
||||
} else {
|
||||
*dst++ = (char)ch;
|
||||
}
|
||||
src += 4;
|
||||
break;
|
||||
}
|
||||
be_assert(ch == '"');
|
||||
/* require the stack to have some free space for the string,
|
||||
since parsing deeply nested objects might
|
||||
crash the VM due to insufficient stack space. */
|
||||
be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
|
||||
be_pushnstring(vm, buf, cast_int(dst - buf));
|
||||
be_free(vm, buf, len);
|
||||
return json + 1; /* skip '"' */
|
||||
default:
|
||||
be_free(vm, buf, byte_len + 1);
|
||||
return NULL; /* Invalid escape */
|
||||
}
|
||||
} else if (ch >= 0 && ch <= 0x1f) {
|
||||
be_free(vm, buf, byte_len + 1);
|
||||
return NULL; /* Unescaped control character */
|
||||
} else {
|
||||
*dst++ = (char)ch;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
||||
if (ch != '"') {
|
||||
be_free(vm, buf, byte_len + 1);
|
||||
return NULL; /* Unterminated string */
|
||||
}
|
||||
|
||||
/* Success - create Berry string */
|
||||
be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
|
||||
be_pushnstring(vm, buf, (size_t)(dst - buf));
|
||||
be_free(vm, buf, byte_len + 1);
|
||||
return src + 1; /* Skip closing quote */
|
||||
}
|
||||
|
||||
static const char* parser_field(bvm *vm, const char *json)
|
||||
|
@ -93,3 +93,154 @@ for count : 10..200
|
||||
end
|
||||
json.dump(arr)
|
||||
end
|
||||
|
||||
# Security tests for JSON parsing fixes
|
||||
|
||||
# Test 1: Unicode expansion buffer overflow protection
|
||||
# Each \u0800 sequence (6 chars in JSON) becomes 3 UTF-8 bytes
|
||||
# Old code would allocate only 1 byte per sequence, causing buffer overflow
|
||||
def test_unicode_expansion()
|
||||
# Test single Unicode sequences of different byte lengths
|
||||
assert_load('"\\u0048"', 'H') # 1 UTF-8 byte (ASCII)
|
||||
assert_load('"\\u00E9"', 'é') # 2 UTF-8 bytes (Latin)
|
||||
assert_load('"\\u0800"', 'ࠀ') # 3 UTF-8 bytes (Samaritan)
|
||||
|
||||
# Test multiple Unicode sequences that would cause buffer overflow in old code
|
||||
var many_unicode = '"'
|
||||
for i: 0..49 # 50 sequences (0-49 inclusive), each \u0800 -> 3 bytes (150 bytes total vs 50 bytes old allocation)
|
||||
many_unicode += '\\u0800'
|
||||
end
|
||||
many_unicode += '"'
|
||||
|
||||
var result = json.load('{"test": ' + many_unicode + '}')
|
||||
assert(result != nil, "Unicode expansion test should succeed")
|
||||
assert(size(result['test']) == 150, "Unicode expansion should produce 150 UTF-8 bytes") # 50 * 3 bytes
|
||||
end
|
||||
|
||||
# Test 2: Invalid Unicode sequence rejection
|
||||
def test_invalid_unicode()
|
||||
# Invalid hex digits in Unicode sequences should be rejected
|
||||
assert_load_failed('"\\uXXXX"') # Non-hex characters
|
||||
assert_load_failed('"\\u12XY"') # Mixed valid/invalid hex
|
||||
assert_load_failed('"\\u"') # Incomplete sequence
|
||||
assert_load_failed('"\\u123"') # Too short
|
||||
assert_load_failed('"\\u123G"') # Invalid hex digit
|
||||
end
|
||||
|
||||
# Test 3: Control character validation
|
||||
def test_control_characters()
|
||||
# Unescaped control characters (0x00-0x1F) should be rejected
|
||||
# Note: We need to create JSON strings with actual unescaped control characters
|
||||
assert_load_failed('{"test": "hello\x0Aworld"}') # Unescaped newline (0x0A)
|
||||
assert_load_failed('{"test": "hello\x09world"}') # Unescaped tab (0x09)
|
||||
assert_load_failed('{"test": "hello\x0Dworld"}') # Unescaped carriage return (0x0D)
|
||||
assert_load_failed('{"test": "hello\x01world"}') # Unescaped control char (0x01)
|
||||
|
||||
# Properly escaped control characters should work
|
||||
var escaped_newline = json.load('{"test": "hello\\nworld"}')
|
||||
assert(escaped_newline != nil && escaped_newline['test'] == "hello\nworld", "Escaped newline should work")
|
||||
|
||||
var escaped_tab = json.load('{"test": "hello\\tworld"}')
|
||||
assert(escaped_tab != nil && escaped_tab['test'] == "hello\tworld", "Escaped tab should work")
|
||||
|
||||
var escaped_cr = json.load('{"test": "hello\\rworld"}')
|
||||
assert(escaped_cr != nil && escaped_cr['test'] == "hello\rworld", "Escaped carriage return should work")
|
||||
end
|
||||
|
||||
# Test 4: Invalid escape sequence rejection
|
||||
def test_invalid_escapes()
|
||||
# Invalid escape sequences should be rejected
|
||||
assert_load_failed('"\\q"') # Invalid escape character
|
||||
assert_load_failed('"\\x"') # Invalid escape character
|
||||
assert_load_failed('"\\z"') # Invalid escape character
|
||||
assert_load_failed('"\\"') # Incomplete escape at end
|
||||
end
|
||||
|
||||
# Test 5: String length limits
|
||||
def test_string_length_limits()
|
||||
# Test very long strings (should work up to limit)
|
||||
var long_str = '"'
|
||||
for i: 0..999 # 1000 character string (0-999 inclusive)
|
||||
long_str += 'a'
|
||||
end
|
||||
long_str += '"'
|
||||
|
||||
var result = json.load('{"test": ' + long_str + '}')
|
||||
assert(result != nil, "Long string within limits should work")
|
||||
assert(size(result['test']) == 1000, "Long string should have correct length")
|
||||
end
|
||||
|
||||
# Test 6: Mixed Unicode and ASCII (realistic scenario)
|
||||
def test_mixed_content()
|
||||
# Test realistic mixed content that could trigger the vulnerability
|
||||
var mixed = '{"message": "Hello \\u4E16\\u754C! Welcome to \\u0048\\u0065\\u006C\\u006C\\u006F world."}'
|
||||
var result = json.load(mixed)
|
||||
assert(result != nil, "Mixed Unicode/ASCII should work")
|
||||
assert(result['message'] == "Hello 世界! Welcome to Hello world.", "Mixed content should decode correctly")
|
||||
end
|
||||
|
||||
# Test 7: Edge cases
|
||||
def test_edge_cases()
|
||||
# Empty string
|
||||
var empty_result = json.load('{"empty": ""}')
|
||||
assert(empty_result != nil && empty_result['empty'] == "", "Empty string should work")
|
||||
|
||||
# String with only Unicode
|
||||
var unicode_result = json.load('{"unicode": "\\u0048\\u0065\\u006C\\u006C\\u006F"}')
|
||||
assert(unicode_result != nil && unicode_result['unicode'] == "Hello", "Unicode-only string should work")
|
||||
|
||||
# String with only escapes
|
||||
var escapes_result = json.load('{"escapes": "\\n\\t\\r\\\\\\\""}')
|
||||
assert(escapes_result != nil && escapes_result['escapes'] == "\n\t\r\\\"", "Escape-only string should work")
|
||||
|
||||
# Largest code point expressible in a single four-hex-digit escape (U+FFFF)
|
||||
var max_unicode_result = json.load('{"max_unicode": "\\uFFFF"}')
|
||||
assert(max_unicode_result != nil, "Maximum Unicode value should work")
|
||||
end
|
||||
|
||||
# Test 8: Malformed JSON strings
|
||||
def test_malformed_strings()
|
||||
# Unterminated strings
|
||||
assert_load_failed('{"test": "unterminated')
|
||||
assert_load_failed('{"test": "unterminated\\')
|
||||
|
||||
# Invalid JSON structure with string issues
|
||||
assert_load_failed('{"test": "valid"x}')
|
||||
assert_load_failed('{"test": "\\uXXXX", "other": "valid"}')
|
||||
end
|
||||
|
||||
# Test 9: Nested objects with Unicode (stress test)
|
||||
def test_nested_unicode_stress()
|
||||
# Create nested structure with Unicode to test memory management
|
||||
var nested = '{"level0": {"unicode": "\\u0800\\u0801\\u0802", "level1": {"unicode": "\\u0800\\u0801\\u0802", "final": "\\u4E16\\u754C"}}}'
|
||||
|
||||
var result = json.load(nested)
|
||||
assert(result != nil, "Nested Unicode structure should parse successfully")
|
||||
end
|
||||
|
||||
# Test 10: Security regression test
|
||||
def test_security_regression()
|
||||
# This specific pattern would cause buffer overflow in the original code
|
||||
# \u0800 sequences: 6 chars in JSON -> 3 bytes in UTF-8 (decoded output is half the escaped length)
|
||||
var attack_pattern = '{"payload": "'
|
||||
for i: 0..99 # 100 sequences (0-99 inclusive) = 600 chars in JSON, 300 bytes needed, but old code allocated only 100 bytes
|
||||
attack_pattern += '\\u0800'
|
||||
end
|
||||
attack_pattern += '"}'
|
||||
|
||||
var result = json.load(attack_pattern)
|
||||
assert(result != nil, "Security regression test should not crash")
|
||||
assert(size(result['payload']) == 300, "Should produce exactly 300 UTF-8 bytes") # 100 * 3 bytes
|
||||
end
|
||||
|
||||
# Run all security tests
|
||||
test_unicode_expansion()
|
||||
test_invalid_unicode()
|
||||
test_control_characters()
|
||||
test_invalid_escapes()
|
||||
test_string_length_limits()
|
||||
test_mixed_content()
|
||||
test_edge_cases()
|
||||
test_malformed_strings()
|
||||
test_nested_unicode_stress()
|
||||
test_security_regression()
|
||||
|
Loading…
x
Reference in New Issue
Block a user