https://github.com/arendst/Tasmota.git
Berry vulnerability in JSON parsing for unicode (#23603)
parent e9b62811c7 · commit decdfc6b51
@@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file.
 
 ### Fixed
 - LVGL restore `lv_chart.set_range` removed in LVGL 9.3.0 in favor of `lv_chart.set_axis_range` (#23567)
+- Berry vulnerability in JSON parsing for unicode
 
 ### Removed
 
@@ -259,6 +259,7 @@
 #undef BE_USE_SOLIDIFY_MODULE
 #define BE_USE_DEBUG_MODULE 1
 #define BE_USE_SOLIDIFY_MODULE 1
+#define BE_MAPPING_ENABLE_INPUT_VALIDATION 1   // input validation for lv_mapping
 #endif // USE_BERRY_DEBUG
 
 /* Macro: BE_EXPLICIT_XXX
@@ -10,6 +10,7 @@
 #include "be_lexer.h"
 #include <string.h>
 #include <math.h>
+#include <ctype.h>
 
 #if BE_USE_JSON_MODULE
 
@@ -20,6 +21,9 @@
 #define INDENT_WIDTH 2
 #define INDENT_CHAR ' '
 
+/* Security: Maximum JSON string length to prevent memory exhaustion attacks */
+#define MAX_JSON_STRING_LEN (1024 * 1024) /* 1MB limit */
+
 static const char* parser_value(bvm *vm, const char *json);
 static void value_dump(bvm *vm, int *indent, int idx, int fmt);
 
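The 1 MB cap is what bounds memory use against hostile payloads: any computed size above the limit, and the SIZE_MAX error sentinel used by json_strlen_safe further down, are rejected before the allocator is ever asked for a buffer. A minimal standalone sketch of that guard pattern (checked_json_alloc is a hypothetical helper for illustration, not part of the patch, and it uses plain malloc rather than be_malloc):

#include <stdint.h>
#include <stdlib.h>

#define MAX_JSON_STRING_LEN (1024 * 1024)  /* mirrors the 1MB limit above */

/* Hypothetical helper: refuse error-flagged or oversized requests before
 * touching the allocator, so a crafted string cannot exhaust memory. */
static void *checked_json_alloc(size_t byte_len)
{
    if (byte_len == SIZE_MAX || byte_len > MAX_JSON_STRING_LEN) {
        return NULL;                 /* reject instead of attempting a huge allocation */
    }
    return malloc(byte_len + 1);     /* +1 matches the slack byte used later in parser_string */
}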
@@ -62,21 +66,66 @@ static int is_object(bvm *vm, const char *class, int idx)
     return 0;
 }
 
-static int json_strlen(const char *json)
+/* Calculate the actual buffer size needed for JSON string parsing
+ * accounting for Unicode expansion and security limits */
+static size_t json_strlen_safe(const char *json, size_t *actual_len)
 {
     int ch;
     const char *s = json + 1; /* skip '"' */
-    /* get string length "(\\.|[^"])*" */
+    size_t char_count = 0;
+    size_t byte_count = 0;
+
     while ((ch = *s) != '\0' && ch != '"') {
+        char_count++;
+        if (char_count > MAX_JSON_STRING_LEN) {
+            return SIZE_MAX; /* String too long */
+        }
+
         ++s;
         if (ch == '\\') {
             ch = *s++;
             if (ch == '\0') {
-                return -1;
+                return SIZE_MAX; /* Malformed string */
+            }
+
+            switch (ch) {
+                case '"': case '\\': case '/':
+                case 'b': case 'f': case 'n': case 'r': case 't':
+                    byte_count += 1;
+                    break;
+                case 'u':
+                    /* Unicode can expand to 1-3 UTF-8 bytes
+                     * We conservatively assume 3 bytes for safety */
+                    byte_count += 3;
+                    /* Verify we have 4 hex digits following */
+                    for (int i = 0; i < 4; i++) {
+                        if (!s[i] || !isxdigit((unsigned char)s[i])) {
+                            return SIZE_MAX; /* Invalid unicode sequence */
+                        }
+                    }
+                    s += 4; /* Skip the 4 hex digits */
+                    break;
+                default:
+                    return SIZE_MAX; /* Invalid escape sequence */
             }
+        } else if (ch >= 0 && ch <= 0x1f) {
+            return SIZE_MAX; /* Unescaped control character */
+        } else {
+            byte_count += 1;
+        }
+
+        /* Check for potential overflow */
+        if (byte_count > MAX_JSON_STRING_LEN) {
+            return SIZE_MAX;
         }
     }
-    return ch ? cast_int(s - json - 1) : -1;
+
+    if (ch != '"') {
+        return SIZE_MAX; /* Unterminated string */
+    }
+
+    *actual_len = char_count;
+    return byte_count;
 }
 
 static void json2berry(bvm *vm, const char *class)
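The essential change is the sizing rule: every \uXXXX escape is budgeted at 3 output bytes, the UTF-8 worst case for a code point a single escape can express, while simple escapes and ordinary characters count as 1. A standalone sketch (illustration only, not the Berry function, and it skips the hex-digit and control-character validation shown above, so it assumes well-formed input) reproducing the arithmetic the tests further down rely on:

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Conservative output-size estimate: 3 bytes per \uXXXX escape, 1 byte per
 * simple escape or ordinary character. Mirrors the rule in json_strlen_safe
 * without its validation. */
static size_t worst_case_utf8_bytes(const char *s)
{
    size_t bytes = 0;
    while (*s) {
        if (s[0] == '\\' && s[1] == 'u') {
            bytes += 3;   /* \uXXXX can decode to up to 3 UTF-8 bytes */
            s += 6;       /* backslash, 'u', four hex digits */
        } else if (s[0] == '\\' && s[1] != '\0') {
            bytes += 1;   /* \" \\ \/ \b \f \n \r \t decode to one byte each */
            s += 2;
        } else {
            bytes += 1;
            s += 1;
        }
    }
    return bytes;
}

int main(void)
{
    /* Fifty \u0800 escapes, as built in test_unicode_expansion further down. */
    char payload[50 * 6 + 1] = "";
    for (int i = 0; i < 50; i++) {
        strcat(payload, "\\u0800");
    }
    assert(worst_case_utf8_bytes(payload) == 150);  /* 50 escapes * 3 bytes */
    return 0;
}

The 150-byte result is exactly the decoded size that test_unicode_expansion asserts below.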
@@ -117,55 +166,94 @@ static const char* parser_null(bvm *vm, const char *json)
 
 static const char* parser_string(bvm *vm, const char *json)
 {
-    if (*json == '"') {
-        int len = json_strlen(json++);
-        if (len > -1) {
-            int ch;
-            char *buf, *dst = buf = be_malloc(vm, len);
-            while ((ch = *json) != '\0' && ch != '"') {
-                ++json;
-                if (ch == '\\') {
-                    ch = *json++; /* skip '\' */
-                    switch (ch) {
-                    case '"': *dst++ = '"'; break;
-                    case '\\': *dst++ = '\\'; break;
-                    case '/': *dst++ = '/'; break;
-                    case 'b': *dst++ = '\b'; break;
-                    case 'f': *dst++ = '\f'; break;
-                    case 'n': *dst++ = '\n'; break;
-                    case 'r': *dst++ = '\r'; break;
-                    case 't': *dst++ = '\t'; break;
-                    case 'u': { /* load unicode */
-                        dst = be_load_unicode(dst, json);
-                        if (dst == NULL) {
-                            be_free(vm, buf, len);
-                            return NULL;
-                        }
-                        json += 4;
-                        break;
-                    }
-                    default: be_free(vm, buf, len); return NULL; /* error */
-                    }
-                } else if (ch >= 0 && ch <= 0x1f) {
-                    /* control characters must be escaped
-                       as per https://www.rfc-editor.org/rfc/rfc7159#section-7 */
-                    be_free(vm, buf, len);
-                    return NULL;
-                } else {
-                    *dst++ = (char)ch;
-                }
-            }
-            be_assert(ch == '"');
-            /* require the stack to have some free space for the string,
-               since parsing deeply nested objects might
-               crash the VM due to insufficient stack space. */
-            be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
-            be_pushnstring(vm, buf, cast_int(dst - buf));
-            be_free(vm, buf, len);
-            return json + 1; /* skip '"' */
-        }
-    }
-    return NULL;
+    if (*json != '"') {
+        return NULL;
+    }
+
+    size_t char_len;
+    size_t byte_len = json_strlen_safe(json, &char_len);
+
+    if (byte_len == SIZE_MAX) {
+        return NULL; /* Invalid or too long string */
+    }
+
+    if (byte_len == 0) {
+        /* Empty string */
+        be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
+        be_pushstring(vm, "");
+        return json + 2; /* Skip opening and closing quotes */
+    }
+
+    /* Allocate buffer - size is correctly calculated by json_strlen_safe */
+    char *buf = be_malloc(vm, byte_len + 1);
+    if (!buf) {
+        return NULL; /* Out of memory */
+    }
+
+    char *dst = buf;
+    const char *src = json + 1; /* Skip opening quote */
+    int ch;
+
+    while ((ch = *src) != '\0' && ch != '"') {
+        ++src;
+        if (ch == '\\') {
+            ch = *src++;
+            switch (ch) {
+                case '"':
+                    *dst++ = '"';
+                    break;
+                case '\\':
+                    *dst++ = '\\';
+                    break;
+                case '/':
+                    *dst++ = '/';
+                    break;
+                case 'b':
+                    *dst++ = '\b';
+                    break;
+                case 'f':
+                    *dst++ = '\f';
+                    break;
+                case 'n':
+                    *dst++ = '\n';
+                    break;
+                case 'r':
+                    *dst++ = '\r';
+                    break;
+                case 't':
+                    *dst++ = '\t';
+                    break;
+                case 'u': {
+                    dst = be_load_unicode(dst, src);
+                    if (dst == NULL) {
+                        be_free(vm, buf, byte_len + 1);
+                        return NULL;
+                    }
+                    src += 4;
+                    break;
+                }
+                default:
+                    be_free(vm, buf, byte_len + 1);
+                    return NULL; /* Invalid escape */
+            }
+        } else if (ch >= 0 && ch <= 0x1f) {
+            be_free(vm, buf, byte_len + 1);
+            return NULL; /* Unescaped control character */
+        } else {
+            *dst++ = (char)ch;
+        }
+    }
+
+    if (ch != '"') {
+        be_free(vm, buf, byte_len + 1);
+        return NULL; /* Unterminated string */
+    }
+
+    /* Success - create Berry string */
+    be_stack_require(vm, 1 + BE_STACK_FREE_MIN);
+    be_pushnstring(vm, buf, (size_t)(dst - buf));
+    be_free(vm, buf, byte_len + 1);
+    return src + 1; /* Skip closing quote */
 }
 
 static const char* parser_field(bvm *vm, const char *json)
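parser_string still delegates decoding of the four hex digits to be_load_unicode and only changes how much room dst is guaranteed to have. The diff does not show be_load_unicode itself, so the following is a rough sketch of why one escape can need up to 3 output bytes (standard UTF-8 encoding of a BMP code point, not necessarily how the Berry helper is written):

#include <stddef.h>

/* Encode a code point in the range a single \uXXXX escape can express
 * (U+0000..U+FFFF) as UTF-8. Returns the number of bytes written: 1-3. */
static size_t utf8_encode_bmp(unsigned int cp, char *out)
{
    if (cp < 0x80) {                          /* U+0000..U+007F: 1 byte, e.g. \u0048 'H' */
        out[0] = (char)cp;
        return 1;
    } else if (cp < 0x800) {                  /* U+0080..U+07FF: 2 bytes, e.g. \u00E9 'é' */
        out[0] = (char)(0xC0 | (cp >> 6));
        out[1] = (char)(0x80 | (cp & 0x3F));
        return 2;
    } else {                                  /* U+0800..U+FFFF: 3 bytes, e.g. \u0800 */
        out[0] = (char)(0xE0 | (cp >> 12));
        out[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
        out[2] = (char)(0x80 | (cp & 0x3F));
        return 3;
    }
}

So the byte_count += 3 budget in json_strlen_safe is exact for code points at or above U+0800 and over-allocates by at most two bytes otherwise, which is why the tests below can assert exact decoded sizes of 150 and 300 bytes for runs of \u0800.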
@@ -93,3 +93,154 @@ for count : 10..200
   end
   json.dump(arr)
 end
+
+# Security tests for JSON parsing fixes
+
+# Test 1: Unicode expansion buffer overflow protection
+# Each \u0800 sequence (6 chars in JSON) becomes 3 UTF-8 bytes
+# Old code would allocate only 1 byte per sequence, causing buffer overflow
+def test_unicode_expansion()
+  # Test single Unicode sequences of different byte lengths
+  assert_load('"\\u0048"', 'H')   # 1 UTF-8 byte (ASCII)
+  assert_load('"\\u00E9"', 'é')   # 2 UTF-8 bytes (Latin)
+  assert_load('"\\u0800"', 'ࠀ')   # 3 UTF-8 bytes (Samaritan)
+
+  # Test multiple Unicode sequences that would cause buffer overflow in old code
+  var many_unicode = '"'
+  for i: 0..49   # 50 sequences (0-49 inclusive), each \u0800 -> 3 bytes (150 bytes total vs 50 bytes old allocation)
+    many_unicode += '\\u0800'
+  end
+  many_unicode += '"'
+
+  var result = json.load('{"test": ' + many_unicode + '}')
+  assert(result != nil, "Unicode expansion test should succeed")
+  assert(size(result['test']) == 150, "Unicode expansion should produce 150 UTF-8 bytes")   # 50 * 3 bytes
+end
+
+# Test 2: Invalid Unicode sequence rejection
+def test_invalid_unicode()
+  # Invalid hex digits in Unicode sequences should be rejected
+  assert_load_failed('"\\uXXXX"')   # Non-hex characters
+  assert_load_failed('"\\u12XY"')   # Mixed valid/invalid hex
+  assert_load_failed('"\\u"')       # Incomplete sequence
+  assert_load_failed('"\\u123"')    # Too short
+  assert_load_failed('"\\u123G"')   # Invalid hex digit
+end
+
+# Test 3: Control character validation
+def test_control_characters()
+  # Unescaped control characters (0x00-0x1F) should be rejected
+  # Note: We need to create JSON strings with actual unescaped control characters
+  assert_load_failed('{"test": "hello\x0Aworld"}')   # Unescaped newline (0x0A)
+  assert_load_failed('{"test": "hello\x09world"}')   # Unescaped tab (0x09)
+  assert_load_failed('{"test": "hello\x0Dworld"}')   # Unescaped carriage return (0x0D)
+  assert_load_failed('{"test": "hello\x01world"}')   # Unescaped control char (0x01)
+
+  # Properly escaped control characters should work
+  var escaped_newline = json.load('{"test": "hello\\nworld"}')
+  assert(escaped_newline != nil && escaped_newline['test'] == "hello\nworld", "Escaped newline should work")
+
+  var escaped_tab = json.load('{"test": "hello\\tworld"}')
+  assert(escaped_tab != nil && escaped_tab['test'] == "hello\tworld", "Escaped tab should work")
+
+  var escaped_cr = json.load('{"test": "hello\\rworld"}')
+  assert(escaped_cr != nil && escaped_cr['test'] == "hello\rworld", "Escaped carriage return should work")
+end
+
+# Test 4: Invalid escape sequence rejection
+def test_invalid_escapes()
+  # Invalid escape sequences should be rejected
+  assert_load_failed('"\\q"')   # Invalid escape character
+  assert_load_failed('"\\x"')   # Invalid escape character
+  assert_load_failed('"\\z"')   # Invalid escape character
+  assert_load_failed('"\\"')    # Incomplete escape at end
+end
+
+# Test 5: String length limits
+def test_string_length_limits()
+  # Test very long strings (should work up to limit)
+  var long_str = '"'
+  for i: 0..999   # 1000 character string (0-999 inclusive)
+    long_str += 'a'
+  end
+  long_str += '"'
+
+  var result = json.load('{"test": ' + long_str + '}')
+  assert(result != nil, "Long string within limits should work")
+  assert(size(result['test']) == 1000, "Long string should have correct length")
+end
+
+# Test 6: Mixed Unicode and ASCII (realistic scenario)
+def test_mixed_content()
+  # Test realistic mixed content that could trigger the vulnerability
+  var mixed = '{"message": "Hello \\u4E16\\u754C! Welcome to \\u0048\\u0065\\u006C\\u006C\\u006F world."}'
+  var result = json.load(mixed)
+  assert(result != nil, "Mixed Unicode/ASCII should work")
+  assert(result['message'] == "Hello 世界! Welcome to Hello world.", "Mixed content should decode correctly")
+end
+
+# Test 7: Edge cases
+def test_edge_cases()
+  # Empty string
+  var empty_result = json.load('{"empty": ""}')
+  assert(empty_result != nil && empty_result['empty'] == "", "Empty string should work")
+
+  # String with only Unicode
+  var unicode_result = json.load('{"unicode": "\\u0048\\u0065\\u006C\\u006C\\u006F"}')
+  assert(unicode_result != nil && unicode_result['unicode'] == "Hello", "Unicode-only string should work")
+
+  # String with only escapes
+  var escapes_result = json.load('{"escapes": "\\n\\t\\r\\\\\\\""}')
+  assert(escapes_result != nil && escapes_result['escapes'] == "\n\t\r\\\"", "Escape-only string should work")
+
+  # Maximum valid Unicode value
+  var max_unicode_result = json.load('{"max_unicode": "\\uFFFF"}')
+  assert(max_unicode_result != nil, "Maximum Unicode value should work")
+end
+
+# Test 8: Malformed JSON strings
+def test_malformed_strings()
+  # Unterminated strings
+  assert_load_failed('{"test": "unterminated')
+  assert_load_failed('{"test": "unterminated\\')
+
+  # Invalid JSON structure with string issues
+  assert_load_failed('{"test": "valid"x}')
+  assert_load_failed('{"test": "\\uXXXX", "other": "valid"}')
+end
+
+# Test 9: Nested objects with Unicode (stress test)
+def test_nested_unicode_stress()
+  # Create nested structure with Unicode to test memory management
+  var nested = '{"level0": {"unicode": "\\u0800\\u0801\\u0802", "level1": {"unicode": "\\u0800\\u0801\\u0802", "final": "\\u4E16\\u754C"}}}'
+
+  var result = json.load(nested)
+  assert(result != nil, "Nested Unicode structure should parse successfully")
+end
+
+# Test 10: Security regression test
+def test_security_regression()
+  # This specific pattern would cause buffer overflow in the original code
+  # \u0800 sequences: 6 chars in JSON -> 3 bytes in UTF-8 (50% expansion)
+  var attack_pattern = '{"payload": "'
+  for i: 0..99   # 100 sequences (0-99 inclusive) = 600 chars in JSON, 300 bytes needed, but old code allocated only 100 bytes
+    attack_pattern += '\\u0800'
+  end
+  attack_pattern += '"}'
+
+  var result = json.load(attack_pattern)
+  assert(result != nil, "Security regression test should not crash")
+  assert(size(result['payload']) == 300, "Should produce exactly 300 UTF-8 bytes")   # 100 * 3 bytes
+end
+
+# Run all security tests
+test_unicode_expansion()
+test_invalid_unicode()
+test_control_characters()
+test_invalid_escapes()
+test_string_length_limits()
+test_mixed_content()
+test_edge_cases()
+test_malformed_strings()
+test_nested_unicode_stress()
+test_security_regression()