From decdfc6b510ff593cf225fcb2ca7e5be0f7be803 Mon Sep 17 00:00:00 2001 From: s-hadinger <49731213+s-hadinger@users.noreply.github.com> Date: Fri, 27 Jun 2025 19:38:31 +0200 Subject: [PATCH] Berry vulnerability in JSON parsing for unicode (#23603) --- CHANGELOG.md | 1 + lib/libesp32/berry/default/berry_conf.h | 1 + lib/libesp32/berry/src/be_jsonlib.c | 184 +++++++++++++++++------- lib/libesp32/berry/tests/json.be | 151 +++++++++++++++++++ 4 files changed, 289 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 725d449c1..7b257db0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file. ### Fixed - LVGL restore `lv_chart.set_range` removed in LVGL 9.3.0 in favor of `lv_chart.set_axis_range` (#23567) +- Berry vulnerability in JSON parsing for unicode ### Removed diff --git a/lib/libesp32/berry/default/berry_conf.h b/lib/libesp32/berry/default/berry_conf.h index 13200f530..91fe5f3de 100644 --- a/lib/libesp32/berry/default/berry_conf.h +++ b/lib/libesp32/berry/default/berry_conf.h @@ -259,6 +259,7 @@ #undef BE_USE_SOLIDIFY_MODULE #define BE_USE_DEBUG_MODULE 1 #define BE_USE_SOLIDIFY_MODULE 1 + #define BE_MAPPING_ENABLE_INPUT_VALIDATION 1 // input validation for lv_mapping #endif // USE_BERRY_DEBUG /* Macro: BE_EXPLICIT_XXX diff --git a/lib/libesp32/berry/src/be_jsonlib.c b/lib/libesp32/berry/src/be_jsonlib.c index 33aca5dc4..8150fb74f 100644 --- a/lib/libesp32/berry/src/be_jsonlib.c +++ b/lib/libesp32/berry/src/be_jsonlib.c @@ -10,6 +10,7 @@ #include "be_lexer.h" #include #include +#include #if BE_USE_JSON_MODULE @@ -20,6 +21,9 @@ #define INDENT_WIDTH 2 #define INDENT_CHAR ' ' +/* Security: Maximum JSON string length to prevent memory exhaustion attacks */ +#define MAX_JSON_STRING_LEN (1024 * 1024) /* 1MB limit */ + static const char* parser_value(bvm *vm, const char *json); static void value_dump(bvm *vm, int *indent, int idx, int fmt); @@ -62,21 +66,66 @@ static int is_object(bvm *vm, const char *class, int idx) return 0; } -static int json_strlen(const char *json) +/* Calculate the actual buffer size needed for JSON string parsing + * accounting for Unicode expansion and security limits */ +static size_t json_strlen_safe(const char *json, size_t *actual_len) { int ch; const char *s = json + 1; /* skip '"' */ - /* get string length "(\\.|[^"])*" */ + size_t char_count = 0; + size_t byte_count = 0; + while ((ch = *s) != '\0' && ch != '"') { + char_count++; + if (char_count > MAX_JSON_STRING_LEN) { + return SIZE_MAX; /* String too long */ + } + ++s; if (ch == '\\') { ch = *s++; if (ch == '\0') { - return -1; + return SIZE_MAX; /* Malformed string */ } + + switch (ch) { + case '"': case '\\': case '/': + case 'b': case 'f': case 'n': case 'r': case 't': + byte_count += 1; + break; + case 'u': + /* Unicode can expand to 1-3 UTF-8 bytes + * We conservatively assume 3 bytes for safety */ + byte_count += 3; + /* Verify we have 4 hex digits following */ + for (int i = 0; i < 4; i++) { + if (!s[i] || !isxdigit((unsigned char)s[i])) { + return SIZE_MAX; /* Invalid unicode sequence */ + } + } + s += 4; /* Skip the 4 hex digits */ + break; + default: + return SIZE_MAX; /* Invalid escape sequence */ + } + } else if (ch >= 0 && ch <= 0x1f) { + return SIZE_MAX; /* Unescaped control character */ + } else { + byte_count += 1; + } + + /* Check for potential overflow */ + if (byte_count > MAX_JSON_STRING_LEN) { + return SIZE_MAX; } } - return ch ? 
cast_int(s - json - 1) : -1; + + if (ch != '"') { + return SIZE_MAX; /* Unterminated string */ + } + + *actual_len = char_count; + return byte_count; } static void json2berry(bvm *vm, const char *class) @@ -117,55 +166,94 @@ static const char* parser_null(bvm *vm, const char *json) static const char* parser_string(bvm *vm, const char *json) { - if (*json == '"') { - int len = json_strlen(json++); - if (len > -1) { - int ch; - char *buf, *dst = buf = be_malloc(vm, len); - while ((ch = *json) != '\0' && ch != '"') { - ++json; - if (ch == '\\') { - ch = *json++; /* skip '\' */ - switch (ch) { - case '"': *dst++ = '"'; break; - case '\\': *dst++ = '\\'; break; - case '/': *dst++ = '/'; break; - case 'b': *dst++ = '\b'; break; - case 'f': *dst++ = '\f'; break; - case 'n': *dst++ = '\n'; break; - case 'r': *dst++ = '\r'; break; - case 't': *dst++ = '\t'; break; - case 'u': { /* load unicode */ - dst = be_load_unicode(dst, json); - if (dst == NULL) { - be_free(vm, buf, len); - return NULL; - } - json += 4; - break; - } - default: be_free(vm, buf, len); return NULL; /* error */ - } - } else if(ch >= 0 && ch <= 0x1f) { - /* control characters must be escaped - as per https://www.rfc-editor.org/rfc/rfc7159#section-7 */ - be_free(vm, buf, len); + if (*json != '"') { + return NULL; + } + + size_t char_len; + size_t byte_len = json_strlen_safe(json, &char_len); + + if (byte_len == SIZE_MAX) { + return NULL; /* Invalid or too long string */ + } + + if (byte_len == 0) { + /* Empty string */ + be_stack_require(vm, 1 + BE_STACK_FREE_MIN); + be_pushstring(vm, ""); + return json + 2; /* Skip opening and closing quotes */ + } + + /* Allocate buffer - size is correctly calculated by json_strlen_safe */ + char *buf = be_malloc(vm, byte_len + 1); + if (!buf) { + return NULL; /* Out of memory */ + } + + char *dst = buf; + const char *src = json + 1; /* Skip opening quote */ + int ch; + + while ((ch = *src) != '\0' && ch != '"') { + ++src; + if (ch == '\\') { + ch = *src++; + switch (ch) { + case '"': + *dst++ = '"'; + break; + case '\\': + *dst++ = '\\'; + break; + case '/': + *dst++ = '/'; + break; + case 'b': + *dst++ = '\b'; + break; + case 'f': + *dst++ = '\f'; + break; + case 'n': + *dst++ = '\n'; + break; + case 'r': + *dst++ = '\r'; + break; + case 't': + *dst++ = '\t'; + break; + case 'u': { + dst = be_load_unicode(dst, src); + if (dst == NULL) { + be_free(vm, buf, byte_len + 1); return NULL; - } else { - *dst++ = (char)ch; } + src += 4; + break; } - be_assert(ch == '"'); - /* require the stack to have some free space for the string, - since parsing deeply nested objects might - crash the VM due to insufficient stack space. 
*/ - be_stack_require(vm, 1 + BE_STACK_FREE_MIN); - be_pushnstring(vm, buf, cast_int(dst - buf)); - be_free(vm, buf, len); - return json + 1; /* skip '"' */ + default: + be_free(vm, buf, byte_len + 1); + return NULL; /* Invalid escape */ + } + } else if (ch >= 0 && ch <= 0x1f) { + be_free(vm, buf, byte_len + 1); + return NULL; /* Unescaped control character */ + } else { + *dst++ = (char)ch; } } - return NULL; + + if (ch != '"') { + be_free(vm, buf, byte_len + 1); + return NULL; /* Unterminated string */ + } + + /* Success - create Berry string */ + be_stack_require(vm, 1 + BE_STACK_FREE_MIN); + be_pushnstring(vm, buf, (size_t)(dst - buf)); + be_free(vm, buf, byte_len + 1); + return src + 1; /* Skip closing quote */ } static const char* parser_field(bvm *vm, const char *json) diff --git a/lib/libesp32/berry/tests/json.be b/lib/libesp32/berry/tests/json.be index 2165eda8d..278c1ce20 100644 --- a/lib/libesp32/berry/tests/json.be +++ b/lib/libesp32/berry/tests/json.be @@ -93,3 +93,154 @@ for count : 10..200 end json.dump(arr) end + +# Security tests for JSON parsing fixes + +# Test 1: Unicode expansion buffer overflow protection +# Each \u0800 sequence (6 chars in JSON) becomes 3 UTF-8 bytes +# Old code would allocate only 1 byte per sequence, causing buffer overflow +def test_unicode_expansion() + # Test single Unicode sequences of different byte lengths + assert_load('"\\u0048"', 'H') # 1 UTF-8 byte (ASCII) + assert_load('"\\u00E9"', 'é') # 2 UTF-8 bytes (Latin) + assert_load('"\\u0800"', 'ࠀ') # 3 UTF-8 bytes (Samaritan) + + # Test multiple Unicode sequences that would cause buffer overflow in old code + var many_unicode = '"' + for i: 0..49 # 50 sequences (0-49 inclusive), each \u0800 -> 3 bytes (150 bytes total vs 50 bytes old allocation) + many_unicode += '\\u0800' + end + many_unicode += '"' + + var result = json.load('{"test": ' + many_unicode + '}') + assert(result != nil, "Unicode expansion test should succeed") + assert(size(result['test']) == 150, "Unicode expansion should produce 150 UTF-8 bytes") # 50 * 3 bytes +end + +# Test 2: Invalid Unicode sequence rejection +def test_invalid_unicode() + # Invalid hex digits in Unicode sequences should be rejected + assert_load_failed('"\\uXXXX"') # Non-hex characters + assert_load_failed('"\\u12XY"') # Mixed valid/invalid hex + assert_load_failed('"\\u"') # Incomplete sequence + assert_load_failed('"\\u123"') # Too short + assert_load_failed('"\\u123G"') # Invalid hex digit +end + +# Test 3: Control character validation +def test_control_characters() + # Unescaped control characters (0x00-0x1F) should be rejected + # Note: We need to create JSON strings with actual unescaped control characters + assert_load_failed('{"test": "hello\x0Aworld"}') # Unescaped newline (0x0A) + assert_load_failed('{"test": "hello\x09world"}') # Unescaped tab (0x09) + assert_load_failed('{"test": "hello\x0Dworld"}') # Unescaped carriage return (0x0D) + assert_load_failed('{"test": "hello\x01world"}') # Unescaped control char (0x01) + + # Properly escaped control characters should work + var escaped_newline = json.load('{"test": "hello\\nworld"}') + assert(escaped_newline != nil && escaped_newline['test'] == "hello\nworld", "Escaped newline should work") + + var escaped_tab = json.load('{"test": "hello\\tworld"}') + assert(escaped_tab != nil && escaped_tab['test'] == "hello\tworld", "Escaped tab should work") + + var escaped_cr = json.load('{"test": "hello\\rworld"}') + assert(escaped_cr != nil && escaped_cr['test'] == "hello\rworld", "Escaped carriage return 
should work") +end + +# Test 4: Invalid escape sequence rejection +def test_invalid_escapes() + # Invalid escape sequences should be rejected + assert_load_failed('"\\q"') # Invalid escape character + assert_load_failed('"\\x"') # Invalid escape character + assert_load_failed('"\\z"') # Invalid escape character + assert_load_failed('"\\"') # Incomplete escape at end +end + +# Test 5: String length limits +def test_string_length_limits() + # Test very long strings (should work up to limit) + var long_str = '"' + for i: 0..999 # 1000 character string (0-999 inclusive) + long_str += 'a' + end + long_str += '"' + + var result = json.load('{"test": ' + long_str + '}') + assert(result != nil, "Long string within limits should work") + assert(size(result['test']) == 1000, "Long string should have correct length") +end + +# Test 6: Mixed Unicode and ASCII (realistic scenario) +def test_mixed_content() + # Test realistic mixed content that could trigger the vulnerability + var mixed = '{"message": "Hello \\u4E16\\u754C! Welcome to \\u0048\\u0065\\u006C\\u006C\\u006F world."}' + var result = json.load(mixed) + assert(result != nil, "Mixed Unicode/ASCII should work") + assert(result['message'] == "Hello 世界! Welcome to Hello world.", "Mixed content should decode correctly") +end + +# Test 7: Edge cases +def test_edge_cases() + # Empty string + var empty_result = json.load('{"empty": ""}') + assert(empty_result != nil && empty_result['empty'] == "", "Empty string should work") + + # String with only Unicode + var unicode_result = json.load('{"unicode": "\\u0048\\u0065\\u006C\\u006C\\u006F"}') + assert(unicode_result != nil && unicode_result['unicode'] == "Hello", "Unicode-only string should work") + + # String with only escapes + var escapes_result = json.load('{"escapes": "\\n\\t\\r\\\\\\\""}') + assert(escapes_result != nil && escapes_result['escapes'] == "\n\t\r\\\"", "Escape-only string should work") + + # Maximum valid Unicode value + var max_unicode_result = json.load('{"max_unicode": "\\uFFFF"}') + assert(max_unicode_result != nil, "Maximum Unicode value should work") +end + +# Test 8: Malformed JSON strings +def test_malformed_strings() + # Unterminated strings + assert_load_failed('{"test": "unterminated') + assert_load_failed('{"test": "unterminated\\') + + # Invalid JSON structure with string issues + assert_load_failed('{"test": "valid"x}') + assert_load_failed('{"test": "\\uXXXX", "other": "valid"}') +end + +# Test 9: Nested objects with Unicode (stress test) +def test_nested_unicode_stress() + # Create nested structure with Unicode to test memory management + var nested = '{"level0": {"unicode": "\\u0800\\u0801\\u0802", "level1": {"unicode": "\\u0800\\u0801\\u0802", "final": "\\u4E16\\u754C"}}}' + + var result = json.load(nested) + assert(result != nil, "Nested Unicode structure should parse successfully") +end + +# Test 10: Security regression test +def test_security_regression() + # This specific pattern would cause buffer overflow in the original code + # \u0800 sequences: 6 chars in JSON -> 3 bytes in UTF-8 (50% expansion) + var attack_pattern = '{"payload": "' + for i: 0..99 # 100 sequences (0-99 inclusive) = 600 chars in JSON, 300 bytes needed, but old code allocated only 100 bytes + attack_pattern += '\\u0800' + end + attack_pattern += '"}' + + var result = json.load(attack_pattern) + assert(result != nil, "Security regression test should not crash") + assert(size(result['payload']) == 300, "Should produce exactly 300 UTF-8 bytes") # 100 * 3 bytes +end + +# Run all security tests 
+test_unicode_expansion() +test_invalid_unicode() +test_control_characters() +test_invalid_escapes() +test_string_length_limits() +test_mixed_content() +test_edge_cases() +test_malformed_strings() +test_nested_unicode_stress() +test_security_regression()
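
Reviewer note on the sizing pass: json_strlen_safe reserves 3 output bytes for every \uXXXX escape. That bound follows from the UTF-8 encoding rules for the Basic Multilingual Plane (U+0000..U+FFFF), which is the full range a single \uXXXX escape can name: code points up to U+007F take 1 byte, up to U+07FF take 2 bytes, and everything else in the BMP takes 3 bytes. A minimal standalone sketch of that rule (illustrative only, not part of the patch or of the Berry API):

    #include <stdint.h>

    /* UTF-8 length of a BMP code point (0x0000..0xFFFF). */
    static int utf8_len_bmp(uint32_t cp)
    {
        if (cp <= 0x7F)  return 1;  /* ASCII               */
        if (cp <= 0x7FF) return 2;  /* two-byte sequences  */
        return 3;                   /* rest of the BMP     */
    }

This is why test_unicode_expansion and test_security_regression expect exactly 3 bytes per \u0800 escape (150 and 300 bytes respectively).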
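
The decoding itself is still delegated to be_load_unicode() in parser_string; json_strlen_safe only verifies with isxdigit() that four hex digits follow the \u. For reference, a self-contained sketch of what such a decode step amounts to, assuming the four digits were already validated and at least 3 bytes remain in the destination buffer (the helpers below are illustrative stand-ins, not the Berry functions; surrogate-pair handling, if be_load_unicode performs any, is not shown):

    #include <stdint.h>

    /* Combine four pre-validated hex digits into a code point value. */
    static uint32_t hex4_to_cp(const char *s)
    {
        uint32_t cp = 0;
        for (int i = 0; i < 4; ++i) {
            char c = s[i];
            cp <<= 4;
            if (c >= '0' && c <= '9')      cp |= (uint32_t)(c - '0');
            else if (c >= 'a' && c <= 'f') cp |= (uint32_t)(c - 'a' + 10);
            else                           cp |= (uint32_t)(c - 'A' + 10);
        }
        return cp;
    }

    /* Emit a BMP code point as UTF-8 and return the advanced write pointer.
     * Writes at most 3 bytes, matching the reservation made per escape by
     * json_strlen_safe. */
    static char *emit_utf8_bmp(char *dst, uint32_t cp)
    {
        if (cp <= 0x7F) {
            *dst++ = (char)cp;
        } else if (cp <= 0x7FF) {
            *dst++ = (char)(0xC0 | (cp >> 6));
            *dst++ = (char)(0x80 | (cp & 0x3F));
        } else {
            *dst++ = (char)(0xE0 | (cp >> 12));
            *dst++ = (char)(0x80 | ((cp >> 6) & 0x3F));
            *dst++ = (char)(0x80 | (cp & 0x3F));
        }
        return dst;
    }

Because the writer emits at most 3 bytes per escape and the sizing pass reserves exactly 3, the fill loop in parser_string stays within the allocated buffer even for inputs made entirely of \u escapes.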