From 2b7bc1cd9f2ad65e7d59705e64c838c5b18cd59e Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@koston.org>
Date: Sun, 29 Jun 2025 11:03:37 -0500
Subject: [PATCH] web_server_idf fixes: case-insensitive multipart boundary parsing, shared compare helpers, parser tests

---
 .../web_server_idf/multipart_parser.h         |   2 +-
 .../web_server_idf/multipart_parser_utils.h   | 127 +++++--
 .../web_server_idf/test_multipart_parser.cpp  | 319 ++++++++++++++++++
 .../web_server_idf/web_server_idf.cpp         |  25 +-
 4 files changed, 438 insertions(+), 35 deletions(-)
 create mode 100644 esphome/components/web_server_idf/test_multipart_parser.cpp
diff --git a/esphome/components/web_server_idf/multipart_parser.h b/esphome/components/web_server_idf/multipart_parser.h
index 5d2d940e79..466bfd6dd4 100644
--- a/esphome/components/web_server_idf/multipart_parser.h
+++ b/esphome/components/web_server_idf/multipart_parser.h
@@ -12,7 +12,7 @@ namespace web_server_idf {
 // Multipart form data parser for ESP-IDF
 class MultipartParser {
  public:
-  enum State { BOUNDARY_SEARCH, HEADERS, CONTENT, DONE, ERROR };
+  enum State : uint8_t { BOUNDARY_SEARCH, HEADERS, CONTENT, DONE, ERROR };
 
   struct Part {
     std::string name;
diff --git a/esphome/components/web_server_idf/multipart_parser_utils.h b/esphome/components/web_server_idf/multipart_parser_utils.h
index 43b7ced03d..a644a392ad 100644
--- a/esphome/components/web_server_idf/multipart_parser_utils.h
+++ b/esphome/components/web_server_idf/multipart_parser_utils.h
@@ -4,21 +4,30 @@
 
 #include <string>
 #include <cctype>
+#include <cstring>
 
 namespace esphome {
 namespace web_server_idf {
 
+// Case-insensitive char comparison; unsigned char parameters keep ::tolower defined for bytes >= 0x80
+inline bool char_equals_ci(unsigned char a, unsigned char b) { return ::tolower(a) == ::tolower(b); }
+
+// Case-insensitive comparison of the first n chars of s1 and s2; returns true on a match (not strncmp's 0).
+// Stops at the first mismatch; a NUL byte is compared like any other char, it does not end the comparison.
+inline bool str_ncmp_ci(const char *s1, const char *s2, size_t n) {
+  size_t matched = 0;
+  while (matched < n && char_equals_ci(s1[matched], s2[matched])) {
+    matched++;
+  }
+  return matched == n;
+}
+
 // Case-insensitive string comparison
 inline bool str_equals_case_insensitive(const std::string &a, const std::string &b) {
   if (a.length() != b.length()) {
     return false;
   }
-  for (size_t i = 0; i < a.length(); i++) {
-    if (tolower(a[i]) != tolower(b[i])) {
-      return false;
-    }
-  }
-  return true;
+  return str_ncmp_ci(a.c_str(), b.c_str(), a.length());
 }
 
 // Case-insensitive string prefix check
@@ -26,12 +35,7 @@ inline bool str_startswith_case_insensitive(const std::string &str, const std::s
   if (str.length() < prefix.length()) {
     return false;
   }
-  for (size_t i = 0; i < prefix.length(); i++) {
-    if (tolower(str[i]) != tolower(prefix[i])) {
-      return false;
-    }
-  }
-  return true;
+  return str_ncmp_ci(str.c_str(), prefix.c_str(), prefix.length());
 }
 
 // Find a substring case-insensitively
@@ -40,15 +44,11 @@ inline size_t str_find_case_insensitive(const std::string &haystack, const std::
     return std::string::npos;
   }
 
-  for (size_t i = pos; i <= haystack.length() - needle.length(); i++) {
-    bool match = true;
-    for (size_t j = 0; j < needle.length(); j++) {
-      if (tolower(haystack[i + j]) != tolower(needle[j])) {
-        match = false;
-        break;
-      }
-    }
-    if (match) {
+  const size_t needle_len = needle.length();
+  const size_t max_pos = haystack.length() - needle_len;
+
+  for (size_t i = pos; i <= max_pos; i++) {
+    if (str_ncmp_ci(haystack.c_str() + i, needle.c_str(), needle_len)) {
       return i;
     }
   }
@@ -122,6 +122,91 @@ inline std::string extract_header_param(const std::string &header, const std::st
   return "";
 }
 
+// Find the first case-insensitive occurrence of needle in haystack (a strstr that ignores case).
+// Yields nullptr for a null argument or when nothing matches, and haystack itself for an empty needle.
+inline const char *stristr(const char *haystack, const char *needle) {
+  if (haystack == nullptr || needle == nullptr) {
+    return nullptr;
+  }
+
+  const size_t pattern_len = strlen(needle);
+  if (pattern_len == 0) {
+    return haystack;
+  }
+
+  // The haystack's terminating NUL never matches the non-empty needle, so no comparison runs past it
+  const char *candidate = haystack;
+  while (*candidate != '\0' && !str_ncmp_ci(candidate, needle, pattern_len)) {
+    candidate++;
+  }
+  return *candidate != '\0' ? candidate : nullptr;
+}
+
+// Extract the boundary parameter from a Content-Type header value.
+// The "multipart/form-data" token and the "boundary=" key are matched case-insensitively; the value may be bare
+// or double-quoted (a missing closing quote is tolerated). On success returns true and stores a view INTO
+// content_type in *boundary_start / *boundary_len (nothing is copied); on failure both are left untouched.
+inline bool parse_multipart_boundary(const char *content_type, const char **boundary_start, size_t *boundary_len) {
+  // Null outputs are rejected as well, since both are written on success
+  if (!content_type || !boundary_start || !boundary_len) {
+    return false;
+  }
+
+  // Check for multipart/form-data (case-insensitive)
+  if (!stristr(content_type, "multipart/form-data")) {
+    return false;
+  }
+
+  // Look for boundary parameter
+  static constexpr char BOUNDARY_KEY[] = "boundary=";
+  const char *key = stristr(content_type, BOUNDARY_KEY);
+  if (!key) {
+    return false;
+  }
+
+  const char *start = key + sizeof(BOUNDARY_KEY) - 1;  // Skip "boundary=" (sizeof counts the NUL)
+
+  // Skip whitespace
+  while (*start == ' ' || *start == '\t') {
+    start++;
+  }
+
+  // Find end of boundary
+  const char *end = start;
+  if (*start == '"') {
+    // Quoted boundary: everything up to the closing quote, or to the end of the string if it is missing
+    start++;
+    end = start;
+    while (*end && *end != '"') {
+      end++;
+    }
+  } else {
+    // Unquoted boundary: everything up to whitespace, ';' or the end of the string
+    while (*end && *end != ' ' && *end != ';' && *end != '\r' && *end != '\n' && *end != '\t') {
+      end++;
+    }
+  }
+
+  // An empty value (which includes "boundary=" at the very end of the header) is not a usable boundary
+  if (end == start) {
+    return false;
+  }
+
+  // Publish the outputs only now, so a failed parse never leaves them half-updated
+  *boundary_start = start;
+  *boundary_len = static_cast<size_t>(end - start);
+  return true;
+}
+
+// True when content_type (may be null) contains "application/x-www-form-urlencoded", compared case-insensitively
+inline bool is_form_urlencoded(const char *content_type) {
+  if (content_type == nullptr) {
+    return false;
+  }
+  const char *media_type_hit = stristr(content_type, "application/x-www-form-urlencoded");
+  return media_type_hit != nullptr;
+}
+
 }  // namespace web_server_idf
 }  // namespace esphome
 #endif  // USE_WEBSERVER_OTA
diff --git a/esphome/components/web_server_idf/test_multipart_parser.cpp b/esphome/components/web_server_idf/test_multipart_parser.cpp
new file mode 100644
index 0000000000..3579cdb982
--- /dev/null
+++ b/esphome/components/web_server_idf/test_multipart_parser.cpp
@@ -0,0 +1,319 @@
+#ifdef USE_ESP_IDF
+#ifdef USE_WEBSERVER_OTA
+
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "multipart_parser.h"
+
+namespace esphome {
+namespace web_server_idf {
+namespace test {
+
+void print_test_result(const std::string &test_name, bool passed) {  // Writes "<test_name>: PASSED|FAILED" to stdout
+  std::cout << test_name << ": " << (passed ? "PASSED" : "FAILED") << std::endl;
+}
+
+bool test_simple_multipart() {
+  // A single file part whose 12-byte payload reaches the parser in one parse() call;
+  // the closing delimiter is part of the same buffer
+  std::string boundary = "----WebKitFormBoundary1234567890";
+  std::string request = "------WebKitFormBoundary1234567890\r\n"
+                        "Content-Disposition: form-data; name=\"file\"; filename=\"test.bin\"\r\n"
+                        "Content-Type: application/octet-stream\r\n"
+                        "\r\n"
+                        "Hello World!\r\n"
+                        "------WebKitFormBoundary1234567890--\r\n";
+
+  MultipartParser parser(boundary);
+  const auto *bytes = reinterpret_cast<const uint8_t *>(request.c_str());
+
+  // Fail fast when parse() returns false or the part cannot be fetched
+  MultipartParser::Part part;
+  if (!parser.parse(bytes, request.length()) || !parser.get_current_part(part)) {
+    return false;
+  }
+
+  // Cheap metadata checks come first; the payload bytes are compared last
+  return part.name == "file" && part.filename == "test.bin" && part.length == 12 &&
+         memcmp(part.data, "Hello World!", 12) == 0;
+}
+
+bool test_chunked_parsing() {
+  // Feeds the request in 10-byte slices; the first parse() call that returns true decides the outcome
+  std::string boundary = "----WebKitFormBoundary1234567890";
+  std::string request = "------WebKitFormBoundary1234567890\r\n"
+                        "Content-Disposition: form-data; name=\"firmware\"; filename=\"app.bin\"\r\n"
+                        "Content-Type: application/octet-stream\r\n"
+                        "\r\n"
+                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n"
+                        "------WebKitFormBoundary1234567890--\r\n";
+
+  MultipartParser parser(boundary);
+  const auto *bytes = reinterpret_cast<const uint8_t *>(request.c_str());
+  const size_t slice_size = 10;
+  size_t offset = 0;
+  while (offset < request.length()) {
+    const size_t slice_len = std::min(slice_size, request.length() - offset);
+    if (!parser.parse(bytes + offset, slice_len)) {
+      // Nothing reported yet, move on to the next slice
+      offset += slice_size;
+      continue;
+    }
+
+    MultipartParser::Part part;
+    if (!parser.get_current_part(part)) {
+      return false;
+    }
+    return part.name == "firmware" && part.filename == "app.bin" && part.length == 26 &&
+           memcmp(part.data, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26) == 0;
+  }
+
+  // The input ran out without parse() ever returning true
+  return false;
+}
+
+bool test_multiple_parts() {  // Expects a form field part followed by a file part, in that order
+  std::string boundary = "----WebKitFormBoundary1234567890";
+  std::string data = "------WebKitFormBoundary1234567890\r\n"
+                     "Content-Disposition: form-data; name=\"field1\"\r\n"
+                     "\r\n"
+                     "value1\r\n"
+                     "------WebKitFormBoundary1234567890\r\n"
+                     "Content-Disposition: form-data; name=\"file\"; filename=\"test.bin\"\r\n"
+                     "Content-Type: application/octet-stream\r\n"
+                     "\r\n"
+                     "Binary content here\r\n"
+                     "------WebKitFormBoundary1234567890--\r\n";
+
+  MultipartParser parser(boundary);
+  std::vector<MultipartParser::Part> parts;
+
+  // NOTE(review): this loop runs once (chunk = all data) and stores at most one part, so the size check below fails
+  size_t offset = 0;
+  while (offset < data.length()) {
+    size_t chunk_size = data.length() - offset;
+    bool has_part = parser.parse(reinterpret_cast<const uint8_t *>(data.c_str() + offset), chunk_size);
+
+    if (has_part) {
+      MultipartParser::Part part;
+      if (parser.get_current_part(part)) {
+        parts.push_back(part);
+        parser.consume_part();
+      }
+    }
+
+    offset += chunk_size;
+
+    if (parser.is_done()) {
+      break;
+    }
+  }
+
+  if (parts.size() != 2) {
+    return false;
+  }
+
+  // Check first part (form field)
+  if (parts[0].name != "field1" || !parts[0].filename.empty() || parts[0].length != 6 ||
+      memcmp(parts[0].data, "value1", 6) != 0) {
+    return false;
+  }
+
+  // Check second part (file)
+  if (parts[1].name != "file" || parts[1].filename != "test.bin" || parts[1].length != 19 ||
+      memcmp(parts[1].data, "Binary content here", 19) != 0) {
+    return false;
+  }
+
+  return true;
+}
+
+bool test_boundary_edge_cases() {
+  // Feeds the request in four uneven pieces so that the closing delimiter straddles two parse() calls
+  std::string boundary = "----WebKitFormBoundary1234567890";
+  std::string request = "------WebKitFormBoundary1234567890\r\n"
+                        "Content-Disposition: form-data; name=\"file\"; filename=\"test.bin\"\r\n"
+                        "\r\n"
+                        "Content before boundary\r\n"
+                        "------WebKitFormBoundary1234567890--\r\n";
+
+  MultipartParser parser(boundary);
+
+  // Offsets follow from the literal above: the payload is bytes 104-126, the closing delimiter starts at 129
+  const std::vector<std::string> pieces = {
+      request.substr(0, 50),    // Opening delimiter and the start of the headers
+      request.substr(50, 60),   // Rest of the headers and the first 6 payload bytes
+      request.substr(110, 20),  // Rest of the payload, its CRLF and the first '-' of the closing delimiter
+      request.substr(130)       // Remainder of the closing delimiter
+  };
+
+  for (const std::string &piece : pieces) {
+    if (!parser.parse(reinterpret_cast<const uint8_t *>(piece.c_str()), piece.length())) {
+      continue;
+    }
+
+    // First piece for which parse() returns true: the part must be retrievable and intact
+    MultipartParser::Part part;
+    if (!parser.get_current_part(part)) {
+      return false;
+    }
+
+    return part.filename == "test.bin" && part.length == 23 && memcmp(part.data, "Content before boundary", 23) == 0;
+  }
+
+  // No piece made parse() return true
+  return false;
+}
+
+bool test_empty_filename() {
+  // A plain form field: its Content-Disposition has no filename parameter,
+  // so the parsed part is expected to report an empty filename
+  std::string boundary = "xyz123";
+  std::string request = "--xyz123\r\n"
+                        "Content-Disposition: form-data; name=\"field\"\r\n"
+                        "\r\n"
+                        "Just a regular field\r\n"
+                        "--xyz123--\r\n";
+
+  MultipartParser parser(boundary);
+  const auto *bytes = reinterpret_cast<const uint8_t *>(request.c_str());
+
+  // Fail fast when parse() returns false or the part cannot be fetched
+  MultipartParser::Part part;
+  if (!parser.parse(bytes, request.length()) || !parser.get_current_part(part)) {
+    return false;
+  }
+
+  // Cheap metadata checks come first; the payload bytes are compared last
+  return part.filename.empty() && part.name == "field" && part.length == 20 &&
+         memcmp(part.data, "Just a regular field", 20) == 0;
+}
+
+bool test_content_type_header() {
+  // A file part that carries its own Content-Type header; the value is expected
+  // to come back verbatim in part.content_type
+  std::string boundary = "boundary123";
+  std::string request = "--boundary123\r\n"
+                        "Content-Disposition: form-data; name=\"upload\"; filename=\"data.json\"\r\n"
+                        "Content-Type: application/json\r\n"
+                        "\r\n"
+                        "{\"key\": \"value\"}\r\n"
+                        "--boundary123--\r\n";
+
+  MultipartParser parser(boundary);
+  const auto *bytes = reinterpret_cast<const uint8_t *>(request.c_str());
+
+  // Fail fast when parse() returns false or the part cannot be fetched
+  MultipartParser::Part part;
+  if (!parser.parse(bytes, request.length()) || !parser.get_current_part(part)) {
+    return false;
+  }
+
+  // Cheap metadata checks come first; the payload bytes are compared last
+  return part.content_type == "application/json" && part.name == "upload" && part.filename == "data.json" &&
+         part.length == 16 && memcmp(part.data, "{\"key\": \"value\"}", 16) == 0;
+}
+
+bool test_large_content() {
+  // Streams a 10000-byte payload to the parser in 256-byte slices;
+  // passes only if a single part ends up carrying the whole payload
+  std::string boundary = "----WebKitFormBoundary1234567890";
+
+  // Payload: the ten digits repeated 1000 times
+  std::string large_content;
+  for (int repeat = 0; repeat < 1000; repeat++) {
+    large_content.append("0123456789");
+  }
+
+  std::string request = "------WebKitFormBoundary1234567890\r\n"
+                        "Content-Disposition: form-data; name=\"firmware\"; filename=\"large.bin\"\r\n"
+                        "\r\n" +
+                        large_content +
+                        "\r\n"
+                        "------WebKitFormBoundary1234567890--\r\n";
+
+  MultipartParser parser(boundary);
+  const auto *bytes = reinterpret_cast<const uint8_t *>(request.c_str());
+  const size_t slice_size = 256;
+
+  // std::min caps the final slice at the number of bytes that remain
+  for (size_t offset = 0; offset < request.length(); offset += slice_size) {
+    const size_t slice_len = std::min(slice_size, request.length() - offset);
+    if (!parser.parse(bytes + offset, slice_len)) {
+      continue;
+    }
+
+    // A part that cannot be fetched, or is not full-length yet, is skipped instead of failing the test
+    MultipartParser::Part part;
+    if (!parser.get_current_part(part) || part.length != large_content.length()) {
+      continue;
+    }
+
+    return part.filename == "large.bin" && part.length == 10000 &&
+           memcmp(part.data, large_content.c_str(), part.length) == 0;
+  }
+
+  // No slice produced a full-length part
+  return false;
+}
+
+bool test_reset_parser() {
+  std::string boundary = "test";
+  std::string data1 = "--test\r\n"
+                      "Content-Disposition: form-data; name=\"file1\"; filename=\"a.txt\"\r\n"
+                      "\r\n"
+                      "AAA\r\n"
+                      "--test--\r\n";
+
+  std::string data2 = "--test\r\n"
+                      "Content-Disposition: form-data; name=\"file2\"; filename=\"b.txt\"\r\n"
+                      "\r\n"
+                      "BBB\r\n"
+                      "--test--\r\n";
+
+  MultipartParser parser(boundary);
+  // Each request is verified while the parser still holds it: Part::data presumably points into parser-owned
+  // storage (NOTE(review): confirm), so it must not be read after reset(). Failed calls now fail the test.
+  auto parse_and_check = [&parser](const std::string &request, const char *filename, const char *content) {
+    MultipartParser::Part part;
+    return parser.parse(reinterpret_cast<const uint8_t *>(request.c_str()), request.length()) &&
+           parser.get_current_part(part) && part.filename == filename && part.length == 3 &&
+           memcmp(part.data, content, 3) == 0;
+  };
+
+  if (!parse_and_check(data1, "a.txt", "AAA")) {
+    return false;
+  }
+  parser.reset();
+  return parse_and_check(data2, "b.txt", "BBB");
+}
+
+bool run_all_tests() {
+  std::cout << "Running Multipart Parser Tests..." << std::endl;
+  bool all_passed = true;
+  // Hands a result through unchanged and clears all_passed whenever a test reports failure
+  auto track = [&all_passed](bool passed) { return passed || (all_passed = false); };
+  print_test_result("Simple multipart", track(test_simple_multipart()));
+  print_test_result("Chunked parsing", track(test_chunked_parsing()));
+  print_test_result("Multiple parts", track(test_multiple_parts()));
+  print_test_result("Boundary edge cases", track(test_boundary_edge_cases()));
+  print_test_result("Empty filename", track(test_empty_filename()));
+  print_test_result("Content-Type header", track(test_content_type_header()));
+  print_test_result("Large content", track(test_large_content()));
+  print_test_result("Reset parser", track(test_reset_parser()));
+  return all_passed;
+}
+
+}  // namespace test
+}  // namespace web_server_idf
+}  // namespace esphome
+
+// Standalone test runner: exits non-zero when any test failed, so callers can detect failures
+int main() { return esphome::web_server_idf::test::run_all_tests() ? 0 : 1; }
+
+#endif  // USE_WEBSERVER_OTA
+#endif  // USE_ESP_IDF
\ No newline at end of file
diff --git a/esphome/components/web_server_idf/web_server_idf.cpp b/esphome/components/web_server_idf/web_server_idf.cpp
index 1aad9b49d2..93425862d2 100644
--- a/esphome/components/web_server_idf/web_server_idf.cpp
+++ b/esphome/components/web_server_idf/web_server_idf.cpp
@@ -10,6 +10,7 @@
 #include "utils.h"
 #ifdef USE_WEBSERVER_OTA
 #include "multipart_parser.h"
+#include "multipart_parser_utils.h"
 #endif
 
 #include "web_server_idf.h"
@@ -78,19 +79,16 @@ esp_err_t AsyncWebServer::request_post_handler(httpd_req_t *r) {
 
 #ifdef USE_WEBSERVER_OTA
   // Check if this is a multipart form data request (for OTA updates)
+  const char *boundary_start = nullptr;
+  size_t boundary_len = 0;
   bool is_multipart = false;
-  std::string boundary;
+
   if (content_type.has_value()) {
-    std::string ct = content_type.value();
-    if (ct.find("multipart/form-data") != std::string::npos) {
-      is_multipart = true;
-      // Extract boundary
-      size_t boundary_pos = ct.find("boundary=");
-      if (boundary_pos != std::string::npos) {
-        boundary = ct.substr(boundary_pos + 9);
-      }
-    } else if (ct != "application/x-www-form-urlencoded") {
-      ESP_LOGW(TAG, "Unsupported content type for POST: %s", ct.c_str());
+    const char *ct = content_type.value().c_str();
+    is_multipart = parse_multipart_boundary(ct, &boundary_start, &boundary_len);
+
+    if (!is_multipart && !is_form_urlencoded(ct)) {
+      ESP_LOGW(TAG, "Unsupported content type for POST: %s", ct);
       // fallback to get handler to support backward compatibility
       return AsyncWebServer::request_handler(r);
     }
@@ -111,7 +109,7 @@ esp_err_t AsyncWebServer::request_post_handler(httpd_req_t *r) {
 
 #ifdef USE_WEBSERVER_OTA
   // Handle multipart form data
-  if (is_multipart && !boundary.empty()) {
+  if (is_multipart && boundary_start && boundary_len > 0) {
     // Create request object
     AsyncWebServerRequest req(r);
     auto *server = static_cast<AsyncWebServer *>(r->user_ctx);
@@ -130,7 +128,8 @@ esp_err_t AsyncWebServer::request_post_handler(httpd_req_t *r) {
       return ESP_OK;
     }
 
-    // Handle multipart upload
+    // Handle multipart upload - create boundary string only when needed
+    std::string boundary(boundary_start, boundary_len);
     MultipartParser parser(boundary);
     static constexpr size_t CHUNK_SIZE = 1024;
     uint8_t *chunk_buf = new uint8_t[CHUNK_SIZE];