This commit is contained in:
J. Nick Koston 2025-07-10 22:06:31 -10:00
parent ac0f279e7e
commit 3570b888d2
No known key found for this signature in database
4 changed files with 823 additions and 734 deletions

File diff suppressed because it is too large Load Diff

View File

@ -349,23 +349,17 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
}
ProtoVarInt value = *value_res;
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 0) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_varint_field(fields[j].get_type(), field_addr, value);
break;
}
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 0)) {
void *field_addr = base + field->get_offset();
decoded = decode_varint_field(field->get_type(), field_addr, value);
}
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 0) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_varint_field(repeated_fields[j].get_type(), field_addr, value);
break;
}
if (const RepeatedFieldMeta *field = find_field_binary(repeated_fields, repeated_count, field_id, 0)) {
void *field_addr = base + field->get_offset();
decoded = decode_repeated_varint_field(field->get_type(), field_addr, value);
}
}
@ -389,24 +383,17 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
ProtoLengthDelimited value(&buffer[i], field_length);
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 2) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_length_field(fields[j].get_type(), field_addr, value, fields[j].get_message_type_id());
break;
}
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 2)) {
void *field_addr = base + field->get_offset();
decoded = decode_length_field(field->get_type(), field_addr, value, field->get_message_type_id());
}
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 2) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_length_field(repeated_fields[j].get_type(), field_addr, value,
repeated_fields[j].get_message_type_id());
break;
}
if (const RepeatedFieldMeta *field = find_field_binary(repeated_fields, repeated_count, field_id, 2)) {
void *field_addr = base + field->get_offset();
decoded = decode_repeated_length_field(field->get_type(), field_addr, value, field->get_message_type_id());
}
}
@ -427,23 +414,17 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
raw |= uint32_t(buffer[i + 3]) << 24;
Proto32Bit value(raw);
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 5) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_32bit_field(fields[j].get_type(), field_addr, value);
break;
}
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 5)) {
void *field_addr = base + field->get_offset();
decoded = decode_32bit_field(field->get_type(), field_addr, value);
}
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 5) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_32bit_field(repeated_fields[j].get_type(), field_addr, value);
break;
}
if (const RepeatedFieldMeta *field = find_field_binary(repeated_fields, repeated_count, field_id, 5)) {
void *field_addr = base + field->get_offset();
decoded = decode_repeated_32bit_field(field->get_type(), field_addr, value);
}
}

View File

@ -257,7 +257,7 @@ extern const uint8_t REPEATED_MESSAGE_HANDLER_COUNT;
// Optimized metadata structure (4 bytes - no padding on 32-bit architectures)
struct FieldMeta {
uint8_t field_num; // Protobuf field number (1-255)
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: reserved
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: wire_type_high_bit
union {
uint16_t offset; // For non-message types: offset in class (0-65535)
struct {
@ -269,6 +269,16 @@ struct FieldMeta {
// Helper methods
ProtoFieldType get_type() const {
  // The ProtoFieldType lives in the low five bits of type_and_size.
  const uint8_t type_bits = type_and_size & 0x1F;
  return static_cast<ProtoFieldType>(type_bits);
}
uint8_t get_precalced_size() const {
  // Bits 5-6 of type_and_size store (precalculated field id size - 1).
  const uint8_t size_minus_one = (type_and_size >> 5) & 0x03;
  return size_minus_one + 1;
}
uint8_t get_wire_type() const {
// Wire type is encoded as: 0=varint, 2=length-delimited, 5=32-bit
// We only need 1 bit to distinguish between 0/2 and 5 (32-bit)
// If bit 7 is set, it's wire type 5, otherwise check the field type
if (type_and_size & 0x80) {
return 5; // 32-bit types
}
ProtoFieldType t = get_type();
return (t >= ProtoFieldType::TYPE_STRING) ? 2 : 0; // length-delimited : varint
}
uint16_t get_offset() const {
if (get_type() == ProtoFieldType::TYPE_MESSAGE) {
// Reconstruct full offset from packed fields (10-bit offset)
@ -283,7 +293,7 @@ struct FieldMeta {
// Optimized repeated field metadata (4 bytes - no padding on 32-bit architectures)
struct RepeatedFieldMeta {
uint8_t field_num; // Protobuf field number (1-255)
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: reserved
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: wire_type_high_bit
union {
uint16_t offset; // For non-message types: offset in class (0-65535)
struct {
@ -295,6 +305,16 @@ struct RepeatedFieldMeta {
// Helper methods
ProtoFieldType get_type() const {
  // The ProtoFieldType lives in the low five bits of type_and_size.
  const uint8_t type_bits = type_and_size & 0x1F;
  return static_cast<ProtoFieldType>(type_bits);
}
uint8_t get_precalced_size() const {
  // Bits 5-6 of type_and_size store (precalculated field id size - 1).
  const uint8_t size_minus_one = (type_and_size >> 5) & 0x03;
  return size_minus_one + 1;
}
uint8_t get_wire_type() const {
// Wire type is encoded as: 0=varint, 2=length-delimited, 5=32-bit
// We only need 1 bit to distinguish between 0/2 and 5 (32-bit)
// If bit 7 is set, it's wire type 5, otherwise check the field type
if (type_and_size & 0x80) {
return 5; // 32-bit types
}
ProtoFieldType t = get_type();
return (t >= ProtoFieldType::TYPE_STRING) ? 2 : 0; // length-delimited : varint
}
uint16_t get_offset() const {
if (get_type() == ProtoFieldType::TYPE_MESSAGE) {
// Reconstruct full offset from packed fields (10-bit offset)
@ -306,6 +326,49 @@ struct RepeatedFieldMeta {
uint8_t get_message_type_id() const {
  // The type ID occupies the upper 6 bits of message_type_id (0-63);
  // shifting out the two low bits leaves just the ID.
  return static_cast<uint8_t>(message_type_id >> 2);
}
};
// Locate the metadata entry for (field_id, wire_type) in an array that is
// sorted by ascending field_num.
//
// fields    - metadata array; each entry exposes `field_num` and `get_wire_type()`
// count     - number of entries in `fields`
// field_id  - protobuf field number to look for
// wire_type - wire type the entry must report
//
// Returns a pointer to the matching entry, or nullptr when either the field
// number is absent or no entry with that number has the requested wire type.
template<typename MetaType>
inline const MetaType *find_field_binary(const MetaType *fields, uint8_t count, uint8_t field_id, uint8_t wire_type) {
  // Half-open search window [lo, hi).
  uint8_t lo = 0;
  uint8_t hi = count;

  while (lo < hi) {
    const uint8_t probe = (lo + hi) / 2;
    const uint8_t probe_num = fields[probe].field_num;

    if (probe_num < field_id) {
      lo = probe + 1;
      continue;
    }
    if (probe_num > field_id) {
      hi = probe;
      continue;
    }

    // The probe landed on the requested field number.
    if (fields[probe].get_wire_type() == wire_type)
      return &fields[probe];

    // Same number, different wire type: several entries may share a field
    // number, so inspect the neighbours. Walk towards the front first...
    uint8_t idx = probe;
    while (idx > 0 && fields[idx - 1].field_num == field_id) {
      --idx;
      if (fields[idx].get_wire_type() == wire_type)
        return &fields[idx];
    }

    // ...then towards the back.
    for (idx = probe + 1; idx < count && fields[idx].field_num == field_id; ++idx) {
      if (fields[idx].get_wire_type() == wire_type)
        return &fields[idx];
    }

    // The field number exists, but never with the requested wire type.
    return nullptr;
  }

  // The search window collapsed without meeting the field number.
  return nullptr;
}
class ProtoWriteBuffer {
public:
ProtoWriteBuffer(std::vector<uint8_t> *buffer) : buffer_(buffer) {}

View File

@ -101,6 +101,13 @@ PROTO_TYPE_NUM_MAP = {
descriptor.FieldDescriptorProto.TYPE_SFIXED32: 13,
}
# Descriptor field types that are encoded with protobuf wire type 5 (32-bit).
# pack_type_and_size() sets bit 7 (0x80) of the packed type_and_size byte for
# any descriptor type found in this set.
WIRE_TYPE_5_TYPES = {
descriptor.FieldDescriptorProto.TYPE_FLOAT,
descriptor.FieldDescriptorProto.TYPE_FIXED32,
descriptor.FieldDescriptorProto.TYPE_SFIXED32,
}
# Generate with
# protoc --python_out=script/api_protobuf -I esphome/components/api/ api_options.proto
@ -795,6 +802,39 @@ class RepeatedTypeInfo(TypeInfo):
return underlying_size * 2
def pack_type_and_size(
    field_type_or_num: int | descriptor.FieldDescriptorProto.Type, field_tag_size: int
) -> int:
    """Build the packed ``type_and_size`` metadata byte for a field.

    Args:
        field_type_or_num: Either a FieldDescriptorProto type constant or a
            direct type number (used for EnumType=7 and MessageType=10).
        field_tag_size: The precalculated field ID size (1-3).

    Returns:
        The packed byte, laid out as:
        - bits 0-4: ProtoFieldType (5 bits)
        - bits 5-6: precalced_field_id_size - 1 (2 bits)
        - bit 7: set when the field uses wire type 5 (32-bit types)
    """
    # An int argument is taken as an already-resolved type number and is used
    # verbatim; anything else is translated through PROTO_TYPE_NUM_MAP
    # (falling back to 0 when it is not in the map).
    # NOTE(review): if the descriptor type constants are themselves plain
    # ints, this check is true for them as well, so neither the map lookup nor
    # the bit-7 branch below would run for them - confirm against the callers.
    is_raw_number = isinstance(field_type_or_num, int)
    type_num = (
        field_type_or_num
        if is_raw_number
        else PROTO_TYPE_NUM_MAP.get(field_type_or_num, 0)
    )

    packed = (type_num & 0x1F) | ((field_tag_size - 1) << 5)

    # Only descriptor types are checked against WIRE_TYPE_5_TYPES; raw numbers
    # never get the wire-type-5 flag.
    if not is_raw_number and field_type_or_num in WIRE_TYPE_5_TYPES:
        packed |= 0x80

    return packed
def build_type_usage_map(
file_desc: descriptor.FileDescriptorProto,
) -> tuple[dict[str, str | None], dict[str, str | None]]:
@ -1261,9 +1301,7 @@ def build_message_type(
field_type = PROTO_TYPE_MAP.get(field.type, None)
if field_type:
field_tag_size = ti.calculate_field_id_size()
# Pack type and size into type_and_size byte
type_num = PROTO_TYPE_NUM_MAP.get(field.type, 0)
type_and_size = (type_num & 0x1F) | ((field_tag_size - 1) << 5)
type_and_size = pack_type_and_size(field.type, field_tag_size)
if field.type == descriptor.FieldDescriptorProto.TYPE_MESSAGE:
# For messages, use offset_low and message_type_id with offset extension
@ -1287,9 +1325,7 @@ def build_message_type(
if field_type:
field_tag_size = ti.calculate_field_id_size()
# Pack type and size into type_and_size byte
type_num = PROTO_TYPE_NUM_MAP.get(field.type, 0)
type_and_size = (type_num & 0x1F) | ((field_tag_size - 1) << 5)
type_and_size = pack_type_and_size(field.type, field_tag_size)
if field.type == descriptor.FieldDescriptorProto.TYPE_MESSAGE:
# For messages, use offset_low and message_type_id
@ -1320,14 +1356,14 @@ def build_message_type(
elif isinstance(ti, EnumType):
field_tag_size = ti.calculate_field_id_size()
# Enums are TYPE_ENUM (7)
type_and_size = (7 & 0x1F) | ((field_tag_size - 1) << 5)
type_and_size = pack_type_and_size(7, field_tag_size)
regular_fields.append(
f"{{{field.number}, {type_and_size}, {{.offset = PROTO_FIELD_OFFSET({desc.name}, {ti.field_name})}}}}"
)
elif isinstance(ti, MessageType):
field_tag_size = ti.calculate_field_id_size()
# Messages are TYPE_MESSAGE (10)
type_and_size = (10 & 0x1F) | ((field_tag_size - 1) << 5)
type_and_size = pack_type_and_size(10, field_tag_size)
message_type_id = type_registry.get_message_type_id(ti.type_name)
offset = f"PROTO_FIELD_OFFSET({desc.name}, {ti.field_name})"
# Same encoding as above for large offsets
@ -1842,18 +1878,28 @@ namespace api {
# Generate metadata arrays
if regular_fields:
cpp += f"const FieldMeta {class_name}::FIELDS[{len(regular_fields)}] = {{\n"
for i, field in enumerate(regular_fields):
if i < len(regular_fields) - 1:
# Sort fields by field number for binary search
sorted_fields = sorted(
regular_fields, key=lambda f: int(f.split(",")[0].strip("{"))
)
cpp += (
f"const FieldMeta {class_name}::FIELDS[{len(sorted_fields)}] = {{\n"
)
for i, field in enumerate(sorted_fields):
if i < len(sorted_fields) - 1:
cpp += f" {field},\n"
else:
cpp += f" {field}\n"
cpp += "};\n"
if repeated_fields:
cpp += f"const RepeatedFieldMeta {class_name}::REPEATED_FIELDS[{len(repeated_fields)}] = {{\n"
for i, field in enumerate(repeated_fields):
if i < len(repeated_fields) - 1:
# Sort fields by field number for binary search
sorted_fields = sorted(
repeated_fields, key=lambda f: int(f.split(",")[0].strip("{"))
)
cpp += f"const RepeatedFieldMeta {class_name}::REPEATED_FIELDS[{len(sorted_fields)}] = {{\n"
for i, field in enumerate(sorted_fields):
if i < len(sorted_fields) - 1:
cpp += f" {field},\n"
else:
cpp += f" {field}\n"