revert binary search, get wire type

This commit is contained in:
J. Nick Koston 2025-07-11 05:38:37 -10:00
parent f2037aadc5
commit 984c24a1b5
No known key found for this signature in database
4 changed files with 715 additions and 803 deletions

File diff suppressed because it is too large Load Diff

View File

@ -349,16 +349,19 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
}
ProtoVarInt value = *value_res;
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 0)) {
void *field_addr = base + field->get_offset();
decoded = decode_varint_field(field->get_type(), field_addr, value);
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 0) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_varint_field(fields[j].get_type(), field_addr, value);
break;
}
}
// If not found, try repeated fields (linear search - usually only 1-2 fields)
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && repeated_fields[j].get_wire_type() == 0) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 0) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_varint_field(repeated_fields[j].get_type(), field_addr, value);
break;
@ -386,16 +389,19 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
ProtoLengthDelimited value(&buffer[i], field_length);
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 2)) {
void *field_addr = base + field->get_offset();
decoded = decode_length_field(field->get_type(), field_addr, value, field->get_message_type_id());
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 2) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_length_field(fields[j].get_type(), field_addr, value, fields[j].get_message_type_id());
break;
}
}
// If not found, try repeated fields (linear search - usually only 1-2 fields)
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && repeated_fields[j].get_wire_type() == 2) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 2) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_length_field(repeated_fields[j].get_type(), field_addr, value,
repeated_fields[j].get_message_type_id());
@ -421,16 +427,19 @@ void ProtoMessage::decode(const uint8_t *buffer, size_t length) {
raw |= uint32_t(buffer[i + 3]) << 24;
Proto32Bit value(raw);
// Try regular fields first using binary search
if (const FieldMeta *field = find_field_binary(fields, field_count, field_id, 5)) {
void *field_addr = base + field->get_offset();
decoded = decode_32bit_field(field->get_type(), field_addr, value);
// Try regular fields first
for (uint8_t j = 0; j < field_count; j++) {
if (fields[j].field_num == field_id && get_wire_type(fields[j].get_type()) == 5) {
void *field_addr = base + fields[j].get_offset();
decoded = decode_32bit_field(fields[j].get_type(), field_addr, value);
break;
}
}
// If not found, try repeated fields (linear search - usually only 1-2 fields)
// If not found, try repeated fields
if (!decoded) {
for (uint8_t j = 0; j < repeated_count; j++) {
if (repeated_fields[j].field_num == field_id && repeated_fields[j].get_wire_type() == 5) {
if (repeated_fields[j].field_num == field_id && get_wire_type(repeated_fields[j].get_type()) == 5) {
void *field_addr = base + repeated_fields[j].get_offset();
decoded = decode_repeated_32bit_field(repeated_fields[j].get_type(), field_addr, value);
break;

View File

@ -259,7 +259,7 @@ extern const uint8_t REPEATED_MESSAGE_HANDLER_COUNT;
// Optimized metadata structure (4 bytes - no padding on 32-bit architectures)
struct FieldMeta {
uint8_t field_num; // Protobuf field number (1-255)
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: wire_type_high_bit
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: reserved
union {
uint16_t offset; // For non-message types: offset in class (0-65535)
struct {
@ -271,16 +271,6 @@ struct FieldMeta {
// Helper methods
inline ProtoFieldType get_type() const {
  // The field type occupies the low five bits (0-4) of type_and_size.
  const uint8_t type_bits = type_and_size & 0x1F;
  return static_cast<ProtoFieldType>(type_bits);
}
inline uint8_t get_precalced_size() const {
  // Bits 5-6 of type_and_size store (size - 1); extract them and add the one back.
  const uint8_t size_minus_one = (type_and_size >> 5) & 0x03;
  return size_minus_one + 1;
}
inline uint8_t get_wire_type() const {
// Wire type is encoded as: 0=varint, 2=length-delimited, 5=32-bit
// We only need 1 bit to distinguish between 0/2 and 5 (32-bit)
// If bit 7 is set, it's wire type 5, otherwise check the field type
if (type_and_size & 0x80) {
return 5; // 32-bit types
}
ProtoFieldType t = get_type();
return (t >= ProtoFieldType::TYPE_STRING) ? 2 : 0; // length-delimited : varint
}
inline uint16_t get_offset() const {
if (get_type() == ProtoFieldType::TYPE_MESSAGE) {
// Reconstruct full offset from packed fields (10-bit offset)
@ -295,7 +285,7 @@ struct FieldMeta {
// Optimized repeated field metadata (4 bytes - no padding on 32-bit architectures)
struct RepeatedFieldMeta {
uint8_t field_num; // Protobuf field number (1-255)
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: wire_type_high_bit
uint8_t type_and_size; // bits 0-4: ProtoFieldType, bits 5-6: precalced_field_id_size-1, bit 7: reserved
union {
uint16_t offset; // For non-message types: offset in class (0-65535)
struct {
@ -307,16 +297,6 @@ struct RepeatedFieldMeta {
// Helper methods
inline ProtoFieldType get_type() const {
  // The field type occupies the low five bits (0-4) of type_and_size.
  const uint8_t type_bits = type_and_size & 0x1F;
  return static_cast<ProtoFieldType>(type_bits);
}
inline uint8_t get_precalced_size() const {
  // Bits 5-6 of type_and_size store (size - 1); extract them and add the one back.
  const uint8_t size_minus_one = (type_and_size >> 5) & 0x03;
  return size_minus_one + 1;
}
inline uint8_t get_wire_type() const {
// Wire type is encoded as: 0=varint, 2=length-delimited, 5=32-bit
// We only need 1 bit to distinguish between 0/2 and 5 (32-bit)
// If bit 7 is set, it's wire type 5, otherwise check the field type
if (type_and_size & 0x80) {
return 5; // 32-bit types
}
ProtoFieldType t = get_type();
return (t >= ProtoFieldType::TYPE_STRING) ? 2 : 0; // length-delimited : varint
}
inline uint16_t get_offset() const {
if (get_type() == ProtoFieldType::TYPE_MESSAGE) {
// Reconstruct full offset from packed fields (10-bit offset)
@ -328,48 +308,6 @@ struct RepeatedFieldMeta {
inline uint8_t get_message_type_id() const {
  // Drop the two low bits of message_type_id; what remains is the type ID (0-63).
  return static_cast<uint8_t>(message_type_id >> 2);
}
};
// Binary search for field lookup - optimized for performance
// Looks up the metadata entry whose field number and wire type both match.
//
// The table is bisected on field_num, so it is expected to be ordered by ascending
// field_num. Several entries may share a field number; once any one of them is hit,
// its same-numbered neighbours are scanned for the requested wire type.
//
// @param fields     start of the metadata table
// @param count      number of entries in the table
// @param field_id   protobuf field number to look for
// @param wire_type  wire type the entry must report via get_wire_type()
// @return pointer to the matching entry, or nullptr when there is none
inline const FieldMeta *find_field_binary(const FieldMeta *fields, uint8_t count, uint8_t field_id, uint8_t wire_type) {
  // Half-open search window [lo, hi).
  uint8_t lo = 0;
  uint8_t hi = count;

  while (lo < hi) {
    const uint8_t pivot = (lo + hi) / 2;
    const uint8_t pivot_num = fields[pivot].field_num;

    if (pivot_num > field_id) {
      hi = pivot;
      continue;
    }
    if (pivot_num < field_id) {
      lo = pivot + 1;
      continue;
    }

    // Field number hit: the pivot itself is the first candidate.
    if (fields[pivot].get_wire_type() == wire_type) {
      return &fields[pivot];
    }

    // Walk left across entries that share the field number.
    uint8_t idx = pivot;
    while (idx > 0 && fields[idx - 1].field_num == field_id) {
      --idx;
      if (fields[idx].get_wire_type() == wire_type) {
        return &fields[idx];
      }
    }

    // Then walk right across entries that share the field number.
    idx = pivot + 1;
    while (idx < count && fields[idx].field_num == field_id) {
      if (fields[idx].get_wire_type() == wire_type) {
        return &fields[idx];
      }
      ++idx;
    }

    // The field number exists, but not with the requested wire type.
    return nullptr;
  }

  // No entry carries this field number.
  return nullptr;
}
class ProtoWriteBuffer {
public:
ProtoWriteBuffer(std::vector<uint8_t> *buffer) : buffer_(buffer) {}

View File

@ -101,13 +101,6 @@ PROTO_TYPE_NUM_MAP = {
descriptor.FieldDescriptorProto.TYPE_SFIXED32: 13,
}
# Descriptor field types encoded with protobuf wire type 5 (32-bit).
# pack_type_and_size() sets bit 7 (0x80) of the type_and_size byte for
# descriptor types that are members of this set.
WIRE_TYPE_5_TYPES = {
descriptor.FieldDescriptorProto.TYPE_FLOAT,
descriptor.FieldDescriptorProto.TYPE_FIXED32,
descriptor.FieldDescriptorProto.TYPE_SFIXED32,
}
# Generate with
# protoc --python_out=script/api_protobuf -I esphome/components/api/ api_options.proto
@ -802,39 +795,6 @@ class RepeatedTypeInfo(TypeInfo):
return underlying_size * 2
def pack_type_and_size(
    field_type_or_num: int | descriptor.FieldDescriptorProto.Type, field_tag_size: int
) -> int:
    """Build the one-byte ``type_and_size`` value stored in the field metadata.

    Args:
        field_type_or_num: A FieldDescriptorProto type constant, or a type number
            passed directly (callers use 7 for enums and 10 for messages).
        field_tag_size: The precalculated field ID size (1-3).

    Returns:
        The packed byte:
        - bits 0-4: ProtoFieldType number
        - bits 5-6: ``field_tag_size - 1``
        - bit 7: set when the descriptor type is in WIRE_TYPE_5_TYPES
    """
    # NOTE(review): protobuf's Python enum constants (e.g.
    # FieldDescriptorProto.TYPE_FLOAT) are presumably plain ints. If so, this
    # check is true for every caller, the PROTO_TYPE_NUM_MAP lookup is skipped
    # and bit 7 is never set - confirm against the protobuf runtime in use.
    is_raw_number = isinstance(field_type_or_num, int)

    if is_raw_number:
        # Direct type numbers are used as-is.
        type_num = field_type_or_num
    else:
        # Descriptor constants are translated; unknown types fall back to 0.
        type_num = PROTO_TYPE_NUM_MAP.get(field_type_or_num, 0)

    packed = ((field_tag_size - 1) << 5) | (type_num & 0x1F)

    # Only descriptor constants can be tested for wire type 5; raw numbers never are.
    if not is_raw_number and field_type_or_num in WIRE_TYPE_5_TYPES:
        packed |= 0x80

    return packed
def build_type_usage_map(
file_desc: descriptor.FileDescriptorProto,
) -> tuple[dict[str, str | None], dict[str, str | None]]:
@ -1301,7 +1261,9 @@ def build_message_type(
field_type = PROTO_TYPE_MAP.get(field.type, None)
if field_type:
field_tag_size = ti.calculate_field_id_size()
type_and_size = pack_type_and_size(field.type, field_tag_size)
# Pack type and size into type_and_size byte
type_num = PROTO_TYPE_NUM_MAP.get(field.type, 0)
type_and_size = (type_num & 0x1F) | ((field_tag_size - 1) << 5)
if field.type == descriptor.FieldDescriptorProto.TYPE_MESSAGE:
# For messages, use offset_low and message_type_id with offset extension
@ -1325,7 +1287,9 @@ def build_message_type(
if field_type:
field_tag_size = ti.calculate_field_id_size()
type_and_size = pack_type_and_size(field.type, field_tag_size)
# Pack type and size into type_and_size byte
type_num = PROTO_TYPE_NUM_MAP.get(field.type, 0)
type_and_size = (type_num & 0x1F) | ((field_tag_size - 1) << 5)
if field.type == descriptor.FieldDescriptorProto.TYPE_MESSAGE:
# For messages, use offset_low and message_type_id
@ -1356,14 +1320,14 @@ def build_message_type(
elif isinstance(ti, EnumType):
field_tag_size = ti.calculate_field_id_size()
# Enums are TYPE_ENUM (7)
type_and_size = pack_type_and_size(7, field_tag_size)
type_and_size = (7 & 0x1F) | ((field_tag_size - 1) << 5)
regular_fields.append(
f"{{{field.number}, {type_and_size}, {{.offset = PROTO_FIELD_OFFSET({desc.name}, {ti.field_name})}}}}"
)
elif isinstance(ti, MessageType):
field_tag_size = ti.calculate_field_id_size()
# Messages are TYPE_MESSAGE (10)
type_and_size = pack_type_and_size(10, field_tag_size)
type_and_size = (10 & 0x1F) | ((field_tag_size - 1) << 5)
message_type_id = type_registry.get_message_type_id(ti.type_name)
offset = f"PROTO_FIELD_OFFSET({desc.name}, {ti.field_name})"
# Same encoding as above for large offsets