mirror of
https://github.com/arendst/Tasmota.git
synced 2025-04-19 12:27:16 +00:00
Berry now accepts 'bytes()' as precompiled patterns, added 're.compilebytes()' (#23149)
This commit is contained in:
parent
6486ba3b26
commit
1b51aef911
@ -17,6 +17,7 @@ All notable changes to this project will be documented in this file.
|
||||
- ESP32 enable webcam version 2 (#18732)
|
||||
- ESP8266 enable FTP for >= 4MB variants (#23120)
|
||||
- Berry update flasher for Sonoff ZBBridge Pro (#23136)
|
||||
- Berry `re` now accepts `bytes()` as precompiled patterns, added `re.compilebytes()` (#23149)
|
||||
|
||||
### Fixed
|
||||
- Berry prevent `import` from hiding a solidified class (#23112)
|
||||
|
@ -64,6 +64,25 @@ int be_re_compile(bvm *vm) {
|
||||
be_raise(vm, "type_error", NULL);
|
||||
}
|
||||
|
||||
// Native functions be_const_func()
|
||||
// Berry: `re.compilebytes(pattern:string) -> instance(bytes)`
|
||||
// Compile a regex pattern and return the compiled program as a Berry `bytes()` object.
//
// Argument 1 must be a string holding the pattern.
//
// Raises:
//   "type_error"     when argument 1 is missing or is not a string
//   "internal_error" when the pattern cannot be sized or compiled
int be_re_compilebytes(bvm *vm) {
  int32_t argc = be_top(vm); // Get the number of arguments
  if (argc >= 1 && be_isstring(vm, 1)) {
    const char * regex_str = be_tostring(vm, 1);
    int sz = re1_5_sizecode(regex_str);
    if (sz < 0) {
      be_raise(vm, "internal_error", "error in regex");
    }

    // The new bytes() object pushed on the stack is used directly as the compilation buffer.
    be_pushbytes(vm, NULL, sizeof(ByteProg) + sz);
    ByteProg *code = (ByteProg*) be_tobytes(vm, -1, NULL);
    // Treat a non-zero result as a compile error, like the other `re` entry points,
    // so that an invalid program is never handed back to the caller.
    // NOTE(review): the buffer is presumably owned by the bytes() object and reclaimed
    // by the VM, so nothing is freed explicitly here -- confirm.
    int ret = re1_5_compilecode(code, regex_str);
    if (ret != 0) {
      be_raise(vm, "internal_error", "error in regex");
    }
    be_return(vm);
  }
  be_raise(vm, "type_error", NULL);
}
|
||||
|
||||
// pushes either a list if matched, else `nil`
|
||||
// return index of next offset, or -1 if not found
|
||||
const char *be_re_match_search_run(bvm *vm, ByteProg *code, const char *hay, bbool is_anchored, bbool size_only) {
|
||||
@ -99,9 +118,10 @@ const char *be_re_match_search_run(bvm *vm, ByteProg *code, const char *hay, bbo
|
||||
|
||||
int be_re_match_search(bvm *vm, bbool is_anchored, bbool size_only) {
|
||||
int32_t argc = be_top(vm); // Get the number of arguments
|
||||
if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
if (argc >= 2 && (be_isstring(vm, 1) || be_isbytes(vm, 1)) && be_isstring(vm, 2)) {
|
||||
const char * hay = be_tostring(vm, 2);
|
||||
ByteProg *code = NULL;
|
||||
|
||||
int32_t offset = 0;
|
||||
if (argc >= 3 && be_isint(vm, 3)) {
|
||||
offset = be_toint(vm, 3);
|
||||
@ -111,22 +131,31 @@ int be_re_match_search(bvm *vm, bbool is_anchored, bbool size_only) {
|
||||
if (offset >= hay_len) { be_return_nil(vm); } // any match of empty string returns nil, this catches implicitly when hay_len == 0
|
||||
hay += offset; // shift to offset
|
||||
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
if (be_isstring(vm, 1)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
|
||||
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
} else {
|
||||
code = (ByteProg *) be_tobytes(vm, 1, NULL);
|
||||
}
|
||||
// do the match
|
||||
be_re_match_search_run(vm, code, hay, is_anchored, size_only);
|
||||
be_os_free(code);
|
||||
// cleanup
|
||||
if (be_isstring(vm, 1)) {
|
||||
be_os_free(code);
|
||||
}
|
||||
be_return(vm);
|
||||
}
|
||||
be_raise(vm, "type_error", NULL);
|
||||
@ -134,26 +163,32 @@ int be_re_match_search(bvm *vm, bbool is_anchored, bbool size_only) {
|
||||
|
||||
int be_re_match_search_all(bvm *vm, bbool is_anchored) {
|
||||
int32_t argc = be_top(vm); // Get the number of arguments
|
||||
if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
if (argc >= 2 && (be_isstring(vm, 1) || be_isbytes(vm, 1)) && be_isstring(vm, 2)) {
|
||||
const char * hay = be_tostring(vm, 2);
|
||||
ByteProg *code = NULL;
|
||||
int limit = -1;
|
||||
if (argc >= 3) {
|
||||
limit = be_toint(vm, 3);
|
||||
}
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
|
||||
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
if (be_isstring(vm, 1)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
|
||||
code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
} else {
|
||||
code = (ByteProg *) be_tobytes(vm, 1, NULL);
|
||||
}
|
||||
|
||||
be_newobject(vm, "list");
|
||||
@ -165,7 +200,10 @@ int be_re_match_search_all(bvm *vm, bbool is_anchored) {
|
||||
be_pop(vm, 1);
|
||||
}
|
||||
be_pop(vm, 1);
|
||||
be_os_free(code);
|
||||
// cleanup
|
||||
if (be_isstring(vm, 1)) {
|
||||
be_os_free(code);
|
||||
}
|
||||
be_return(vm);
|
||||
}
|
||||
be_raise(vm, "type_error", NULL);
|
||||
@ -329,29 +367,36 @@ int re_pattern_split(bvm *vm) {
|
||||
// Berry: `re.split(pattern:string|bytes, s:string [, split_limit:int]) -> list(string)`
|
||||
int be_re_split(bvm *vm) {
|
||||
int32_t argc = be_top(vm); // Get the number of arguments
|
||||
if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
if (argc >= 2 && (be_isstring(vm, 1) || be_isbytes(vm, 1)) && be_isstring(vm, 2)) {
|
||||
const char * hay = be_tostring(vm, 2);
|
||||
ByteProg *code = NULL;
|
||||
int split_limit = -1;
|
||||
if (argc >= 3) {
|
||||
split_limit = be_toint(vm, 3);
|
||||
}
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
if (be_isstring(vm, 1)) {
|
||||
const char * regex_str = be_tostring(vm, 1);
|
||||
int sz = re1_5_sizecode(regex_str);
|
||||
if (sz < 0) {
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
|
||||
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
code = be_os_malloc(sizeof(ByteProg) + sz);
|
||||
if (code == NULL) {
|
||||
be_throw(vm, BE_MALLOC_FAIL); /* lack of heap space */
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
} else {
|
||||
code = (ByteProg *) be_tobytes(vm, 1, NULL);
|
||||
}
|
||||
int ret = re1_5_compilecode(code, regex_str);
|
||||
if (ret != 0) {
|
||||
int ret = re_pattern_split_run(vm, code, hay, split_limit);
|
||||
if (be_isstring(vm, 1)) {
|
||||
be_os_free(code);
|
||||
be_raise(vm, "internal_error", "error in regex");
|
||||
}
|
||||
ret = re_pattern_split_run(vm, code, hay, split_limit);
|
||||
be_os_free(code);
|
||||
return ret;
|
||||
}
|
||||
be_raise(vm, "type_error", NULL);
|
||||
@ -363,6 +408,7 @@ int be_re_split(bvm *vm) {
|
||||
@const_object_info_begin
|
||||
module re (scope: global) {
|
||||
compile, func(be_re_compile)
|
||||
compilebytes, func(be_re_compilebytes)
|
||||
search, func(be_re_search)
|
||||
searchall, func(be_re_search_all)
|
||||
match, func(be_re_match)
|
||||
|
52
lib/libesp32/berry/tests/re.be
Normal file
52
lib/libesp32/berry/tests/re.be
Normal file
@ -0,0 +1,52 @@
|
||||
# test regex from re1.5
|
||||
import re
|
||||
|
||||
# standard use of lib
|
||||
assert(re.search("a.*?b(z+)", "zaaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
assert(re.searchall('<([a-zA-Z]+)>', '<abc> yeah <xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
|
||||
assert(re.match("a.*?b(z+)", "aaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
assert(re.match2("a.*?b(z+)", "aaaabbbccbbzzzee") == [14, 'zzz'])
|
||||
assert(re.matchall('<([a-zA-Z]+)>', '<abc> yeah <xyz>') == [['<abc>', 'abc']])
|
||||
assert(re.matchall('<([a-zA-Z]+)>', '<abc><xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
assert(re.split('/', "foo/bar//baz") == ['foo', 'bar', '', 'baz'])
|
||||
|
||||
# pre-compile
|
||||
var rr
|
||||
rr = re.compile("a.*?b(z+)")
|
||||
assert(rr.search("zaaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
rr = re.compile('<([a-zA-Z]+)>')
|
||||
assert(rr.searchall('<abc> yeah <xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
|
||||
rr = re.compile("a.*?b(z+)")
|
||||
assert(rr.match("aaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
assert(rr.match2("aaaabbbccbbzzzee") == [14, 'zzz'])
|
||||
rr = re.compile('<([a-zA-Z]+)>')
|
||||
assert(rr.matchall('<abc> yeah <xyz>') == [['<abc>', 'abc']])
|
||||
assert(rr.matchall('<abc><xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
rr = re.compile('/')
|
||||
assert(rr.split("foo/bar//baz") == ['foo', 'bar', '', 'baz'])
|
||||
|
||||
# compile to bytes
|
||||
var rb
|
||||
rb = re.compilebytes("a.*?b(z+)")
|
||||
assert(re.search(rb, "zaaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
assert(rb == bytes('1B0000000F0000000100000062030260FB7E00016162030260FB01627E02017A62FC7E037E017F'))
|
||||
|
||||
rb = re.compilebytes('<([a-zA-Z]+)>')
|
||||
assert(re.searchall(rb, '<abc> yeah <xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
assert(rb == bytes('1A0000000C0000000100000062030260FB7E00013C7E020302617A415A62F87E03013E7E017F'))
|
||||
|
||||
rb = re.compilebytes("a.*?b(z+)")
|
||||
assert(re.match(rb, "aaaabbbccbbzzzee") == ['aaaabbbccbbzzz', 'zzz'])
|
||||
assert(re.match2(rb, "aaaabbbccbbzzzee") == [14, 'zzz'])
|
||||
assert(rb == bytes('1B0000000F0000000100000062030260FB7E00016162030260FB01627E02017A62FC7E037E017F'))
|
||||
|
||||
rb = re.compilebytes('<([a-zA-Z]+)>')
|
||||
assert(re.matchall(rb, '<abc> yeah <xyz>') == [['<abc>', 'abc']])
|
||||
assert(re.matchall(rb, '<abc><xyz>') == [['<abc>', 'abc'], ['<xyz>', 'xyz']])
|
||||
assert(rb == bytes('1A0000000C0000000100000062030260FB7E00013C7E020302617A415A62F87E03013E7E017F'))
|
||||
|
||||
rb = re.compilebytes('/')
|
||||
assert(re.split(rb, "foo/bar//baz") == ['foo', 'bar', '', 'baz'])
|
||||
assert(rb == bytes('0C000000070000000000000062030260FB7E00012F7E017F'))
|
@ -238,7 +238,10 @@ class webserver_async
|
||||
# pre: self.buf_in is not empty
|
||||
# post: self.buf_in has made progress (smaller or '')
|
||||
def parse_http_req_line()
|
||||
var m = global._re_http_srv.match2(self.buf_in, self.buf_in_offset)
|
||||
import re
|
||||
# print("parse_http_req_line", "self.buf_in=", self.buf_in)
|
||||
var m = re.match2(self.server.re_http_srv, self.buf_in, self.buf_in_offset)
|
||||
# print(f"{m=}")
|
||||
# Ex: "GET / HTTP/1.1\r\n"
|
||||
if m
|
||||
var offset = m[0]
|
||||
@ -261,16 +264,18 @@ class webserver_async
|
||||
#############################################################
|
||||
# parse incoming headers
|
||||
def parse_http_headers()
|
||||
import re
|
||||
while true
|
||||
# print("parse_http_headers", "self.buf_in_offset=", self.buf_in_offset)
|
||||
var m = global._re_http_srv_header.match2(self.buf_in, self.buf_in_offset)
|
||||
# print("m=", m)
|
||||
var m = re.match2(self.server.re_http_srv_header, self.buf_in, self.buf_in_offset)
|
||||
# print(f"{m=}")
|
||||
# Ex: [32, 'Content-Type', 'application/json']
|
||||
if m
|
||||
self.event_http_header(m[1], m[2])
|
||||
self.buf_in_offset += m[0]
|
||||
else # no more headers
|
||||
var m2 = global._re_http_srv_body.match2(self.buf_in, self.buf_in_offset)
|
||||
var m2 = re.match2(self.server.re_http_srv_body, self.buf_in, self.buf_in_offset)
|
||||
# print(f"{m2=}")
|
||||
if m2
|
||||
# end of headers
|
||||
# we keep \r\n which is used by pattern
|
||||
@ -519,9 +524,16 @@ class webserver_async
|
||||
var p1 # temporary object bytes() to avoid reallocation
|
||||
|
||||
# static var TIMEOUT = 1000 # default timeout: 1000ms
|
||||
|
||||
#############################################################
|
||||
# pre-compile REGEX
|
||||
#
|
||||
# static var HTTP_REQ = "^(\\w+) (\\S+) HTTP\\/(\\d\\.\\d)\r\n"
|
||||
# static var HTTP_HEADER_REGEX = "([A-Za-z0-9-]+): (.*?)\r\n" # extract a header with its 2 parts
|
||||
# static var HTTP_BODY_REGEX = "\r\n" # end of headers
|
||||
static var re_http_srv = re.compilebytes("^(\\w+) (\\S+) HTTP\\/(\\d\\.\\d)\r\n")
|
||||
static var re_http_srv_header = re.compilebytes("([A-Za-z0-9-]+): (.*?)\r\n")
|
||||
static var re_http_srv_body = re.compilebytes("\r\n")
|
||||
|
||||
#############################################################
|
||||
# init
|
||||
@ -535,27 +547,12 @@ class webserver_async
|
||||
self.cors = false
|
||||
self.p1 = bytes(100) # reserve 100 bytes by default
|
||||
# TODO what about max_clients ?
|
||||
self.compile_re()
|
||||
# register cb
|
||||
tasmota.add_driver(self)
|
||||
self.fastloop_cb = def () self.loop() end
|
||||
tasmota.add_fast_loop(self.fastloop_cb)
|
||||
end
|
||||
|
||||
#############################################################
|
||||
# compile once for all the regex
|
||||
def compile_re()
|
||||
import re
|
||||
if !global.contains("_re_http_srv")
|
||||
# global._re_http_srv = re.compile(self.HTTP_REQ)
|
||||
# global._re_http_srv_header = re.compile(self.HTTP_HEADER_REGEX)
|
||||
# global._re_http_srv_body = re.compile(self.HTTP_BODY_REGEX)
|
||||
global._re_http_srv = re.compile("^(\\w+) (\\S+) HTTP\\/(\\d\\.\\d)\r\n")
|
||||
global._re_http_srv_header = re.compile("([A-Za-z0-9-]+): (.*?)\r\n")
|
||||
global._re_http_srv_body = re.compile("\r\n")
|
||||
end
|
||||
end
|
||||
|
||||
#############################################################
|
||||
# enable or disable chunked mode (enabled by default)
|
||||
def set_chunked(chunked)
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user