diff --git a/lib/Unishox-1.0-shadinger/generator/generator.c b/lib/Unishox-1.0-shadinger/generator/generator.c new file mode 100644 index 000000000..81c46649e --- /dev/null +++ b/lib/Unishox-1.0-shadinger/generator/generator.c @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2019 Siara Logics (cc) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author Arundale R. + * + */ + +// Pre-compute c_95[] and l_95[] + +#include +#include +#include +#include +#include +#include + +typedef unsigned char byte; + +enum {SHX_SET1 = 0, SHX_SET1A, SHX_SET1B, SHX_SET2, SHX_SET3, SHX_SET4, SHX_SET4A}; +char us_vcodes[] = {0, 2, 3, 4, 10, 11, 12, 13, 14, 30, 31}; +char us_vcode_lens[] = {2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5}; +char us_sets[][11] = + {{ 0, ' ', 'e', 0, 't', 'a', 'o', 'i', 'n', 's', 'r'}, + { 0, 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'}, + {'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', 0, 0, 0}, + { 0, '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'}, + {'.', ',', '-', '/', '?', '+', ' ', '(', ')', '$', '@'}, + {';', '#', ':', '<', '^', '*', '"', '{', '}', '[', ']'}, + {'=', '%', '\'', '>', '&', '_', '!', '\\', '|', '~', '`'}}; + // {{ 0, ' ', 'e', 0, 't', 'a', 'o', 'i', 'n', 's', 'r'}, + // { 0, 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'}, + // {'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', 0, 0, 0}, + // { 0, '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'}, + // {'.', ',', '-', '/', '=', '+', ' ', '(', ')', '$', '%'}, + // {'&', ';', ':', '<', '>', '*', '"', 
'{', '}', '[', ']'}, + // {'@', '?', '\'', '^', '#', '_', '!', '\\', '|', '~', '`'}}; + +unsigned int c_95[95] ; +unsigned char l_95[95] ; + + +void init_coder() { + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 11; j++) { + char c = us_sets[i][j]; + if (c != 0 && c != 32) { + int ascii = c - 32; + //int prev_code = c_95[ascii]; + //int prev_code_len = l_95[ascii]; + switch (i) { + case SHX_SET1: // just us_vcode + c_95[ascii] = (us_vcodes[j] << (16 - us_vcode_lens[j])); + l_95[ascii] = us_vcode_lens[j]; + //checkPreus_vcodes(c, prev_code, prev_code_len, c_95[ascii], l_95[ascii]); + if (c >= 'a' && c <= 'z') { + ascii -= ('a' - 'A'); + //prev_code = c_95[ascii]; + //prev_code_len = l_95[ascii]; + c_95[ascii] = (2 << 12) + (us_vcodes[j] << (12 - us_vcode_lens[j])); + l_95[ascii] = 4 + us_vcode_lens[j]; + } + break; + case SHX_SET1A: // 000 + us_vcode + c_95[ascii] = 0 + (us_vcodes[j] << (13 - us_vcode_lens[j])); + l_95[ascii] = 3 + us_vcode_lens[j]; + //checkPreus_vcodes(c, prev_code, prev_code_len, c_95[ascii], l_95[ascii]); + if (c >= 'a' && c <= 'z') { + ascii -= ('a' - 'A'); + //prev_code = c_95[ascii]; + //prev_code_len = l_95[ascii]; + c_95[ascii] = (2 << 12) + 0 + (us_vcodes[j] << (9 - us_vcode_lens[j])); + l_95[ascii] = 4 + 3 + us_vcode_lens[j]; + } + break; + case SHX_SET1B: // 00110 + us_vcode + c_95[ascii] = (6 << 11) + (us_vcodes[j] << (11 - us_vcode_lens[j])); + l_95[ascii] = 5 + us_vcode_lens[j]; + //checkPreus_vcodes(c, prev_code, prev_code_len, c_95[ascii], l_95[ascii]); + if (c >= 'a' && c <= 'z') { + ascii -= ('a' - 'A'); + //prev_code = c_95[ascii]; + //prev_code_len = l_95[ascii]; + c_95[ascii] = (2 << 12) + (6 << 7) + (us_vcodes[j] << (7 - us_vcode_lens[j])); + l_95[ascii] = 4 + 5 + us_vcode_lens[j]; + } + break; + case SHX_SET2: // 0011100 + us_vcode + c_95[ascii] = (28 << 9) + (us_vcodes[j] << (9 - us_vcode_lens[j])); + l_95[ascii] = 7 + us_vcode_lens[j]; + break; + case SHX_SET3: // 0011101 + us_vcode + c_95[ascii] = (29 << 9) + 
(us_vcodes[j] << (9 - us_vcode_lens[j])); + l_95[ascii] = 7 + us_vcode_lens[j]; + break; + case SHX_SET4: // 0011110 + us_vcode + c_95[ascii] = (30 << 9) + (us_vcodes[j] << (9 - us_vcode_lens[j])); + l_95[ascii] = 7 + us_vcode_lens[j]; + break; + case SHX_SET4A: // 0011111 + us_vcode + c_95[ascii] = (31 << 9) + (us_vcodes[j] << (9 - us_vcode_lens[j])); + l_95[ascii] = 7 + us_vcode_lens[j]; + } + //checkPreus_vcodes(c, prev_code, prev_code_len, c_95[ascii], l_95[ascii]); + } + } + } + c_95[0] = 16384; + l_95[0] = 3; + +} + +int main(int argv, char *args[]) { + init_coder(); + + printf("uint16_t c_95[95] PROGMEM = {"); + for (uint8_t i = 0; i<95; i++) { + if (i) { printf(", "); } + printf("0x%04X", c_95[i]); + } + printf(" };\n"); + + printf("uint8_t l_95[95] PROGMEM = {"); + for (uint8_t i = 0; i<95; i++) { + if (i) { printf(", "); } + printf("%6d", l_95[i]); + } + printf(" };\n"); + + printf("\n\n"); + + printf("uint16_t c_95[95] PROGMEM = {"); + for (uint8_t i = 0; i<95; i++) { + if (i) { printf(", "); } + printf("%5d", c_95[i]); + } + printf(" };\n"); + + printf("uint8_t l_95[95] PROGMEM = {"); + for (uint8_t i = 0; i<95; i++) { + if (i) { printf(", "); } + printf("%5d", l_95[i]); + } + printf(" };\n"); + + + printf("uint16_t cl_95[95] PROGMEM = {"); + for (uint8_t i = 0; i<95; i++) { + if (i) { printf(", "); } + printf("0x%04X + %2d", c_95[i], l_95[i]); + } + printf(" };\n"); + +} \ No newline at end of file diff --git a/lib/Unishox-1.0-shadinger/generator/remapping.xlsx b/lib/Unishox-1.0-shadinger/generator/remapping.xlsx new file mode 100644 index 000000000..94a82ecde Binary files /dev/null and b/lib/Unishox-1.0-shadinger/generator/remapping.xlsx differ diff --git a/lib/Unishox-1.0-shadinger/library.properties.txt b/lib/Unishox-1.0-shadinger/library.properties.txt new file mode 100644 index 000000000..138b2027c --- /dev/null +++ b/lib/Unishox-1.0-shadinger/library.properties.txt @@ -0,0 +1,8 @@ +name=Unishox Compressor Decompressor highly customized and 
optimized for ESP8266 and Tasmota +version=1.0 +author=Arundale Ramanathan, Stephan Hadinger +maintainer=Arun , Stephan +sentence=Unishox compression for Tasmota Rules +paragraph=It is based on Unishox hybrid encoding technique. This version has specific Unicode code removed for size. +url=https://github.com/siara-cc/Unishox +architectures=esp8266 diff --git a/lib/Unishox-1.0-shadinger/src/unishox.cpp b/lib/Unishox-1.0-shadinger/src/unishox.cpp new file mode 100644 index 000000000..72c9d3915 --- /dev/null +++ b/lib/Unishox-1.0-shadinger/src/unishox.cpp @@ -0,0 +1,602 @@ +/* + * Copyright (C) 2019 Siara Logics (cc) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author Arundale R. + * + */ + +/* + * + * This is a highly modified and optimized version of Unishox + * for Tasmota, aimed at compressing `Rules` which are typically + * short strings from 50 to 500 bytes. 
+ * + * - moved to C++ (but still C-style) + * - c_95[] and l_95[] are pre-computed + * - all arrays in PROGMEM + * - removed all Unicode specific code to get code smaller, Unicode is rare in rules and encoded as pure binary + * - removed prev_lines management to reduce code size, we don't track previous encodings + * - using C++ const instead of #define + * - reusing the Unicode marker to encode pure binary, which is 3 bits instead of 9 + * - reverse binary encoding to 255-byte, favoring short encoding for values above 127, typical of Unicode + * - remove 2 bits encoding for Counts, since it could lead to a series of more than 8 consecutive 0-bits and output NULL char. + * Minimum encoding is 5 bits, which means spending 3+1=4 more bits for values in the range 0..3 + * - removed CRLF encoding and reusing entry for RPT, saving 3 bits for repeats. Note: any CR will be binary encoded + * - add safeguard to the output size (len_out), note that the compress buffer needs to be 4 bytes larger than actual compressed output. + * This is needed to avoid crash, since output can have ~30 bits + * - combined c_95[] and l_95[] to a single array to save space + * - Changed mapping of some characters in Set3, Set4 and Set4A, favoring frequent characters in rules and javascript + * - Added escape mechanism to ensure we never output NULL char. The marker is 0x2A which looked rare in preliminary tests + * + * @author Stephan Hadinger + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "unishox.h" + +typedef unsigned char byte; +// we squeeze both c_95[] and l_95[] in a single array.
+// c_95[] uses only the 3 upper nibbles (or 12 most signifcant bits), while the last nibble encodes length (3..13) +uint16_t cl_95[95] PROGMEM = {0x4000 + 3, 0x3F80 + 11, 0x3D80 + 11, 0x3C80 + 10, 0x3BE0 + 12, 0x3E80 + 10, 0x3F40 + 11, 0x3EC0 + 10, 0x3BA0 + 11, 0x3BC0 + 11, 0x3D60 + 11, 0x3B60 + 11, 0x3A80 + 10, 0x3AC0 + 10, 0x3A00 + 9, 0x3B00 + 10, 0x38C0 + 10, 0x3900 + 10, 0x3940 + 11, 0x3960 + 11, 0x3980 + 11, 0x39A0 + 11, 0x39C0 + 11, 0x39E0 + 12, 0x39F0 + 12, 0x3880 + 10, 0x3CC0 + 10, 0x3C00 + 9, 0x3D00 + 10, 0x3E00 + 9, 0x3F00 + 10, 0x3B40 + 11, 0x3BF0 + 12, 0x2B00 + 8, 0x21C0 + 11, 0x20C0 + 10, 0x2100 + 10, 0x2600 + 7, 0x2300 + 11, 0x21E0 + 12, 0x2140 + 11, 0x2D00 + 8, 0x2358 + 13, 0x2340 + 12, 0x2080 + 10, 0x21A0 + 11, 0x2E00 + 8, 0x2C00 + 8, 0x2180 + 11, 0x2350 + 13, 0x2F80 + 9, 0x2F00 + 9, 0x2A00 + 8, 0x2160 + 11, 0x2330 + 12, 0x21F0 + 12, 0x2360 + 13, 0x2320 + 12, 0x2368 + 13, 0x3DE0 + 12, 0x3FA0 + 11, 0x3DF0 + 12, 0x3D40 + 11, 0x3F60 + 11, 0x3FF0 + 12, 0xB000 + 4, 0x1C00 + 7, 0x0C00 + 6, 0x1000 + 6, 0x6000 + 3, 0x3000 + 7, 0x1E00 + 8, 0x1400 + 7, 0xD000 + 4, 0x3580 + 9, 0x3400 + 8, 0x0800 + 6, 0x1A00 + 7, 0xE000 + 4, 0xC000 + 4, 0x1800 + 7, 0x3500 + 9, 0xF800 + 5, 0xF000 + 5, 0xA000 + 4, 0x1600 + 7, 0x3300 + 8, 0x1F00 + 8, 0x3600 + 9, 0x3200 + 8, 0x3680 + 9, 0x3DA0 + 11, 0x3FC0 + 11, 0x3DC0 + 11, 0x3FE0 + 12 }; +// Original version with c/l separate +// uint16_t c_95[95] PROGMEM = {0x4000, 0x3F80, 0x3D80, 0x3C80, 0x3BE0, 0x3E80, 0x3F40, 0x3EC0, 0x3BA0, 0x3BC0, 0x3D60, 0x3B60, 0x3A80, 0x3AC0, 0x3A00, 0x3B00, 0x38C0, 0x3900, 0x3940, 0x3960, 0x3980, 0x39A0, 0x39C0, 0x39E0, 0x39F0, 0x3880, 0x3CC0, 0x3C00, 0x3D00, 0x3E00, 0x3F00, 0x3B40, 0x3BF0, 0x2B00, 0x21C0, 0x20C0, 0x2100, 0x2600, 0x2300, 0x21E0, 0x2140, 0x2D00, 0x2358, 0x2340, 0x2080, 0x21A0, 0x2E00, 0x2C00, 0x2180, 0x2350, 0x2F80, 0x2F00, 0x2A00, 0x2160, 0x2330, 0x21F0, 0x2360, 0x2320, 0x2368, 0x3DE0, 0x3FA0, 0x3DF0, 0x3D40, 0x3F60, 0x3FF0, 0xB000, 0x1C00, 0x0C00, 0x1000, 0x6000, 0x3000, 0x1E00, 
0x1400, 0xD000, 0x3580, 0x3400, 0x0800, 0x1A00, 0xE000, 0xC000, 0x1800, 0x3500, 0xF800, 0xF000, 0xA000, 0x1600, 0x3300, 0x1F00, 0x3600, 0x3200, 0x3680, 0x3DA0, 0x3FC0, 0x3DC0, 0x3FE0 }; +// uint8_t l_95[95] PROGMEM = { 3, 11, 11, 10, 12, 10, 11, 10, 11, 11, 11, 11, 10, 10, 9, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 10, 10, 9, 10, 9, 10, 11, 12, 8, 11, 10, 10, 7, 11, 12, 11, 8, 13, 12, 10, 11, 8, 8, 11, 13, 9, 9, 8, 11, 12, 12, 13, 12, 13, 12, 11, 12, 11, 11, 12, 4, 7, 6, 6, 3, 7, 8, 7, 4, 9, 8, 6, 7, 4, 4, 7, 9, 5, 5, 4, 7, 8, 8, 9, 8, 9, 11, 11, 11, 12 }; + +enum {SHX_STATE_1 = 1, SHX_STATE_2}; // removed Unicode state + +enum {SHX_SET1 = 0, SHX_SET1A, SHX_SET1B, SHX_SET2, SHX_SET3, SHX_SET4, SHX_SET4A}; +// changed mapping in Set3, Set4, Set4A to accomodate frequencies in Rules and Javascript +char sets[][11] PROGMEM = + {{ 0, ' ', 'e', 0, 't', 'a', 'o', 'i', 'n', 's', 'r'}, + { 0, 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'}, + {'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', 0, 0, 0}, + { 0, '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'}, + {'.', ',', '-', '/', '?', '+', ' ', '(', ')', '$', '@'}, + {';', '#', ':', '<', '^', '*', '"', '{', '}', '[', ']'}, + {'=', '%', '\'', '>', '&', '_', '!', '\\', '|', '~', '`'}}; + // {{ 0, ' ', 'e', 0, 't', 'a', 'o', 'i', 'n', 's', 'r'}, + // { 0, 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'}, + // {'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', 0, 0, 0}, + // { 0, '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'}, + // {'.', ',', '-', '/', '=', '+', ' ', '(', ')', '$', '%'}, + // {'&', ';', ':', '<', '>', '*', '"', '{', '}', '[', ']'}, + // {'@', '?', '\'', '^', '#', '_', '!', '\\', '|', '~', '`'}}; + +// Decoder is designed for using less memory, not speed +// Decode lookup table for code index and length +// First 2 bits 00, Next 3 bits indicate index of code from 0, +// last 3 bits indicate code length in bits +// 0, 1, 2, 3, 4, +char us_vcode[32] PROGMEM = + {2 + (0 << 3), 3 + (3 << 3), 3 + (1 << 3), 4 + (6 << 3), 0, +// 
5, 6, 7, 8, 9, 10 + 4 + (4 << 3), 3 + (2 << 3), 4 + (8 << 3), 0, 0, 0, +// 11, 12, 13, 14, 15 + 4 + (7 << 3), 0, 4 + (5 << 3), 0, 5 + (9 << 3), +// 16, 17, 18, 19, 20, 21, 22, 23 + 0, 0, 0, 0, 0, 0, 0, 0, +// 24, 25, 26, 27, 28, 29, 30, 31 + 0, 0, 0, 0, 0, 0, 0, 5 + (10 << 3)}; +// 0, 1, 2, 3, 4, 5, 6, 7, +char us_hcode[32] PROGMEM = + {1 + (1 << 3), 2 + (0 << 3), 0, 3 + (2 << 3), 0, 0, 0, 5 + (3 << 3), +// 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 5 + (5 << 3), +// 16, 17, 18, 19, 20, 21, 22, 23 + 0, 0, 0, 0, 0, 0, 0, 5 + (4 << 3), +// 24, 25, 26, 27, 28, 29, 30, 31 + 0, 0, 0, 0, 0, 0, 0, 5 + (6 << 3)}; + +const char ESCAPE_MARKER = 0x2A; // Escape any null char + +const uint16_t TERM_CODE = 0x37C0; // 0b0011011111000000 +const uint16_t TERM_CODE_LEN = 10; +const uint16_t DICT_CODE = 0x0000; +const uint16_t DICT_CODE_LEN = 5; +const uint16_t DICT_OTHER_CODE = 0x0000; // not used +const uint16_t DICT_OTHER_CODE_LEN = 6; +// const uint16_t RPT_CODE = 0x2370; +// const uint16_t RPT_CODE_LEN = 13; +const uint16_t RPT_CODE_TASMOTA = 0x3780; +const uint16_t RPT_CODE_TASMOTA_LEN = 10; +const uint16_t BACK2_STATE1_CODE = 0x2000; // 0010 = back to lower case +const uint16_t BACK2_STATE1_CODE_LEN = 4; +const uint16_t BACK_FROM_UNI_CODE = 0xFE00; +const uint16_t BACK_FROM_UNI_CODE_LEN = 8; +// const uint16_t CRLF_CODE = 0x3780; +// const uint16_t CRLF_CODE_LEN = 10; +const uint16_t LF_CODE = 0x3700; +const uint16_t LF_CODE_LEN = 9; +const uint16_t TAB_CODE = 0x2400; +const uint16_t TAB_CODE_LEN = 7; +// const uint16_t UNI_CODE = 0x8000; // Unicode disabled +// const uint16_t UNI_CODE_LEN = 3; +// const uint16_t UNI_STATE_SPL_CODE = 0xF800; +// const uint16_t UNI_STATE_SPL_CODE_LEN = 5; +// const uint16_t UNI_STATE_DICT_CODE = 0xFC00; +// const uint16_t UNI_STATE_DICT_CODE_LEN = 7; +// const uint16_t CONT_UNI_CODE = 0x2800; +// const uint16_t CONT_UNI_CODE_LEN = 7; +const uint16_t ALL_UPPER_CODE = 0x2200; +const uint16_t ALL_UPPER_CODE_LEN = 8; +const uint16_t 
SW2_STATE2_CODE = 0x3800; +const uint16_t SW2_STATE2_CODE_LEN = 7; +const uint16_t ST2_SPC_CODE = 0x3B80; +const uint16_t ST2_SPC_CODE_LEN = 11; +const uint16_t BIN_CODE_TASMOTA = 0x8000; +const uint16_t BIN_CODE_TASMOTA_LEN = 3; +// const uint16_t BIN_CODE = 0x2000; +// const uint16_t BIN_CODE_LEN = 9; + +#define NICE_LEN 5 + +// uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00}; +uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; + +int append_bits(char *out, size_t ol, unsigned int code, int clen, byte state) { + + byte cur_bit; + byte blen; + unsigned char a_byte; + + if (state == SHX_STATE_2) { + // remove change state prefix + if ((code >> 9) == 0x1C) { + code <<= 7; + clen -= 7; + } + //if (code == 14272 && clen == 10) { + // code = 9084; + // clen = 14; + //} + } + while (clen > 0) { + cur_bit = ol % 8; + blen = (clen > 8 ? 8 : clen); + // a_byte = (code & pgm_read_word(&mask[blen - 1])) >> 8; + // a_byte = (code & (pgm_read_word(&mask[blen - 1]) << 8)) >> 8; + a_byte = (code >> 8) & pgm_read_word(&mask[blen - 1]); + a_byte >>= cur_bit; + if (blen + cur_bit > 8) + blen = (8 - cur_bit); + if (out) { // if out == nullptr, then we are in dry-run mode + if (cur_bit == 0) + out[ol / 8] = a_byte; + else + out[ol / 8] |= a_byte; + } + code <<= blen; + ol += blen; + if ((out) && (0 == ol % 8)) { // if out == nullptr, dry-run mode. 
We miss the escaping of characters in the length + // we completed a full byte + char last_c = out[(ol / 8) - 1]; + if ((0 == last_c) || (ESCAPE_MARKER == last_c)) { + out[ol / 8] = 1 + last_c; // increment to 0x01 or 0x2B + out[(ol / 8) -1] = ESCAPE_MARKER; // replace old value with marker + ol += 8; // add one full byte + } + } + clen -= blen; + } + return ol; +} + +// First five bits are code and Last three bits of codes represent length +// removing last 2 bytes, unused, we will never have values above 600 bytes +// const byte codes[7] = {0x01, 0x82, 0xC3, 0xE5, 0xED, 0xF5, 0xFD}; +// const byte bit_len[7] = {2, 5, 7, 9, 12, 16, 17}; +// const uint16_t adder[7] = {0, 4, 36, 164, 676, 4772, 0}; +byte codes[] PROGMEM = { 0x82, 0xC3, 0xE5, 0xED, 0xF5 }; +byte bit_len[] PROGMEM = { 5, 7, 9, 12, 16 }; +// uint16_t adder[7] PROGMEM = { 0, 32, 160, 672, 4768 }; // no more used + +int encodeCount(char *out, int ol, int count) { + int till = 0; + int base = 0; + for (int i = 0; i < sizeof(bit_len); i++) { + uint32_t bit_len_i = pgm_read_byte(&bit_len[i]); + till += (1 << bit_len_i); + if (count < till) { + byte codes_i = pgm_read_byte(&codes[i]); + ol = append_bits(out, ol, (codes_i & 0xF8) << 8, codes_i & 0x07, 1); + // ol = append_bits(out, ol, (count - pgm_read_word(&adder[i])) << (16 - bit_len_i), bit_len_i, 1); + ol = append_bits(out, ol, (count - base) << (16 - bit_len_i), bit_len_i, 1); + return ol; + } + base = till; + } + return ol; +} + +int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *state, byte *is_all_upper) { + int j, k; + int longest_dist = 0; + int longest_len = 0; + for (j = l - NICE_LEN; j >= 0; j--) { + for (k = l; k < len && j + k - l < l; k++) { + if (in[k] != in[j + k - l]) + break; + } + // while ((((unsigned char) in[k]) >> 6) == 2) + // k--; // Skip partial UTF-8 matches + //if ((in[k - 1] >> 3) == 0x1E || (in[k - 1] >> 4) == 0x0E || (in[k - 1] >> 5) == 0x06) + // k--; + if (k - l > NICE_LEN - 1) { + int match_len = 
k - l - NICE_LEN; + int match_dist = l - j - NICE_LEN + 1; + if (match_len > longest_len) { + longest_len = match_len; + longest_dist = match_dist; + } + } + } + if (longest_len) { + if (*state == SHX_STATE_2 || *is_all_upper) { + *is_all_upper = 0; + *state = SHX_STATE_1; + *ol = append_bits(out, *ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, *state); + } + *ol = append_bits(out, *ol, DICT_CODE, DICT_CODE_LEN, 1); + *ol = encodeCount(out, *ol, longest_len); + *ol = encodeCount(out, *ol, longest_dist); + l += (longest_len + NICE_LEN); + l--; + return l; + } + return -l; +} + +// Compress a buffer. +// Inputs: +// - in: non-null pointer to a buffer of bytes to be compressed. Progmem is not valid. Null bytes are valid. +// - len: size of the input buffer. 0 is valid for empty buffer +// - out: pointer to output buffer. out is nullptr, the compressor does a dry-run and reports the compressed size without writing bytes +// - len_out: length in bytes of the output buffer. +// Output: +// - if >= 0: size of the compressed buffer. 
The output buffer does not contain NULL bytes, and it is not NULL terminated +// - if < 0: an error occured, most certainly the output buffer was not large enough +int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out) { + + char *ptr; + byte bits; + byte state; + + int l, ll, ol; + char c_in, c_next; + byte is_upper, is_all_upper; + + ol = 0; + state = SHX_STATE_1; + is_all_upper = 0; + for (l=0; l 0) { + continue; + } + l = -l; + } + if (state == SHX_STATE_2) { // if Set2 + if ((c_in >= ' ' && c_in <= '@') || + (c_in >= '[' && c_in <= '`') || + (c_in >= '{' && c_in <= '~')) { + } else { + state = SHX_STATE_1; // back to Set1 and lower case + ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); + } + } + + is_upper = 0; + if (c_in >= 'A' && c_in <= 'Z') + is_upper = 1; + else { + if (is_all_upper) { + is_all_upper = 0; + ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); + } + } + + c_next = 0; + if (l+1 < len) + c_next = in[l+1]; + + if (c_in >= 32 && c_in <= 126) { + if (is_upper && !is_all_upper) { + for (ll=l+5; ll>=l && ll 'Z') + break; + } + if (ll == l-1) { + ol = append_bits(out, ol, ALL_UPPER_CODE, ALL_UPPER_CODE_LEN, state); // CapsLock + is_all_upper = 1; + } + } + if (state == SHX_STATE_1 && c_in >= '0' && c_in <= '9') { + ol = append_bits(out, ol, SW2_STATE2_CODE, SW2_STATE2_CODE_LEN, state); // Switch to sticky Set2 + state = SHX_STATE_2; + } + c_in -= 32; + if (is_all_upper && is_upper) + c_in += 32; + if (c_in == 0 && state == SHX_STATE_2) + ol = append_bits(out, ol, ST2_SPC_CODE, ST2_SPC_CODE_LEN, state); // space from Set2 ionstead of Set1 + else { + // ol = append_bits(out, ol, pgm_read_word(&c_95[c_in]), pgm_read_byte(&l_95[c_in]), state); // original version with c/l in split arrays + uint16_t cl = pgm_read_word(&cl_95[c_in]); + ol = append_bits(out, ol, cl & 0xFFF0, cl & 0x000F, state); + } + } else + // if (c_in == 13 && c_next == 10) { // CRLF disabled + // ol = 
append_bits(out, ol, CRLF_CODE, CRLF_CODE_LEN, state); // CRLF + // l++; + // } else + if (c_in == 10) { + ol = append_bits(out, ol, LF_CODE, LF_CODE_LEN, state); // LF + } else + if (c_in == '\t') { + ol = append_bits(out, ol, TAB_CODE, TAB_CODE_LEN, state); // TAB + } else { + ol = append_bits(out, ol, BIN_CODE_TASMOTA, BIN_CODE_TASMOTA_LEN, state); // Binary, we reuse the Unicode marker which 3 bits instead of 9 + ol = encodeCount(out, ol, (unsigned char) 255 - c_in); + } + + // check that we have some headroom in the output buffer + if (ol / 8 >= len_out - 4) { + return -1; // we risk overflow and crash + } + } + + bits = ol % 8; + if (bits) { + ol = append_bits(out, ol, TERM_CODE, 8 - bits, 1); // 0011 0111 1100 0000 TERM = 0011 0111 11 + } + return ol/8+(ol%8?1:0); +} + +int getBitVal(const char *in, int bit_no, int count) { + char c_in = in[bit_no >> 3]; + if ((bit_no >> 3) && (ESCAPE_MARKER == in[(bit_no >> 3) - 1])) { // if previous byte is a marker, decrement + c_in--; + } + return (c_in & (0x80 >> (bit_no % 8)) ? 
1 << count : 0); +} + +// Returns: +// 0..11 +// or -1 if end of stream +int getCodeIdx(char *code_type, const char *in, int len, int *bit_no_p) { + int code = 0; + int count = 0; + do { + // detect marker + if (ESCAPE_MARKER == in[*bit_no_p >> 3]) { + *bit_no_p += 8; // skip marker + } + if (*bit_no_p >= len) + return -1; // invalid state + code += getBitVal(in, *bit_no_p, count); + (*bit_no_p)++; + count++; + uint8_t code_type_code = pgm_read_byte(&code_type[code]); + if (code_type_code && (code_type_code & 0x07) == count) { + return code_type_code >> 3; + } + } while (count < 5); + return 1; // skip if code not found +} + +int getNumFromBits(const char *in, int bit_no, int count) { + int ret = 0; + while (count--) { + if (ESCAPE_MARKER == in[bit_no >> 3]) { + bit_no += 8; // skip marker + } + ret += getBitVal(in, bit_no++, count); + } + return ret; +} + +// const byte bit_len[7] = {5, 2, 7, 9, 12, 16, 17}; +// const uint16_t adder[7] = {4, 0, 36, 164, 676, 4772, 0}; + +// byte bit_len[7] PROGMEM = { 5, 7, 9, 12, 16 }; +// byte bit_len_read[7] PROGMEM = {5, 2, 7, 9, 12, 16 }; +// uint16_t adder_read[7] PROGMEM = {4, 0, 36, 164, 676, 4772, 0}; +// uint16_t adder_read[] PROGMEM = {0, 0, 32, 160, 672, 4768 }; + +// byte bit_len[7] PROGMEM = { 5, 7, 9, 12, 16 }; +// uint16_t adder_read[] PROGMEM = {0, 32, 160, 672, 4768 }; + +// Code size optimized, recalculate adder[] like in encodeCount +int readCount(const char *in, int *bit_no_p, int len) { + int idx = getCodeIdx(us_hcode, in, len, bit_no_p); + if (idx >= 1) idx--; // we skip v = 1 (code '0') since we no more accept 2 bits encoding + if ((idx >= sizeof(bit_len)) || (idx < 0)) return 0; // unsupported or end of stream + + int base; + int till = 0; + byte bit_len_idx; // bit_len[0] + for (uint32_t i = 0; i <= idx; i++) { + base = till; + bit_len_idx = pgm_read_byte(&bit_len[i]); + till += (1 << bit_len_idx); + } + int count = getNumFromBits(in, *bit_no_p, bit_len_idx) + base; + + (*bit_no_p) += bit_len_idx; + 
return count; +} + +int decodeRepeat(const char *in, int len, char *out, int ol, int *bit_no) { + int dict_len = readCount(in, bit_no, len) + NICE_LEN; + int dist = readCount(in, bit_no, len) + NICE_LEN - 1; + memcpy(out + ol, out + ol - dist, dict_len); + ol += dict_len; + + return ol; +} + +int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out) { + + int dstate; + int bit_no; + byte is_all_upper; + + int ol = 0; + bit_no = 0; + dstate = SHX_SET1; + is_all_upper = 0; + + len <<= 3; // *8, len in bits + out[ol] = 0; + while (bit_no < len) { + int h, v; + char c = 0; + byte is_upper = is_all_upper; + int orig_bit_no = bit_no; + v = getCodeIdx(us_vcode, in, len, &bit_no); // read vCode + if (v < 0) break; // end of stream + h = dstate; // Set1 or Set2 + if (v == 0) { // Switch which is common to Set1 and Set2, first entry + h = getCodeIdx(us_hcode, in, len, &bit_no); // read hCode + if (h < 0) break; // end of stream + if (h == SHX_SET1) { // target is Set1 + if (dstate == SHX_SET1) { // Switch from Set1 to Set1 us UpperCase + if (is_all_upper) { // if CapsLock, then back to LowerCase + is_upper = is_all_upper = 0; + continue; + } + v = getCodeIdx(us_vcode, in, len, &bit_no); // read again vCode + if (v < 0) break; // end of stream + if (v == 0) { + h = getCodeIdx(us_hcode, in, len, &bit_no); // read second hCode + if (h < 0) break; // end of stream + if (h == SHX_SET1) { // If double Switch Set1, the CapsLock + is_all_upper = 1; + continue; + } + } + is_upper = 1; // anyways, still uppercase + } else { + dstate = SHX_SET1; // if Set was not Set1, switch to Set1 + continue; + } + } else + if (h == SHX_SET2) { // If Set2, switch dstate to Set2 + if (dstate == SHX_SET1) // TODO: is this test useful, there are only 2 states possible + dstate = SHX_SET2; + continue; + } + if (h != SHX_SET1) { // all other Sets (why not else) + v = getCodeIdx(us_vcode, in, len, &bit_no); // we changed set, now read vCode for char + if (v < 0) break; // end of 
stream + } + } + + if (v == 0 && h == SHX_SET1A) { + if (is_upper) { + out[ol++] = 255 - readCount(in, &bit_no, len); // binary + } else { + ol = decodeRepeat(in, len, out, ol, &bit_no); // dist + } + continue; + } + + if (h == SHX_SET1 && v == 3) { + // was Unicode, will do Binary instead + out[ol++] = 255 - readCount(in, &bit_no, len); // binary + continue; + } + if (h < 7 && v < 11) // TODO: are these the actual limits? Not 11x7 ? + c = pgm_read_byte(&sets[h][v]); + if (c >= 'a' && c <= 'z') { + if (is_upper) + c -= 32; // go to UpperCase for letters + } else { // handle all other cases + if (is_upper && dstate == SHX_SET1 && v == 1) + c = '\t'; // If UpperCase Space, change to TAB + if (h == SHX_SET1B) { + if (8 == v) { // was LF or RPT, now only LF + // if (is_upper) { // rpt + // int count = readCount(in, &bit_no, len); + // count += 4; + // char rpt_c = out[ol - 1]; + // while (count--) + // out[ol++] = rpt_c; + // } else { + out[ol++] = '\n'; + // } + continue; + } + if (9 == v) { // was CRLF, now RPT + // out[ol++] = '\r'; // CRLF removed + // out[ol++] = '\n'; + int count = readCount(in, &bit_no, len); + count += 4; + if (ol + count >= len_out) { + return -1; // overflow + } + char rpt_c = out[ol - 1]; + while (count--) + out[ol++] = rpt_c; + continue; + } + if (10 == v) { + break; // TERM, stop decoding + } + } + } + out[ol++] = c; + + if (ol >= len_out) { + return -1; // overflow + } + } + + return ol; + +} diff --git a/lib/Unishox-1.0-shadinger/src/unishox.h b/lib/Unishox-1.0-shadinger/src/unishox.h new file mode 100644 index 000000000..4d6b81641 --- /dev/null +++ b/lib/Unishox-1.0-shadinger/src/unishox.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2019 Siara Logics (cc) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author Arundale R. + * + */ +#ifndef unishox +#define unishox + +extern int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out); +extern int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out); + +#endif + diff --git a/tasmota/CHANGELOG.md b/tasmota/CHANGELOG.md index 5a35b9b6d..eeae3def4 100644 --- a/tasmota/CHANGELOG.md +++ b/tasmota/CHANGELOG.md @@ -11,6 +11,7 @@ - Change default PWM Frequency to 977 Hz from 223 Hz - Change minimum PWM Frequency from 100 Hz to 40 Hz - Change PWM updated to the latest version of Arduino PR #7231 +- Add automatic compression of Rules to achieve ~60% compression, added ``SetOption93 1`` to control caching of rules ### 8.2.0.5 20200425 diff --git a/tasmota/my_user_config.h b/tasmota/my_user_config.h index 72a763b3c..ce93a6eed 100644 --- a/tasmota/my_user_config.h +++ b/tasmota/my_user_config.h @@ -397,6 +397,7 @@ // -- Rules or Script ---------------------------- // Select none or only one of the below defines #define USE_RULES // Add support for rules (+8k code) + // #define USE_RULES_COMPRESSION // Compresses rules in Flash at about ~50% (+3.8k code) //#define USE_SCRIPT // Add support for script (+17k code) //#define USE_SCRIPT_FATFS 4 // Script: Add FAT FileSystem Support diff --git a/tasmota/settings.h b/tasmota/settings.h index 3925aa30d..edd3de523 100644 --- a/tasmota/settings.h +++ b/tasmota/settings.h @@ -112,7 +112,7 @@ typedef union { // Restricted by MISRA-C Rule 18.4 bu uint32_t only_json_message : 1; // bit 8 (v8.2.0.3) - SetOption90 - Disable 
non-json MQTT response uint32_t fade_at_startup : 1; // bit 9 (v8.2.0.3) - SetOption91 - Enable light fading at start/power on uint32_t pwm_ct_mode : 1; // bit 10 (v8.2.0.4) - SetOption92 - Set PWM Mode from regular PWM to ColorTemp control (Xiaomi Philips ...) - uint32_t spare11 : 1; + uint32_t compress_rules_cpu : 1; // bit 11 (v8.2.0.6) - SetOption93 - Keep uncompressed rules in memory to avoid CPU load of uncompressing at each tick uint32_t spare12 : 1; uint32_t spare13 : 1; uint32_t spare14 : 1; diff --git a/tasmota/settings.ino b/tasmota/settings.ino index 83527ea6f..de890ee41 100644 --- a/tasmota/settings.ino +++ b/tasmota/settings.ino @@ -1404,6 +1404,10 @@ void SettingsDelta(void) Settings.module = WEMOS; ModuleDefault(WEMOS); #endif // ESP32 + // make sure the empty rules have two consecutive NULLs, to be compatible with compressed rules + if (Settings.rules[0][0] == 0) { Settings.rules[0][1] = 0; } + if (Settings.rules[1][0] == 0) { Settings.rules[1][1] = 0; } + if (Settings.rules[2][0] == 0) { Settings.rules[2][1] = 0; } } Settings.version = VERSION; diff --git a/tasmota/xdrv_10_rules.ino b/tasmota/xdrv_10_rules.ino index fa5841564..c88806ced 100644 --- a/tasmota/xdrv_10_rules.ino +++ b/tasmota/xdrv_10_rules.ino @@ -66,6 +66,8 @@ #define XDRV_10 10 +#include + #define D_CMND_RULE "Rule" #define D_CMND_RULETIMER "RuleTimer" #define D_CMND_EVENT "Event" @@ -178,6 +180,222 @@ char rules_vars[MAX_RULE_VARS][33] = {{ 0 }}; #error MAX_RULE_MEMS is bigger than 16 #endif + +/*******************************************************************************************/ +/* + * Add Unishox compression to Rules + * + * New compression for Rules, depends on SetOption93 + * + * To avoid memory corruption when downgrading, the format is as follows: + * - If `SetOption93 0` + * Rule[x][] = 511 char max NULL terminated string (512 with trailing NULL) + * Rule[x][0] = 0 if the Rule is empty + * New: in case the string is empty we also enforce: + * Rule[x][1] = 0 (i.e. 
we have two conseutive NULLs) + * + * - If `SetOption93 1` + * If the rule is smaller than 511, it is stored uncompressed. Rule[x][0] is not null. + * If the rule is empty, Rule[x][0] = 0 and Rule[x][1] = 0; + * If the rule is bigger than 511, it is stored compressed + * The first byte of each Rule is always NULL. + * Rule[x][0] = 0, if firmware is downgraded, the rule will be considered as empty + * + * The second byte contains the size of uncompressed rule in 8-bytes blocks (i.e. (len+7)/8 ) + * Maximum rule size si 2KB (2048 bytes per rule), although there is little chances compression ratio will go down to 75% + * Rule[x][1] = size uncompressed in dwords. If zero, the rule is empty. + * + * The remaining bytes contain the compressed rule, NULL terminated + */ +/*******************************************************************************************/ + +#ifdef USE_RULES_COMPRESSION +// Statically allocate one String per rule +String k_rules[MAX_RULE_SETS] = { String(), String(), String() }; // Strings are created empty +#endif // USE_RULES_COMPRESSION + +// Returns whether the rule is uncompressed, which means the first byte is not NULL +inline bool IsRuleUncompressed(uint32_t idx) { +#ifdef USE_RULES_COMPRESSION + return Settings.rules[idx][0] ? true : false; // first byte not NULL, the rule is not empty and not compressed +#else + return true; +#endif +} + +// Returns whether the rule is empty, which requires two consecutive NULL +inline bool IsRuleEmpty(uint32_t idx) { +#ifdef USE_RULES_COMPRESSION + return (Settings.rules[idx][0] == 0) && (Settings.rules[idx][1] == 0) ? true : false; +#else + return (Settings.rules[idx][0] == 0) ? 
true : false; +#endif +} + +// Returns the approximate (+3-0) length of the rule, not counting the trailing NULL +size_t GetRuleLen(uint32_t idx) { + // no need to use #ifdef USE_RULES_COMPRESSION, the compiler will optimize since first test is always true + if (IsRuleUncompressed(idx)) { + return strlen(Settings.rules[idx]); + } else { // either empty or compressed + return Settings.rules[idx][1] * 8; // cheap calculation, but not byte accurate (may overshoot by 7) + } +} + +// Returns the actual Flash storage for the Rule, including trailing NULL +size_t GetRuleLenStorage(uint32_t idx) { + // no need to use #ifdef USE_RULES_COMPRESSION, the compiler will optimize since first test is always true + if (IsRuleUncompressed(idx)) { + return 1 + strlen(Settings.rules[idx]); + } else { + return 2 + strlen(&Settings.rules[idx][2]); // skip first byte and get len of the compressed rule + } +} + +// internal function, do the actual decompression +void GetRule_decompress(String &rule, const char *rule_head) { + size_t buf_len = 1 + *rule_head * 8; // the first byte contains size of buffer for uncompressed rule / 8, buf_len may overshoot by 7 + rule_head++; // advance to the actual compressed buffer + + // We use a nasty trick here. 
To avoid allocating twice the buffer, + // we first extend the buffer of the String object to the target size (maybe overshooting by 7 bytes) + // then we decompress in this buffer, + // and finally assign the raw string to the String, which happens to work: String uses memmove(), so overlapping works + rule.reserve(buf_len); + char* buf = rule.begin(); + + int32_t len_decompressed = unishox_decompress(rule_head, strlen(rule_head), buf, buf_len); + buf[len_decompressed] = 0; // add NULL terminator + + // AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rawdecompressed: %d"), len_decompressed); + rule = buf; // assign the raw string to the String object (in reality re-writing the same data in the same place) +} + +// +// Read rule in memory, uncompress if needed +// +// Returns: String() object containing a copy of the rule (rule processing is destructive and will change the String) +String GetRule(uint32_t idx) { + if (IsRuleUncompressed(idx)) { + return String(Settings.rules[idx]); + } else { +#ifdef USE_RULES_COMPRESSION // we still do #ifdef to make sure we don't link unnecessary code + + String rule(""); + if (Settings.rules[idx][2] == 0) { return rule; } // the rule is empty + + // If the cache is empty, we need to decompress from Settings + if (0 == k_rules[idx].length() ) { + GetRule_decompress(rule, &Settings.rules[idx][1]); + if (!Settings.flag4.compress_rules_cpu) { + k_rules[idx] = rule; // keep a copy for next time + } + } else { + // we have a valid copy + rule = k_rules[idx]; + } + return rule; +#endif + } +} + +#ifdef USE_RULES_COMPRESSION +// internal function, comrpess rule and store a cached version uncompressed (except if SetOption94 1) +// If out == nullptr, we are in dry-run mode, so don't keep rule in cache +int32_t SetRule_compress(uint32_t idx, const char *in, size_t in_len, char *out, size_t out_len) { + int32_t len_compressed; + len_compressed = unishox_compress(in, in_len, out, out_len); + + if (len_compressed >= 0) { // negative means compression 
failed because of buffer too small, we leave the rule untouched + // check if we need to store in cache + k_rules[idx] = (const char*) nullptr; // Assign the String to nullptr, clears previous string and disallocate internal buffers of String object + if ((!Settings.flag4.compress_rules_cpu) && out) { // if out == nullptr, don't store cache + // keep copy in cache + k_rules[idx] = in; + } + } + return len_compressed; +} +#endif // USE_RULES_COMPRESSION + +// Returns: +// >= 0 : the actual stored size +// <0 : not enough space +int32_t SetRule(uint32_t idx, const char *content, bool append = false) { + if (nullptr == content) { content = ""; } // if nullptr, use empty string + size_t len_in = strlen(content); + bool needsCompress = false; + size_t offset = 0; + + if (len_in >= MAX_RULE_SIZE) { // if input is more than 512, it will not fit uncompressed + needsCompress = true; + } + if (append) { + if (IsRuleUncompressed(idx) || IsRuleEmpty(idx)) { // if already uncompressed (so below 512) and append mode, check if it still fits uncompressed + offset = strlen(Settings.rules[idx]); + if (len_in + offset >= MAX_RULE_SIZE) { + needsCompress = true; + } + } else { + needsCompress = true; // we append to a non-empty compressed rule, so it won't fit uncompressed + } + } + + if (!needsCompress) { // the rule fits uncompressed, so just copy it + strlcpy(Settings.rules[idx] + offset, content, sizeof(Settings.rules[idx])); + +#ifdef USE_RULES_COMPRESSION + // do a dry-run compression to display how much it would be compressed + int32_t len_compressed, len_uncompressed; + + len_uncompressed = strlen(Settings.rules[idx]); + len_compressed = unishox_compress(Settings.rules[idx], len_uncompressed, nullptr /* dry-run */, MAX_RULE_SIZE + 8); + AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Stored uncompressed, would compress from %d to %d (-%d%%)"), len_uncompressed, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_uncompressed, 0, 100)); + +#endif // USE_RULES_COMPRESSION + + 
return len_in + offset; + } else { +#ifdef USE_RULES_COMPRESSION + int32_t len_compressed; + // allocate temp buffer so we don't nuke the rule if it's too big to fit + char *buf_out = (char*) malloc(MAX_RULE_SIZE + 8); // take some margin + if (!buf_out) { return -1; } // fail if couldn't allocate + + // compress + if (append) { + String content_append = GetRule(idx); // get original Rule and decompress it if needed + content_append += content; // concat new content + len_in = content_append.length(); // adjust length + len_compressed = SetRule_compress(idx, content_append.c_str(), len_in, buf_out, MAX_RULE_SIZE + 8); + } else { + len_compressed = SetRule_compress(idx, content, len_in, buf_out, MAX_RULE_SIZE + 8); + } + + if ((len_compressed >= 0) && (len_compressed < MAX_RULE_SIZE - 2)) { + // size is ok, copy to Settings + Settings.rules[idx][0] = 0; // clear first byte to mark as compressed + Settings.rules[idx][1] = (len_in + 7) / 8; // store original length in first bytes (4 bytes chuks) + memcpy(&Settings.rules[idx][2], buf_out, len_compressed); + Settings.rules[idx][len_compressed + 2] = 0; // add NULL termination + AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Compressed from %d to %d (-%d%%)"), len_in, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_in, 0, 100)); + // AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: First bytes: %02X%02X%02X%02X"), Settings.rules[idx][0], Settings.rules[idx][1], Settings.rules[idx][2], Settings.rules[idx][3]); + // AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: GetRuleLenStorage = %d"), GetRuleLenStorage(idx)); + } else { + len_compressed = -1; // failed + // clear rule cache, so it will be reloaded from Settings + k_rules[idx] = (const char *) nullptr; + } + free(buf_out); + return len_compressed; + +#else // USE_RULES_COMPRESSION + return -1; // the rule does not fit and we can't compress +#endif // USE_RULES_COMPRESSION + } + +} + /*******************************************************************************************/ bool 
RulesRuleMatch(uint8_t rule_set, String &event, String &rule) @@ -419,7 +637,7 @@ bool RuleSetProcess(uint8_t rule_set, String &event_saved) //AddLog_P2(LOG_LEVEL_DEBUG, PSTR("RUL: Event = %s, Rule = %s"), event_saved.c_str(), Settings.rules[rule_set]); - String rules = Settings.rules[rule_set]; + String rules = GetRule(rule_set); Rules.trigger_count[rule_set] = 0; int plen = 0; @@ -531,7 +749,7 @@ bool RulesProcessEvent(char *json_event) //AddLog_P2(LOG_LEVEL_DEBUG, PSTR("RUL: Event %s"), event_saved.c_str()); for (uint32_t i = 0; i < MAX_RULE_SETS; i++) { - if (strlen(Settings.rules[i]) && bitRead(Settings.rule_enabled, i)) { + if (GetRuleLen(i) && bitRead(Settings.rule_enabled, i)) { if (RuleSetProcess(i, event_saved)) { serviced = true; } } } @@ -547,7 +765,7 @@ void RulesInit(void) { rules_flag.data = 0; for (uint32_t i = 0; i < MAX_RULE_SETS; i++) { - if (Settings.rules[i][0] == '\0') { + if (0 == GetRuleLen(i)) { bitWrite(Settings.rule_enabled, i, 0); bitWrite(Settings.rule_once, i, 0); } @@ -1727,7 +1945,8 @@ void CmndRule(void) { uint8_t index = XdrvMailbox.index; if ((index > 0) && (index <= MAX_RULE_SETS)) { - if ((XdrvMailbox.data_len > 0) && (XdrvMailbox.data_len < sizeof(Settings.rules[index -1]))) { + // if ((XdrvMailbox.data_len > 0) && (XdrvMailbox.data_len < sizeof(Settings.rules[index -1]))) { // TODO postpone size calculation + if (XdrvMailbox.data_len > 0) { // TODO postpone size calculation if ((XdrvMailbox.payload >= 0) && (XdrvMailbox.payload <= 10)) { switch (XdrvMailbox.payload) { case 0: // Off @@ -1753,24 +1972,24 @@ void CmndRule(void) break; } } else { - int offset = 0; + bool append = false; if ('+' == XdrvMailbox.data[0]) { - offset = strlen(Settings.rules[index -1]); - if (XdrvMailbox.data_len < (sizeof(Settings.rules[index -1]) - offset -1)) { // Check free space - XdrvMailbox.data[0] = ' '; // Remove + and make sure at least one space is inserted - } else { - offset = -1; // Not enough space so skip it - } + XdrvMailbox.data[0] = 
' '; // Remove + and make sure at least one space is inserted + append = true; } - if (offset != -1) { - strlcpy(Settings.rules[index -1] + offset, ('"' == XdrvMailbox.data[0]) ? "" : XdrvMailbox.data, sizeof(Settings.rules[index -1])); + int32_t res = SetRule(index - 1, ('"' == XdrvMailbox.data[0]) ? "" : XdrvMailbox.data, append); + if (res < 0) { + AddLog_P2(LOG_LEVEL_ERROR, PSTR("RUL: not enough space")); } } Rules.triggers[index -1] = 0; // Reset once flag } + // snprintf_P (mqtt_data, sizeof(mqtt_data), PSTR("{\"%s%d\":\"%s\",\"Once\":\"%s\",\"StopOnError\":\"%s\",\"Free\":%d,\"Rules\":\"%s\"}"), + // XdrvMailbox.command, index, GetStateText(bitRead(Settings.rule_enabled, index -1)), GetStateText(bitRead(Settings.rule_once, index -1)), + // GetStateText(bitRead(Settings.rule_stop, index -1)), sizeof(Settings.rules[index -1]) - strlen(Settings.rules[index -1]) -1, Settings.rules[index -1]); snprintf_P (mqtt_data, sizeof(mqtt_data), PSTR("{\"%s%d\":\"%s\",\"Once\":\"%s\",\"StopOnError\":\"%s\",\"Free\":%d,\"Rules\":\"%s\"}"), XdrvMailbox.command, index, GetStateText(bitRead(Settings.rule_enabled, index -1)), GetStateText(bitRead(Settings.rule_once, index -1)), - GetStateText(bitRead(Settings.rule_stop, index -1)), sizeof(Settings.rules[index -1]) - strlen(Settings.rules[index -1]) -1, Settings.rules[index -1]); + GetStateText(bitRead(Settings.rule_stop, index -1)), sizeof(Settings.rules[0]) - GetRuleLenStorage(index - 1), GetRule(index - 1).c_str()); } }