From 7e1c62e78442c7a3d773a0bcea564c86f0469d3a Mon Sep 17 00:00:00 2001 From: Theo Arends <11044339+arendst@users.noreply.github.com> Date: Mon, 8 Jun 2020 12:52:05 +0200 Subject: [PATCH] Add Unishox tools --- tools/unishox/clipboard-const-converter.py | 112 +++++ tools/unishox/unishox.py | 520 +++++++++++++++++++++ tools/unishox/unishox.pyc | Bin 0 -> 14134 bytes 3 files changed, 632 insertions(+) create mode 100644 tools/unishox/clipboard-const-converter.py create mode 100644 tools/unishox/unishox.py create mode 100644 tools/unishox/unishox.pyc diff --git a/tools/unishox/clipboard-const-converter.py b/tools/unishox/clipboard-const-converter.py new file mode 100644 index 000000000..ee180d8a9 --- /dev/null +++ b/tools/unishox/clipboard-const-converter.py @@ -0,0 +1,112 @@ +from tkinter import Tk +import unishox + +# get text from clipboard expecting something like that: +# const char HTTP_SCRIPT_WIFI[] PROGMEM = +# "function c(l){" // comments +# "eb('s1').value=l.innerText||l.textContent;" // comments +# "eb('p1').focus();" // comments +# // comments +# "}"; + +text = Tk().clipboard_get() +# print(text) + +# parsing and cleaning +text_list = text.splitlines() +text = '' #just reuse the string +const_name = '' #default if no name will be found + +line_number = 0 +for line in text_list: + pos = line.find("const char") + # print(pos, line) + if pos > -1: + line_list = line.rsplit(" ") + for el in line_list: + if el.find('[]') > -1: + const_name = el[:-2] #extract the "const char" variable name + line_list.pop(line_number) + else: # remove line comments + line_el = line.rsplit("//") + # print('Splitted line list by //' % line_el) + # print(line_el[0]) + text = text + line_el[0] + line_number = line_number +1 + +# print const_name +# print text + +#remove unwanted quotation marks +qm = [] +pos =0 +last_char = "" +for char in text: + if char == "\"": + if last_char != "\\": + qm.append(pos) #find all quotation marks without preceding backslash + last_char = char + pos = pos + 1 +# print(qm) +lastel = 0 +input = "" +for pos in qm: + sub = text[lastel+1:pos:] + if not sub.isspace() and pos-lastel > 1: + # print(lastel, pos) + input = input + sub #only copy substrings that are not whitespace + # print(text[lastel+1:pos:]) + lastel = pos + +print("####### Parsing intput:") +print("Const char name: ",const_name) +print('####### Cleaned input:') +print(input) + +#construct output (taken from shadinger) +input = input.replace("\\t", "\t") +input = input.replace("\\n", "\n") +input = input.replace("\\r", "\r") +input = input.replace("\\f", "\f") +input = input.replace("\\b", "\b") +input = input.replace("\\\"", u"\u0022") + +in_bytes = bytearray(input, 'utf-8') +in_len = len(in_bytes) +out_bytes = bytearray(in_len * 2) + +UNISHOX = unishox.Unishox() +out_len = UNISHOX.compress(in_bytes, len(in_bytes), out_bytes, len(out_bytes)) +print("####### Compression result:") +print("Compressed from {i} to {o}, -{p:.1f}%".format(i=in_len, o=out_len, p=(100-out_len/in_len*100))) +out_bytes = out_bytes[:out_len] # truncate to right size + +#PROGMEM is growing in steps 0,8,24,40,56,... bytes of data resulting in size of 0,16,32,48,64,... bytes +for in_real in range(8,in_len+16,16): + if in_real>=in_len: + print("Old real PROGMEM-size:",in_real+8,"(unused bytes:",in_real-in_len,")") + break +for out_real in range(8,out_len+16,16): + if out_real>=out_len: + print("New real PROGMEM-size:",out_real+8,"(unused bytes:",out_real-out_len,")") + break +print("the optimal case would be raw bytes + 8, real difference: ", in_real - out_real, "bytes") +# https://www.geeksforgeeks.org/break-list-chunks-size-n-python/ +def chunked(my_list, n): + return [my_list[i * n:(i + 1) * n] for i in range((len(my_list) + n - 1) // n )] + +# split in chunks of 20 characters +chunks = chunked(out_bytes, 20) + +lines_raw = [ "\"\\x" + "\\x".join( [ '{:02X}'.format(b) for b in chunk ] ) + "\"" for chunk in chunks ] +line_complete = "const char " + const_name + "_COMPRESSED" +"[] PROGMEM = " + ("\n" + " "*29).join(lines_raw) + ";" +lines = "const size_t " + const_name +"_SIZE = {size};\n{lines}".format(size=in_len, lines=line_complete) + +print('####### Final output:') +print(lines) + +definition = "#define " + const_name + " Decompress(" + const_name + "_COMPRESSED" + "," + const_name +"_SIZE" + ").c_str()" +print(definition) + + +# maybe add export to clipboard for later ... \ No newline at end of file diff --git a/tools/unishox/unishox.py b/tools/unishox/unishox.py new file mode 100644 index 000000000..1cce14706 --- /dev/null +++ b/tools/unishox/unishox.py @@ -0,0 +1,520 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Python Class for compressing short strings. + +This class contains a highly modified and optimized version of Unishox +for Tasmota converted in C ported to Pyhton3. + +It was basically developed to individually compress and decompress small strings +(see https://github.com/siara-cc/Unishox) +In general compression utilities such as zip, gzip do not compress short strings +well and often expand them. They also use lots of memory which makes them unusable +in constrained environments like Arduino. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +class Unishox: + """ + This is a highly modified and optimized version of Unishox + for Tasmota, aimed at compressing `Rules` which are typically + short strings from 50 to 500 bytes. + + @author Stephan Hadinger + @revised Norbert Richter + """ + + # pylint: disable=bad-continuation,bad-whitespace,line-too-long + #cl_95 = [0x4000 + 3, 0x3F80 + 11, 0x3D80 + 11, 0x3C80 + 10, 0x3BE0 + 12, 0x3E80 + 10, 0x3F40 + 11, 0x3EC0 + 10, 0x3BA0 + 11, 0x3BC0 + 11, 0x3D60 + 11, 0x3B60 + 11, 0x3A80 + 10, 0x3AC0 + 10, 0x3A00 + 9, 0x3B00 + 10, 0x38C0 + 10, 0x3900 + 10, 0x3940 + 11, 0x3960 + 11, 0x3980 + 11, 0x39A0 + 11, 0x39C0 + 11, 0x39E0 + 12, 0x39F0 + 12, 0x3880 + 10, 0x3CC0 + 10, 0x3C00 + 9, 0x3D00 + 10, 0x3E00 + 9, 0x3F00 + 10, 0x3B40 + 11, 0x3BF0 + 12, 0x2B00 + 8, 0x21C0 + 11, 0x20C0 + 10, 0x2100 + 10, 0x2600 + 7, 0x2300 + 11, 0x21E0 + 12, 0x2140 + 11, 0x2D00 + 8, 0x2358 + 13, 0x2340 + 12, 0x2080 + 10, 0x21A0 + 11, 0x2E00 + 8, 0x2C00 + 8, 0x2180 + 11, 0x2350 + 13, 0x2F80 + 9, 0x2F00 + 9, 0x2A00 + 8, 0x2160 + 11, 0x2330 + 12, 0x21F0 + 12, 0x2360 + 13, 0x2320 + 12, 0x2368 + 13, 0x3DE0 + 12, 0x3FA0 + 11, 0x3DF0 + 12, 0x3D40 + 11, 0x3F60 + 11, 0x3FF0 + 12, 0xB000 + 4, 0x1C00 + 7, 0x0C00 + 6, 0x1000 + 6, 0x6000 + 3, 0x3000 + 7, 0x1E00 + 8, 0x1400 + 7, 0xD000 + 4, 0x3580 + 9, 0x3400 + 8, 0x0800 + 6, 0x1A00 + 7, 0xE000 + 4, 0xC000 + 4, 0x1800 + 7, 0x3500 + 9, 0xF800 + 5, 0xF000 + 5, 0xA000 + 4, 0x1600 + 7, 0x3300 + 8, 0x1F00 + 8, 0x3600 + 9, 0x3200 + 8, 0x3680 + 9, 0x3DA0 + 11, 0x3FC0 + 11, 0x3DC0 + 11, 0x3FE0 + 12] + cl_95 = [0x4000 + 3, 0x3F80 + 11, 0x3D80 + 11, 0x3C80 + 10, 0x3BE0 + 12, 0x3E80 + 10, 0x3F40 + 11, 0x3EC0 + 10, 0x3BA0 + 11, 0x3BC0 + 11, 0x3D60 + 11, 0x3B60 + 11, 0x3A80 + 10, 0x3AC0 + 10, 0x3A00 + 9, 0x3B00 + 10, 0x38C0 + 10, 0x3900 + 10, 0x3940 + 11, 0x3960 + 11, 0x3980 + 11, 0x39A0 + 11, 0x39C0 + 11, 0x39E0 + 12, 0x39F0 + 12, 0x3880 + 10, 0x3CC0 + 10, 0x3C00 + 9, 0x3D00 + 10, 0x3E00 + 9, 0x3F00 + 10, 0x3B40 + 11, 0x3BF0 + 12, 0x2B00 + 8, 0x21C0 + 11, 0x20C0 + 10, 0x2100 + 10, 0x2600 + 7, 0x2300 + 11, 0x21E0 + 12, 0x2140 + 11, 0x2D00 + 8, 0x46B0 + 13, 0x2340 + 12, 0x2080 + 10, 0x21A0 + 11, 0x2E00 + 8, 0x2C00 + 8, 0x2180 + 11, 0x46A0 + 13, 0x2F80 + 9, 0x2F00 + 9, 0x2A00 + 8, 0x2160 + 11, 0x2330 + 12, 0x21F0 + 12, 0x46C0 + 13, 0x2320 + 12, 0x46D0 + 13, 0x3DE0 + 12, 0x3FA0 + 11, 0x3DF0 + 12, 0x3D40 + 11, 0x3F60 + 11, 0x3FF0 + 12, 0xB000 + 4, 0x1C00 + 7, 0x0C00 + 6, 0x1000 + 6, 0x6000 + 3, 0x3000 + 7, 0x1E00 + 8, 0x1400 + 7, 0xD000 + 4, 0x3580 + 9, 0x3400 + 8, 0x0800 + 6, 0x1A00 + 7, 0xE000 + 4, 0xC000 + 4, 0x1800 + 7, 0x3500 + 9, 0xF800 + 5, 0xF000 + 5, 0xA000 + 4, 0x1600 + 7, 0x3300 + 8, 0x1F00 + 8, 0x3600 + 9, 0x3200 + 8, 0x3680 + 9, 0x3DA0 + 11, 0x3FC0 + 11, 0x3DC0 + 11, 0x3FE0 + 12] + + # enum {SHX_STATE_1 = 1, SHX_STATE_2}; // removed Unicode state + SHX_STATE_1 = 1 + SHX_STATE_2 = 2 + + SHX_SET1 = 0 + SHX_SET1A = 1 + SHX_SET1B = 2 + SHX_SET2 = 3 + + sets = [['\0', ' ', 'e', '\0', 't', 'a', 'o', 'i', 'n', 's', 'r'], + ['\0', 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'], + ['f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', '\0', '\0', '\0'], + ['\0', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'], + ['.', ',', '-', '/', '?', '+', ' ', '(', ')', '$', '@'], + [';', '#', ':', '<', '^', '*', '"', '{', '}', '[', ']'], + ['=', '%', '\'', '>', '&', '_', '!', '\\', '|', '~', '`']] + + us_vcode = [2 + (0 << 3), 3 + (3 << 3), 3 + (1 << 3), 4 + (6 << 3), 0, + # 5, 6, 7, 8, 9, 10 + 4 + (4 << 3), 3 + (2 << 3), 4 + (8 << 3), 0, 0, 0, + # 11, 12, 13, 14, 15 + 4 + (7 << 3), 0, 4 + (5 << 3), 0, 5 + (9 << 3), + # 16, 17, 18, 19, 20, 21, 22, 23 + 0, 0, 0, 0, 0, 0, 0, 0, + # 24, 25, 26, 27, 28, 29, 30, 31 + 0, 0, 0, 0, 0, 0, 0, 5 + (10 << 3) ] + # 0, 1, 2, 3, 4, 5, 6, 7, + us_hcode = [1 + (1 << 3), 2 + (0 << 3), 0, 3 + (2 << 3), 0, 0, 0, 5 + (3 << 3), + # 8, 9, 10, 11, 12, 13, 14, 15, + 0, 0, 0, 0, 0, 0, 0, 5 + (5 << 3), + # 16, 17, 18, 19, 20, 21, 22, 23 + 0, 0, 0, 0, 0, 0, 0, 5 + (4 << 3), + # 24, 25, 26, 27, 28, 29, 30, 31 + 0, 0, 0, 0, 0, 0, 0, 5 + (6 << 3) ] + # pylint: enable=bad-continuation,bad-whitespace + + ESCAPE_MARKER = 0x2A + + TERM_CODE = 0x37C0 + # TERM_CODE_LEN = 10 + DICT_CODE = 0x0000 + DICT_CODE_LEN = 5 + #DICT_OTHER_CODE = 0x0000 + #DICT_OTHER_CODE_LEN = 6 + RPT_CODE_TASMOTA = 0x3780 + RPT_CODE_TASMOTA_LEN = 10 + BACK2_STATE1_CODE = 0x2000 + BACK2_STATE1_CODE_LEN = 4 + #BACK_FROM_UNI_CODE = 0xFE00 + #BACK_FROM_UNI_CODE_LEN = 8 + LF_CODE = 0x3700 + LF_CODE_LEN = 9 + TAB_CODE = 0x2400 + TAB_CODE_LEN = 7 + ALL_UPPER_CODE = 0x2200 + ALL_UPPER_CODE_LEN = 8 + SW2_STATE2_CODE = 0x3800 + SW2_STATE2_CODE_LEN = 7 + ST2_SPC_CODE = 0x3B80 + ST2_SPC_CODE_LEN = 11 + BIN_CODE_TASMOTA = 0x8000 + BIN_CODE_TASMOTA_LEN = 3 + + NICE_LEN = 5 + + mask = [0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF] + + # pylint: disable=missing-function-docstring,invalid-name + + # Input + # out = bytearray + def append_bits(self, out, ol, code, clen, state): + #print("Append bits {ol} {code} {clen} {state}".format(ol=ol, code=code, clen=clen, state=state)) + if state == self.SHX_STATE_2: + # remove change state prefix + if (code >> 9) == 0x1C: + code <<= 7 + clen -= 7 + while clen > 0: + cur_bit = ol % 8 + blen = 8 if (clen > 8) else clen + a_byte = (code >> 8) & self.mask[blen - 1] + #print("append_bits a_byte {ab} blen {blen}".format(ab=a_byte,blen=blen)) + a_byte >>= cur_bit + if blen + cur_bit > 8: + blen = (8 - cur_bit) + if cur_bit == 0: + out[ol // 8] = a_byte + else: + out[ol // 8] |= a_byte + code <<= blen + ol += blen + if 0 == ol % 8: # pylint: disable=misplaced-comparison-constant + # we completed a full byte + last_c = out[(ol // 8) - 1] + if last_c in (0, self.ESCAPE_MARKER): + out[ol // 8] = 1 + last_c # increment to 0x01 or 0x2B + out[(ol // 8) -1] = self.ESCAPE_MARKER # replace old value with marker + ol += 8 # add one full byte + clen -= blen + return ol + + codes = [0x82, 0xC3, 0xE5, 0xED, 0xF5] # pylint: disable=bad-whitespace + bit_len = [ 5, 7, 9, 12, 16] # pylint: disable=bad-whitespace + + def encodeCount(self, out, ol, count): + #print("encodeCount ol = {ol}, count = {count}".format(ol=ol, count=count)) + till = 0 + base = 0 + for i in range(len(self.bit_len)): + bit_len_i = self.bit_len[i] + till += (1 << bit_len_i) + if count < till: + codes_i = self.codes[i] + ol = self.append_bits(out, ol, (codes_i & 0xF8) << 8, codes_i & 0x07, 1) + #print("encodeCount append_bits ol = {ol}, code = {code}, len = {len}".format(ol=ol,code=(codes_i & 0xF8) << 8,len=codes_i & 0x07)) + ol = self.append_bits(out, ol, (count - base) << (16 - bit_len_i), bit_len_i, 1) + #print("encodeCount append_bits ol = {ol}, code = {code}, len = {len}".format(ol=ol,code=(count - base) << (16 - bit_len_i),len=bit_len_i)) + return ol + base = till + return ol + + # Returns (int, ol, state, is_all_upper) + def matchOccurance(self, inn, len_, l_, out, ol, state, is_all_upper): + # int j, k; + longest_dist = 0 + longest_len = 0 + #for (j = l_ - self.NICE_LEN; j >= 0; j--) { + j = l_ - self.NICE_LEN + while j >= 0: + k = l_ + #for (k = l_; k < len && j + k - l_ < l_; k++) { + while k < len_ and j + k - l_ < l_: + if inn[k] != inn[j + k - l_]: + break + k += 1 + if k - l_ > self.NICE_LEN - 1: + match_len = k - l_ - self.NICE_LEN + match_dist = l_ - j - self.NICE_LEN + 1 + if match_len > longest_len: + longest_len = match_len + longest_dist = match_dist + j -= 1 + + if longest_len: + #print("longest_len {ll}".format(ll=longest_len)) + #ol_save = ol + if state == self.SHX_STATE_2 or is_all_upper: + is_all_upper = 0 + state = self.SHX_STATE_1 + ol = self.append_bits(out, ol, self.BACK2_STATE1_CODE, self.BACK2_STATE1_CODE_LEN, state) + + ol = self.append_bits(out, ol, self.DICT_CODE, self.DICT_CODE_LEN, 1) + ol = self.encodeCount(out, ol, longest_len) + ol = self.encodeCount(out, ol, longest_dist) + #print("longest_len {ll} longest_dist {ld} ol {ols}-{ol}".format(ll=longest_len, ld=longest_dist, ol=ol, ols=ol_save)) + l_ += longest_len + self.NICE_LEN + l_ -= 1 + + return l_, ol, state, is_all_upper + return -l_, ol, state, is_all_upper + + + def compress(self, inn, len_, out, len_out): + ol = 0 + state = self.SHX_STATE_1 + is_all_upper = 0 + l = 0 + while l < len_: + # for (l=0; l 0: + #print("matchOccurance l = {l} l_old = {lo}".format(l=l,lo=l_old)) + l += 1 # for loop + continue + + l = -l + + if state == self.SHX_STATE_2: # if Set2 + if ord(' ') <= c_in <= ord('@') or ord('[') <= c_in <= ord('`') or ord('{') <= c_in <= ord('~'): + pass + else: + state = self.SHX_STATE_1 # back to Set1 and lower case + ol = self.append_bits(out, ol, self.BACK2_STATE1_CODE, self.BACK2_STATE1_CODE_LEN, state) + + is_upper = 0 + if ord('A') <= c_in <= ord('Z'): + is_upper = 1 + else: + if is_all_upper: + is_all_upper = 0 + ol = self.append_bits(out, ol, self.BACK2_STATE1_CODE, self.BACK2_STATE1_CODE_LEN, state) + + if 32 <= c_in <= 126: + if is_upper and not is_all_upper: + ll = l+5 + # for (ll=l+5; ll>=l && ll ord('Z'): + break + + ll -= 1 + + if ll == l-1: + ol = self.append_bits(out, ol, self.ALL_UPPER_CODE, self.ALL_UPPER_CODE_LEN, state) # CapsLock + is_all_upper = 1 + + if state == self.SHX_STATE_1 and ord('0') <= c_in <= ord('9'): + ol = self.append_bits(out, ol, self.SW2_STATE2_CODE, self.SW2_STATE2_CODE_LEN, state) # Switch to sticky Set2 + state = self.SHX_STATE_2 + + c_in -= 32 + if is_all_upper and is_upper: + c_in += 32 + if c_in == 0 and state == self.SHX_STATE_2: + ol = self.append_bits(out, ol, self.ST2_SPC_CODE, self.ST2_SPC_CODE_LEN, state) # space from Set2 ionstead of Set1 + else: + # ol = self.append_bits(out, ol, pgm_read_word(&c_95[c_in]), pgm_read_byte(&l_95[c_in]), state); // original version with c/l in split arrays + cl = self.cl_95[c_in] + cl_code = cl & 0xFFF0 + cl_len = cl & 0x000F + if cl_len == 13: + cl_code = cl_code >> 1 + ol = self.append_bits(out, ol, cl_code, cl_len, state) + + elif c_in == 10: + ol = self.append_bits(out, ol, self.LF_CODE, self.LF_CODE_LEN, state) # LF + elif c_in == '\t': + ol = self.append_bits(out, ol, self.TAB_CODE, self.TAB_CODE_LEN, state) # TAB + else: + ol = self.append_bits(out, ol, self.BIN_CODE_TASMOTA, self.BIN_CODE_TASMOTA_LEN, state) # Binary, we reuse the Unicode marker which 3 bits instead of 9 + ol = self.encodeCount(out, ol, (255 - c_in) & 0xFF) + + + # check that we have some headroom in the output buffer + if ol // 8 >= len_out - 4: + return -1 # we risk overflow and crash + + l += 1 + + bits = ol % 8 + if bits: + ol = self.append_bits(out, ol, self.TERM_CODE, 8 - bits, 1) # 0011 0111 1100 0000 TERM = 0011 0111 11 + return (ol + 7) // 8 + # return ol // 8 + 1 if (ol%8) else 0 + + + def getBitVal(self, inn, bit_no, count): + c_in = inn[bit_no >> 3] + if bit_no >> 3 and self.ESCAPE_MARKER == inn[(bit_no >> 3) - 1]: + c_in -= 1 + r = 1 << count if (c_in & (0x80 >> (bit_no % 8))) else 0 + #print("getBitVal r={r}".format(r=r)) + return r + + # Returns: + # 0..11 + # or -1 if end of stream + def getCodeIdx(self, code_type, inn, len_, bit_no_p): + code = 0 + count = 0 + while count < 5: + if bit_no_p >= len_: + return -1, bit_no_p + # detect marker + if self.ESCAPE_MARKER == inn[bit_no_p >> 3]: + bit_no_p += 8 # skip marker + + if bit_no_p >= len_: + return -1, bit_no_p + + code += self.getBitVal(inn, bit_no_p, count) + bit_no_p += 1 + count += 1 + code_type_code = code_type[code] + if code_type_code and (code_type_code & 0x07) == count: + #print("getCodeIdx = {r}".format(r=code_type_code >> 3)) + return code_type_code >> 3, bit_no_p + + #print("getCodeIdx not found = {r}".format(r=1)) + return 1, bit_no_p + + def getNumFromBits(self, inn, bit_no, count): + ret = 0 + while count: + count -= 1 + if self.ESCAPE_MARKER == inn[bit_no >> 3]: + bit_no += 8 # skip marker + ret += self.getBitVal(inn, bit_no, count) + bit_no += 1 + return ret + + def readCount(self, inn, bit_no_p, len_): + (idx, bit_no_p) = self.getCodeIdx(self.us_hcode, inn, len_, bit_no_p) + if idx >= 1: + idx -= 1 # we skip v = 1 (code '0') since we no more accept 2 bits encoding + if idx >= 5 or idx < 0: + return 0, bit_no_p # unsupported or end of stream + till = 0 + bit_len_idx = 0 + base = 0 + #for (uint32_t i = 0; i <= idx; i++) { + i = 0 + while i <= idx: + # for i in range(idx): + base = till + bit_len_idx = self.bit_len[i] + till += (1 << bit_len_idx) + i += 1 + + count = self.getNumFromBits(inn, bit_no_p, bit_len_idx) + base + #print("readCount getNumFromBits = {count} ({bl})".format(count=count,bl=bit_len_idx)) + + bit_no_p += bit_len_idx + return count, bit_no_p + + def decodeRepeat(self, inn, len_, out, ol, bit_no): + #print("decodeRepeat Enter") + (dict_len, bit_no) = self.readCount(inn, bit_no, len_) + dict_len += self.NICE_LEN + (dist, bit_no) = self.readCount(inn, bit_no, len_) + dist += self.NICE_LEN - 1 + #memcpy(out + ol, out + ol - dist, dict_len); + i = 0 + while i < dict_len: + #for i in range(dict_len): + out[ol + i] = out[ol - dist + i] + i += 1 + ol += dict_len + + return ol, bit_no + + def decompress(self, inn, len_, out, len_out): + ol = 0 + bit_no = 0 + dstate = self.SHX_SET1 + is_all_upper = 0 + + len_ <<= 3 # *8, len_ in bits + out[ol] = 0 + while bit_no < len_: + c = 0 + is_upper = is_all_upper + (v, bit_no) = self.getCodeIdx(self.us_vcode, inn, len_, bit_no) # read vCode + #print("bit_no {b}. v = {v}".format(b=bit_no,v=v)) + if v < 0: + break # end of stream + h = dstate # Set1 or Set2 + if v == 0: # Switch which is common to Set1 and Set2, first entry + (h, bit_no) = self.getCodeIdx(self.us_hcode, inn, len_, bit_no) # read hCode + #print("bit_no {b}. h = {h}".format(b=bit_no,h=h)) + if h < 0: + break # end of stream + if h == self.SHX_SET1: # target is Set1 + if dstate == self.SHX_SET1: # Switch from Set1 to Set1 us UpperCase + if is_all_upper: # if CapsLock, then back to LowerCase + is_upper = 0 + is_all_upper = 0 + continue + + (v, bit_no) = self.getCodeIdx(self.us_vcode, inn, len_, bit_no) # read again vCode + if v < 0: + break # end of stream + if v == 0: + (h, bit_no) = self.getCodeIdx(self.us_hcode, inn, len_, bit_no) # read second hCode + if h < 0: + break # end of stream + if h == self.SHX_SET1: # If double Switch Set1, the CapsLock + is_all_upper = 1 + continue + + is_upper = 1 # anyways, still uppercase + else: + dstate = self.SHX_SET1 # if Set was not Set1, switch to Set1 + continue + + elif h == self.SHX_SET2: # If Set2, switch dstate to Set2 + if dstate == self.SHX_SET1: + dstate = self.SHX_SET2 + continue + + if h != self.SHX_SET1: # all other Sets (why not else) + (v, bit_no) = self.getCodeIdx(self.us_vcode, inn, len_, bit_no) # we changed set, now read vCode for char + if v < 0: + break # end of stream + + if v == 0 and h == self.SHX_SET1A: + #print("v = 0, h = self.SHX_SET1A") + if is_upper: + (temp, bit_no) = self.readCount(inn, bit_no, len_) + out[ol] = 255 - temp # binary + ol += 1 + else: + (ol, bit_no) = self.decodeRepeat(inn, len_, out, ol, bit_no) # dist + continue + + if h == self.SHX_SET1 and v == 3: + # was Unicode, will do Binary instead + (temp, bit_no) = self.readCount(inn, bit_no, len_) + out[ol] = 255 - temp # binary + ol += 1 + continue + + if h < 7 and v < 11: + #print("h {h} v {v}".format(h=h,v=v)) + c = ord(self.sets[h][v]) + if ord('a') <= c <= ord('z'): + if is_upper: + c -= 32 # go to UpperCase for letters + else: # handle all other cases + if is_upper and dstate == self.SHX_SET1 and v == 1: + c = ord('\t') # If UpperCase Space, change to TAB + if h == self.SHX_SET1B: + if 8 == v: # was LF or RPT, now only LF # pylint: disable=misplaced-comparison-constant + out[ol] = ord('\n') + ol += 1 + continue + + if 9 == v: # was CRLF, now RPT # pylint: disable=misplaced-comparison-constant + (count, bit_no) = self.readCount(inn, bit_no, len_) + count += 4 + if ol + count >= len_out: + return -1 # overflow + + rpt_c = out[ol - 1] + while count: + count -= 1 + out[ol] = rpt_c + ol += 1 + continue + + if 10 == v: # pylint: disable=misplaced-comparison-constant + break # TERM, stop decoding + + out[ol] = c + ol += 1 + + if ol >= len_out: + return -1 # overflow + + return ol + + # pylint: enable=missing-function-docstring + + +if __name__ == "__main__": + # pylint: disable=line-too-long + UNISHOX = Unishox() + BYTES_ = bytearray(2048) + INN = bytearray(b'ON Switch1#State==1 DO Add1 1 ENDON ON Var1#State==0 DO ShutterStop1 ENDON ON Var1#State==1 DO ShutterClose1 ENDON ON Var1#State>=2 DO Var1 0 ENDON ON Shutter1#Close DO Var1 0 ENDON ON Switch2#State==1 DO Add2 1 ENDON ON Var2#State==0 DO ShutterStop1 ENDON ON Var2#State==1 DO ShutterOpen1 ENDON ON Var2#State>=2 DO Var2 0 ENDON ON Shutter1#Open DO Var2 0 ENDON') + LEN_ = UNISHOX.compress(INN, len(INN), BYTES_, len(BYTES_)) + print("Compressed from {fromm} to {to} ({p}%)".format(fromm=len(INN), to=LEN_, p=(100-LEN_/len(INN)*100))) + + OUT = bytearray(2048) + LEN_ = UNISHOX.decompress(BYTES_, LEN_, OUT, len(OUT)) + print(str(OUT, 'utf-8').split('\x00')[0]) diff --git a/tools/unishox/unishox.pyc b/tools/unishox/unishox.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2c127f091cd9e566505df0886064be9f61e9950 GIT binary patch literal 14134 zcmcgzdvILWS^w_cl`O3ue#Ew<*iCM1Cs{kTY%6i>B#tdvR$|qT?C#2OvXQf1@41q0 zEbU77UfEJ2rAl3BL*Ebjrnr>06iR7%(-(#k+CmEf3V&pn!808O7>1e507GYB2*2NV z_9Z*e!VH$Q-#w4-ob#RYJ(x_BfcvpAs1W;|4w{|t+|BwNij*e>`1Oj%S~Et7C#}+tP&rE7D6ESKtNd$E+-|| zDyCVUNr?ek6~~}}cKoGSsI_lEeSP<3Nj!S2zF?a!(ykK&7gdaI<5G5dzk^kS7FV9> z6w}Tw(;+>N9xGvMosvzj=BEEWA(A?^5Kop%-r!UrioEf1=oQP;Goc?vL21H^Cd*;$ zMRABq)SF6;P6m-zRMcX*6c>V0PfT8O60aRJMffDr>R02SU07DPENd#4sA<8tX> zyQ82G7IqbjyDNllPYsv63BTlrg{k_vU@voVFcrjsA9>MSanb{E&jvF)y$L|il)X|p zuA#3y!_*}obm#$%$9~E4=Vw@nC;jPOZ*k(eJhW+lx*RTemnH+O zG+nrWMTzdsmFA+t`6)kzbHgEmNni(##4lY8!g6WaFJb&taKZNmLNgbX$~egrLD4To zK$#+y9~&w9 zGckF{GoK0yrK0a$!qMs?l_`2tPgW)pI3KV9 zLO_KhfdS9XH@&GWB((8Ns4}caGky_TR>Y1<6LW3Vn-c-B_MxY|;MHKbt9N-arqLa-cJ}ez92OumU3a)g)5W?(rnI`*arWNag z^@8c@g00dTY!6Y|ZLP!BV29Ya+CJ9U$6AZB&OUAtvmPiLY;B``blFF@9nfPRx7x=h z``BzBZ?Ht%W*@iPah`qLVW-|=Yg=t?o2}hxhu>w#-7O{!BDUMc9Tw#t``9UF7y5c_ z-)=i#kFD*sx-IAEHm>DrB%!&!cL}Fqwb7C%v zxg-;iTy)KgSrGHAnCHYSidho#yqGtOSr+qx;=D!7Wif9R^AlozQq0@Lyj{$TIu^rA z)p>qO%sa%qQ_Q=>yj#qB#JpF``^3Co%m)8HT+f4F<6UeO4o6KW-)iw+ko+O zy@ulYdd=ehI1uB&KrT7qNv=)6QT{0n4I9B!*h`hT+QdqLMXt3q7SOLK*neYlt&C1= zLl>*HH5SlcIVn=tF6P%I*CFONB-g1DxSI3l)`~>s6!SZhgQ43cxo$CEkz5Z?ICra{mHOaxm!GXL@%-1D%JBiDAx(dwO zH^ls|n5$xbPt5O&`KFjZ5c85uv`B7?m_HP_v$?Hez9r_{V!k8hkHq}3Of*Suo1Vii zBIE-#nrFU-M74!vyb&rIUdKUOf9-w$nP=wA%z zQ;n3v^YCQ73>>w%(it$k2ol(GuzVPxO-m26dJur|M{w;SfOhRTgi=58uCjE69ajm? zal|>6mLFsrfe^HEm?+)>mL34;Cpbs2OmKzZD#0~^R|%Ghe3cZ40!}YnQn+|O=X;ev z?g41w8dvWi@Yu$Z+yy}YA&{*A*Vxvt9lKEa(jx#J?CodoGKXE!-d>bsC)-%PLcHbO zY*VlUrE~1q!(p$obd@L`t1q*3jg(#?k*~7Uujisw0{Id^QmNX^Dh@Sh*6JEo&k^h) zxQ$>v!OH;4`&hb<0DBGOR)T8;R|&ca6y=8~HR*h>vh51NMuK|@ZYQ{(pbub~r7NWG zDoMM_(lwSGPT=Y+01>Ma6M71Lg|Whd!m`3ZVM$@6FjP3Du&B@|oK!fca7N*@!t)9z z6kbv|u5dx&MTHj>&MJIH;k?3U6+WPFkHWnQ`xM@*@IHn66yC3Jzrq6wdll|fxJ%(~ zg@+a1qcE*-yTWY>`xQQYKG@#hr`VPalpc0EQk_U16kkFA(%why!Bvdy)4#hMOS(-;)4l?Ji6E zmw=zFjM~VF&N}sRO3qTNEaF}Q5~y@#9LLa1G<2f9tB!rpBGel={GcU>>&%{YNHQi` zWzX!OoI!6kO*=MXP3dM7D|U?~HGS00bOIoz_0Ar9BA*={7#+&@#pK#_A-WLL)(vF` z2Tl#;9~;OVAIhZ3RPB%asqvVj%X4u|wJ%R;d$DYMTbM$mPOB0jR`WF|E6#=a^FgfL zh+a!E#iWqum83-^VB&l+jYd%rJ&DrbgJY*71n*;`lYV*Z(W7B-(H}cg4lj&79z@8F zWaBv#l*cN0moel}#wN@zJa%0?(0gz1p0T)Go{GliY>uY-?44QAZ6lWUONLZMvsf&G z85MC?yK9`3v&LBs*zNSVEO$8V&RTapT2OP>IU5{));OI`@D_{-zDe-61ph?vuLNrV ziabr;XJm)!!q-rMpuV2}M^fHwols1sVo7NjGJ0m2@8hsG~uYuny>) z-LgWku#1Y-R|}vrrB^3+ohIj%{UHK+UnPYcCjt&TUlk6DOO$yNNAQd?j}jas7zWS@ zisiYIl^JBGtdJpP=4U88F$Y!V%Li7-IAcDzsZ!*ZxQ{^+`T-CWbpwdo>@+)@Avc|n zoDS!1_ii@{*(vg%I`JX>Di2?WCQ=f>A3&G|Fash-0U%39r3xpb!{~a;sd% z>0Q^@QfZyEP$1*2?8>^ol<&jO!&%q>y+Avjy>cS*5J*dQ28!%C(hf8#E zEOl_Y5Emy^t9e7%8i(C9HRO(U*^@LlghqBbYhk_H zp`qstwNgviBJ(dc(ZR54 z`PnAwUJEQT#HvNY8OQdic0p}XM~9-oLU;GjNm$v|bwe96jW@J4)!Se|sF5%lJ5=i} zAusDHDcAxiDHcH;Dw%-pLnS{4m4tn0Q3YM0map z)ToxYUurn^xNug93J!-r)h&YBhwh4M;`maa494R&nVnI`Mo*L@T5v%nq>J|L6^9M> z{IC*G>|Cm&msiOcF02;v2^^wPQPWDB_A8Vi;#zq$#TP^M2+OrA0vXG2o zSkNxBy@@ltb(EFe6lyGc-5zkGp#?})!Q{?Y$?QvsI;(9cvOP%>)~;a9za`kVZpGnk zPhP(U?mn!6gQ>2;D%jDi;so!$6XZflp-3n6kN~xHKUzr!DtZZq??|qYK2)#U&T3t1 zO?|1^7n+E(mXq}G7Dkfp7U^FCH7Kn|2}6*q+n`GSQgRXLyM!D@KNoJC9V38R-9qV; zO4ddlc8jjjC>rZ@>@5xWCB^Sj{J|UW5ien_4Gs8*R<4fTjoQ*x$KjBweK6gezZ;MK zij6=fyhm=urLLwFZqY}Z%&oYV8x{Ku%oDISF5$%1Nch(`(1nC#54&hm>V^x0HX$g) z^x5=gSpCYSP8Yr*!I3~U!|*AC^p0eZd6Eq8hYVt8$s7UD0DC}TF7VJ0JWs$2CG!RX zP9D69SUKP;YnxQ!&8e|^s|L|FN_!1I8o8zQ!ad8ubSBh!$UMzb9w4UEojGN_g8b+} z_OX+r12IxTH#F9qgv=OwVQ0j~aiNJjQ5iUKB7geSsiBN@FxK7Bq+STA&Yr3GEPWRB zmK)m?m%c@I6u74bEh1@cY)~woO>rv!z&`cjPds8#+t6AmD;BTf(SakjlM`1f+RKTL z43B82FQqA_$W@OpD#4NYNV@kPoAE^hmR=3OD<+LLX_xb&c!owU=cvClS_ z(ND^~-P!3R-A;H^cL0CC!j#5Do1AS4VBnV#(rs?Lvkj$f@WS4ZT(4s=yK|?bF*Hw# zN2H$EI10Q!kY+&+sfXdPEr$JSOd5M>Ce&?0ml_RbmS_Y~!A*y8N!fU~Kg26NwRDG& zM?$1bFRuw{6s|jBbKTVWV3|Od_^cw#w34<1}%0IM5`x!3+7c)|%kURnHnO8b{(y7Y@BL(t3uO#cqF;(u%+#!7 zxjako&4AWq^B}lSabqFQTP6KK;Ci->)hkoau9h=QXV`_(+-1d>bF+b()dWk*No6c^ z6KSewhV0C|$SQknAjQL^#^&*|#n04igob&R&H0(k8;MSCt0}FatE#8bd{fk#Oa|Kr zLF}-ZA0*#Bt_@X!QbWN~ZkN*mWlOnGWc}Ufu1lzr@o;L=Fo_D~ASv@t(ZddlgpdY} ziY?cz4#Kfeu0LE+C*~!shEX_xV_A%=mi)EK%IpKOm#v6E5L9czs3C^73diHWo+Tu& zu4b9Hv#LiMrO;g_jVwWJFis2=K>DnHZ;Sw?6gyebErLb!AZoC2*_!g5o!6T%|4pxf*HkD!KGiO+# zV?edz%^aZd38JY5rYzhvU87Y9{esclAHGB>(H67_?>Ml&8_GqcYsP)J8Xx8YJKdfH zvVbI&gKIb1XHcmK1dbO6k34NcMFAl!s@33BO<_&<8u3;so=u@Ss`otZJsR6`lBNQgdI)cB#n(%@zG*-cvowhCFV~ zNH%1fsIJO+6BKP4m?ywgk#g^*F^;|?zeowu^Jn}*yhPs7Ulw;0__PVRe)8;A^?dZA zq+XM2dQmhdQ9n(`}N2^T#yCvv=u2l88_hWGRq;u_gcz z`cqRbYY5t?!OYge5P)~j+3YQNjnLM%;`Zy*lmSLwr=NBTXY*syc-{_fq;c7?77T>U zE;BR4DpFYA)_IXGL2wTmk=d}L>2k5c#=PIi*4?y**15! z)8$=PW5zq(3I%w#h_gjC1skCp0vm7ufHmbtrW^(u%GIICdkP9Z*cy#5=p$3G0gM7Q zDo7ZPfOQONt?8}n_kb0zxg-XHX3Lo2f4?qw1c|{GAav^zoX{GNbqT01seh=2Hl)JBQwbhOMpGfJnjo3YBulQt)@5dq>Vk+W7RK#{oh<8A znn9~%Pb}Y3+M7w@i{U^f=2sQdX2Y!RtvFJO4)P@y_poMKZakk()fwbWI>8BKNML9? z@rngPh1*z?$ba!YfStFy2f3E{G%>;hl|;({4uVs9!}@EJXDrK*B&*PnNuJ^f;KXsJ z*AL*zcGW)Nsu;}VMpgsXio>LitQurw6e=%zc|m4qTQi&3<^fca3oX@R%Dx;Xl68oA z^`<#cEwsK89va=d;-1NNU z?M=aCiH)`Ox}eoH`Wo4W-MY0Gh_jJ^??7~k*q@%Ut{~kF4aINTeZr%we=+}EyiLA< zChK&!GDe0kfHx5My#uay63_NZX?3NYeggf5BzyEp{8mKT98WB~ufUUiDR&d@r%kw> zIxwagHI^BRcjL}VW850F+cA8X-HbIi;kkxUHQscPnoez9Mat0I;-+_lUgdl~Un)%d z`8;x&M(_!Oj}m;G;3EW-7aLVAv2>20 zKrl@J$(MR6D|3m}d4dIkX9;EqW(h)qh#)4IBY2JgR>OKAnIV?mL~xowT}!?e(_~lX zZ7jV=Krd8%&IWnCg0{NQXzBm=YX})ED4X$#xaH=S=HzSgntUo@y++DQv%3v>k?QXb zcelF(?~=RSyW9h=hu^yr@Gl_^%^btu>eJTh5LmZ0x8BY-T&?R`)2$a;+giI@Q~0c@ zuzec;y^zoA_r)SQ?k7jQEdJY}IJtLgmS5v|_~E_Y(UabQF?+qe-q6TVAmH!uLRdrC zLxk)kz7^t!*|_&{$S-_l<=iX{nqAlgwsp>>gIEC zdC5yZH?wr-cKuR>_GmsaE@zmG4Gcwyc5f~o-*rGuy^Xj=^uMAqpGKF;TIN$IsVbiy z8O|O%`Gi_!ehH@#hJ^*qTOE0FbSRtGh--Lc#Hvh|!)VFz7CNt3_^K*ixhfX^IV5w1 z2)v5$-zN5(KlrDQhH(5QD3gluL2IuL;rosf@rG!2Z+T6A&<-=&j^Az0KA5}Poc{uE Co^0d* literal 0 HcmV?d00001