Berry solidification toggle for string literals

This commit is contained in:
Stephan Hadinger 2022-04-09 09:50:17 +02:00
parent 3432c6841d
commit a5c852b047
2 changed files with 142 additions and 41 deletions

View File

@ -33,6 +33,12 @@ extern "C" {
.next = (uint32_t)(_next) & 0xFFFFFF \ .next = (uint32_t)(_next) & 0xFFFFFF \
} }
/* Initializer for a constant map key whose string is supplied directly as a
 * C string literal (wrapped by be_str_literal) instead of an identifier.
 * _str  : C string literal used as the key.
 * _next : value stored in the `next` field; only its low 24 bits are kept. */
#define be_const_key_literal(_str, _next) { \
.v.c = be_str_literal(_str), \
.type = BE_STRING, \
.next = (uint32_t)(_next) & 0xFFFFFF \
}
#define be_const_key_int(_i, _next) { \ #define be_const_key_int(_i, _next) { \
.v.i = _i, \ .v.i = _i, \
.type = BE_INT, \ .type = BE_INT, \
@ -230,6 +236,13 @@ const bntvmodule be_native_module(_module) = { \
BE_STRING \ BE_STRING \
} }
/* Initializer pairing a string object built in place from the C string
 * literal _name_ with the BE_STRING type tag.
 * _name_ must be a string literal: its length is taken with sizeof.
 * The hash argument is passed as 0, the same way be_str_literal does; this
 * macro has no hash parameter of its own to forward. */
#define be_nested_str_literal(_name_) \
{ \
{ .s=(be_nested_const_str(_name_, 0, sizeof(_name_)-1 ))\
}, \
BE_STRING \
}
#define be_str_literal(_str) \ #define be_str_literal(_str) \
be_nested_const_str(_str, 0, sizeof(_str)-1 ) be_nested_const_str(_str, 0, sizeof(_str)-1 )

View File

@ -94,9 +94,63 @@ static void toidentifier(char *to, const char *p)
*to = 0; // final NULL *to = 0; // final NULL
} }
static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, const char *key);
static void m_solidify_map(bvm *vm, bmap * map, const char *class_name) /********************************************************************\
* Encode string as literals with simple rules
*
* Encode specifically \\, \n, \"
* All other characters outside of 0x20-0x7F are written as a 4-character numeric escape sequence
\********************************************************************/
/* Return the buffer size, in bytes, needed to hold the escaped form of `s`,
 * including the terminating NUL.
 *
 * Newline, backslash and double quote each take 2 characters; bytes in the
 * range 0x20-0x7F take 1; every other byte takes a 4-character escape. */
static unsigned toliteral_length(const char *s)
{
    unsigned total = 1;     /* room for the terminating NUL */
    for (const char *cur = s; *cur != '\0'; cur++) {
        char c = *cur;
        switch (c) {
        case '\n':
        case '\\':
        case '"':
            total += 2;
            break;
        default:
            if (c >= 0x20 && (c & 0x80) == 0) {
                total += 1;     /* copied verbatim */
            } else {
                total += 4;     /* 4-character numeric escape */
            }
            break;
        }
    }
    return total;
}
/* Write into `to` the text of `p` escaped so that it can be placed between
 * double quotes in generated C source.
 *
 *   to : destination buffer; must have room for toliteral_length(p) bytes.
 *   p  : NUL-terminated source string.
 *
 * Newline, backslash and double quote become \n, \\ and \" respectively.
 * Bytes in 0x20-0x7F are copied unchanged. Any other byte is written as a
 * three-digit octal escape (\ooo).
 *
 * Octal is used rather than \xHH because a C hexadecimal escape has no
 * digit limit: a byte such as 0x01 followed by the character 'a' would be
 * emitted as \x01a and read back as a single (out-of-range) escape. An
 * octal escape stops after three digits, and still occupies 4 characters. */
static void toliteral(char *to, const char *p)
{
    for (; *p; p++) {
        unsigned char c = (unsigned char)*p;
        if (c == '\n') {
            to[0] = '\\';
            to[1] = 'n';
            to += 2;
        } else if (c == '\\') {
            to[0] = '\\';
            to[1] = '\\';
            to += 2;
        } else if (c == '"') {
            to[0] = '\\';
            to[1] = '"';
            to += 2;
        } else if (c >= 0x20 && c < 0x80) {
            *to++ = (char)c;
        } else {
            to[0] = '\\';
            to[1] = (char)('0' + ((c >> 6) & 0x07));
            to[2] = (char)('0' + ((c >> 3) & 0x07));
            to[3] = (char)('0' + (c & 0x07));
            to += 4;
        }
    }
    *to = 0;    /* terminating NUL */
}
static void m_solidify_bvalue(bvm *vm, bbool str_literal, bvalue * value, const char *classname, const char *key);
static void m_solidify_map(bvm *vm, bbool str_literal, bmap * map, const char *class_name)
{ {
// compact first // compact first
be_map_compact(vm, map); be_map_compact(vm, map);
@ -116,14 +170,21 @@ static void m_solidify_map(bvm *vm, bmap * map, const char *class_name)
if (node->key.type == BE_STRING) { if (node->key.type == BE_STRING) {
/* convert the string literal to identifier */ /* convert the string literal to identifier */
const char * key = str(node->key.v.s); const char * key = str(node->key.v.s);
size_t id_len = toidentifier_length(key); if (!str_literal) {
char id_buf[id_len]; size_t id_len = toidentifier_length(key);
toidentifier(id_buf, key); char id_buf[id_len];
logfmt(" { be_const_key(%s, %i), ", id_buf, key_next); toidentifier(id_buf, key);
m_solidify_bvalue(vm, &node->value, class_name, str(node->key.v.s)); logfmt(" { be_const_key(%s, %i), ", id_buf, key_next);
} else {
size_t id_len = toliteral_length(key);
char id_buf[id_len];
toliteral(id_buf, key);
logfmt(" { be_const_key_literal(\"%s\", %i), ", id_buf, key_next);
}
m_solidify_bvalue(vm, str_literal, &node->value, class_name, str(node->key.v.s));
} else if (node->key.type == BE_INT) { } else if (node->key.type == BE_INT) {
logfmt(" { be_const_key_int(%i, %i), ", node->key.v.i, key_next); logfmt(" { be_const_key_int(%i, %i), ", node->key.v.i, key_next);
m_solidify_bvalue(vm, &node->value, class_name, NULL); m_solidify_bvalue(vm, str_literal, &node->value, class_name, NULL);
} else { } else {
char error[64]; char error[64];
snprintf(error, sizeof(error), "Unsupported type in key: %i", node->key.type); snprintf(error, sizeof(error), "Unsupported type in key: %i", node->key.type);
@ -136,21 +197,21 @@ static void m_solidify_map(bvm *vm, bmap * map, const char *class_name)
} }
static void m_solidify_list(bvm *vm, blist * list, const char *class_name) static void m_solidify_list(bvm *vm, bbool str_literal, blist * list, const char *class_name)
{ {
logfmt(" be_nested_list(%i,\n", list->count); logfmt(" be_nested_list(%i,\n", list->count);
logfmt(" ( (struct bvalue*) &(const bvalue[]) {\n"); logfmt(" ( (struct bvalue*) &(const bvalue[]) {\n");
for (int i = 0; i < list->count; i++) { for (int i = 0; i < list->count; i++) {
logfmt(" "); logfmt(" ");
m_solidify_bvalue(vm, &list->data[i], class_name, ""); m_solidify_bvalue(vm, str_literal, &list->data[i], class_name, "");
logfmt(",\n"); logfmt(",\n");
} }
logfmt(" }))"); // TODO need terminal comma? logfmt(" }))"); // TODO need terminal comma?
} }
// pass key name in case of class, or NULL if none // pass key name in case of class, or NULL if none
static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, const char *key) static void m_solidify_bvalue(bvm *vm, bbool str_literal, bvalue * value, const char *classname, const char *key)
{ {
int type = var_primetype(value); int type = var_primetype(value);
switch (type) { switch (type) {
@ -188,10 +249,17 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
if (len >= 255) { if (len >= 255) {
be_raise(vm, "internal_error", "Strings greater than 255 chars not supported yet"); be_raise(vm, "internal_error", "Strings greater than 255 chars not supported yet");
} }
size_t id_len = toidentifier_length(str); if (!str_literal) {
char id_buf[id_len]; size_t id_len = toidentifier_length(str);
toidentifier(id_buf, str); char id_buf[id_len];
logfmt("be_nested_str(%s)", id_buf); toidentifier(id_buf, str);
logfmt("be_nested_str(%s)", id_buf);
} else {
size_t id_len = toliteral_length(str);
char id_buf[id_len];
toliteral(id_buf, str);
logfmt("be_nested_str_literal(\"%s\")", id_buf);
}
} }
break; break;
case BE_CLOSURE: case BE_CLOSURE:
@ -235,16 +303,16 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
} else { } else {
logfmt(" be_const_list( * "); logfmt(" be_const_list( * ");
} }
m_solidify_bvalue(vm, &ins->members[0], classname, key); m_solidify_bvalue(vm, str_literal, &ins->members[0], classname, key);
logfmt(" ) } ))"); logfmt(" ) } ))");
} }
} }
break; break;
case BE_MAP: case BE_MAP:
m_solidify_map(vm, (bmap *) var_toobj(value), classname); m_solidify_map(vm, str_literal, (bmap *) var_toobj(value), classname);
break; break;
case BE_LIST: case BE_LIST:
m_solidify_list(vm, (blist *) var_toobj(value), classname); m_solidify_list(vm, str_literal, (blist *) var_toobj(value), classname);
break; break;
default: default:
{ {
@ -255,24 +323,24 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
} }
} }
static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins); static void m_solidify_subclass(bvm *vm, bbool str_literal, bclass *cl, int builtins);
/* solidify any inner class */ /* solidify any inner class */
static void m_solidify_proto_inner_class(bvm *vm, bproto *pr, int builtins) static void m_solidify_proto_inner_class(bvm *vm, bbool str_literal, bproto *pr, int builtins)
{ {
// parse any class in constants to output it first // parse any class in constants to output it first
if (pr->nconst > 0) { if (pr->nconst > 0) {
for (int k = 0; k < pr->nconst; k++) { for (int k = 0; k < pr->nconst; k++) {
if (var_type(&pr->ktab[k]) == BE_CLASS) { if (var_type(&pr->ktab[k]) == BE_CLASS) {
// output the class // output the class
m_solidify_subclass(vm, (bclass*) var_toobj(&pr->ktab[k]), builtins); m_solidify_subclass(vm, str_literal, (bclass*) var_toobj(&pr->ktab[k]), builtins);
} }
} }
} }
} }
static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int builtins, int indent) static void m_solidify_proto(bvm *vm, bbool str_literal, bproto *pr, const char * func_name, int builtins, int indent)
{ {
// const char * func_name = str(pr->name); // const char * func_name = str(pr->name);
// const char * func_source = str(pr->source); // const char * func_source = str(pr->source);
@ -302,7 +370,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
size_t sub_len = strlen(func_name) + 10; size_t sub_len = strlen(func_name) + 10;
char sub_name[sub_len]; char sub_name[sub_len];
snprintf(sub_name, sizeof(sub_name), "%s_%d", func_name, i); snprintf(sub_name, sizeof(sub_name), "%s_%d", func_name, i);
m_solidify_proto(vm, pr->ptab[i], sub_name, builtins, indent+2); m_solidify_proto(vm, str_literal, pr->ptab[i], sub_name, builtins, indent+2);
logfmt(",\n"); logfmt(",\n");
} }
logfmt("%*s}),\n", indent, ""); logfmt("%*s}),\n", indent, "");
@ -315,7 +383,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
logfmt("%*s( &(const bvalue[%2d]) { /* constants */\n", indent, "", pr->nconst); logfmt("%*s( &(const bvalue[%2d]) { /* constants */\n", indent, "", pr->nconst);
for (int k = 0; k < pr->nconst; k++) { for (int k = 0; k < pr->nconst; k++) {
logfmt("%*s/* K%-3d */ ", indent, "", k); logfmt("%*s/* K%-3d */ ", indent, "", k);
m_solidify_bvalue(vm, &pr->ktab[k], NULL, NULL); m_solidify_bvalue(vm, str_literal, &pr->ktab[k], NULL, NULL);
logfmt(",\n"); logfmt(",\n");
} }
logfmt("%*s}),\n", indent, ""); logfmt("%*s}),\n", indent, "");
@ -324,12 +392,18 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
} }
/* convert the string literal to identifier */ /* convert the string literal to identifier */
{ if (!str_literal) {
const char * key = str(pr->name); const char * key = str(pr->name);
size_t id_len = toidentifier_length(key); size_t id_len = toidentifier_length(key);
char id_buf[id_len]; char id_buf[id_len];
toidentifier(id_buf, key); toidentifier(id_buf, key);
logfmt("%*s&be_const_str_%s,\n", indent, "", id_buf); logfmt("%*s&be_const_str_%s,\n", indent, "", id_buf);
} else {
const char * key = str(pr->name);
size_t id_len = toliteral_length(key);
char id_buf[id_len];
toliteral(id_buf, key);
logfmt("%*sbe_str_literal(\"%s\"),\n", indent, "", id_buf);
} }
// hard-code source as "solidified" for solidified // hard-code source as "solidified" for solidified
logfmt("%*s&be_const_str_solidified,\n", indent, ""); logfmt("%*s&be_const_str_solidified,\n", indent, "");
@ -356,7 +430,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
} }
static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, int builtins) static void m_solidify_closure(bvm *vm, bbool str_literal, bclosure *cl, const char * classname, int builtins)
{ {
bproto *pr = cl->proto; bproto *pr = cl->proto;
const char * func_name = str(pr->name); const char * func_name = str(pr->name);
@ -368,7 +442,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
int indent = 2; int indent = 2;
m_solidify_proto_inner_class(vm, pr, builtins); m_solidify_proto_inner_class(vm, str_literal, pr, builtins);
logfmt("\n"); logfmt("\n");
logfmt("/********************************************************************\n"); logfmt("/********************************************************************\n");
@ -384,7 +458,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
func_name_id); func_name_id);
} }
m_solidify_proto(vm, pr, func_name, builtins, indent); m_solidify_proto(vm, str_literal, pr, func_name, builtins, indent);
logfmt("\n"); logfmt("\n");
// closure // closure
@ -392,7 +466,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
logfmt("/*******************************************************************/\n\n"); logfmt("/*******************************************************************/\n\n");
} }
static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins) static void m_solidify_subclass(bvm *vm, bbool str_literal, bclass *cl, int builtins)
{ {
const char * class_name = str(cl->name); const char * class_name = str(cl->name);
@ -403,7 +477,7 @@ static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins)
while ((node = be_map_next(cl->members, &iter)) != NULL) { while ((node = be_map_next(cl->members, &iter)) != NULL) {
if (var_isstr(&node->key) && var_isclosure(&node->value)) { if (var_isstr(&node->key) && var_isclosure(&node->value)) {
bclosure *f = var_toobj(&node->value); bclosure *f = var_toobj(&node->value);
m_solidify_closure(vm, f, class_name, builtins); m_solidify_closure(vm, str_literal, f, class_name, builtins);
} }
} }
} }
@ -427,22 +501,32 @@ static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins)
} }
if (cl->members) { if (cl->members) {
m_solidify_map(vm, cl->members, class_name); m_solidify_map(vm, str_literal, cl->members, class_name);
logfmt(",\n"); logfmt(",\n");
} else { } else {
logfmt(" NULL,\n"); logfmt(" NULL,\n");
} }
logfmt(" be_str_literal(\"%s\")\n", class_name); if (!str_literal) {
size_t id_len = toidentifier_length(class_name);
char id_buf[id_len];
toidentifier(id_buf, class_name);
logfmt(" &be_const_str_%s,\n", id_buf);
} else {
size_t id_len = toliteral_length(class_name);
char id_buf[id_len];
toliteral(id_buf, class_name);
logfmt(" be_str_literal(\"%s\")\n", id_buf);
}
logfmt(");\n"); logfmt(");\n");
} }
static void m_solidify_class(bvm *vm, bclass *cl, int builtins) static void m_solidify_class(bvm *vm, bbool str_literal, bclass *cl, int builtins)
{ {
const char * class_name = str(cl->name); const char * class_name = str(cl->name);
m_solidify_subclass(vm, cl, builtins); m_solidify_subclass(vm, str_literal, cl, builtins);
logfmt("/*******************************************************************/\n\n"); logfmt("/*******************************************************************/\n\n");
logfmt("void be_load_%s_class(bvm *vm) {\n", class_name); logfmt("void be_load_%s_class(bvm *vm) {\n", class_name);
@ -452,7 +536,7 @@ static void m_solidify_class(bvm *vm, bclass *cl, int builtins)
logfmt("}\n"); logfmt("}\n");
} }
static void m_solidify_module(bvm *vm, bmodule *ml, int builtins) static void m_solidify_module(bvm *vm, bbool str_literal, bmodule *ml, int builtins)
{ {
const char * module_name = be_module_name(ml); const char * module_name = be_module_name(ml);
if (!module_name) { module_name = ""; } if (!module_name) { module_name = ""; }
@ -464,11 +548,11 @@ static void m_solidify_module(bvm *vm, bmodule *ml, int builtins)
while ((node = be_map_next(ml->table, &iter)) != NULL) { while ((node = be_map_next(ml->table, &iter)) != NULL) {
if (var_isstr(&node->key) && var_isclosure(&node->value)) { if (var_isstr(&node->key) && var_isclosure(&node->value)) {
bclosure *f = var_toobj(&node->value); bclosure *f = var_toobj(&node->value);
m_solidify_closure(vm, f, module_name, builtins); m_solidify_closure(vm, str_literal, f, module_name, builtins);
} }
if (var_isstr(&node->key) && var_isclass(&node->value)) { if (var_isstr(&node->key) && var_isclass(&node->value)) {
bclass *cl = var_toobj(&node->value); bclass *cl = var_toobj(&node->value);
m_solidify_subclass(vm, cl, builtins); m_solidify_subclass(vm, str_literal, cl, builtins);
} }
} }
} }
@ -483,7 +567,7 @@ static void m_solidify_module(bvm *vm, bmodule *ml, int builtins)
logfmt(" \"%s\",\n", module_name); logfmt(" \"%s\",\n", module_name);
if (ml->table) { if (ml->table) {
m_solidify_map(vm, ml->table, module_name); m_solidify_map(vm, str_literal, ml->table, module_name);
logfmt("\n"); logfmt("\n");
} else { } else {
logfmt(" NULL,\n"); logfmt(" NULL,\n");
@ -501,12 +585,16 @@ static int m_dump(bvm *vm)
{ {
if (be_top(vm) >= 1) { if (be_top(vm) >= 1) {
bvalue *v = be_indexof(vm, 1); bvalue *v = be_indexof(vm, 1);
bbool str_literal = bfalse;
if (be_top(vm) >= 2) {
str_literal = be_tobool(vm, 2);
}
if (var_isclosure(v)) { if (var_isclosure(v)) {
m_solidify_closure(vm, var_toobj(v), NULL, be_builtin_count(vm)); m_solidify_closure(vm, str_literal, var_toobj(v), NULL, be_builtin_count(vm));
} else if (var_isclass(v)) { } else if (var_isclass(v)) {
m_solidify_class(vm, var_toobj(v), be_builtin_count(vm)); m_solidify_class(vm, str_literal, var_toobj(v), be_builtin_count(vm));
} else if (var_ismodule(v)) { } else if (var_ismodule(v)) {
m_solidify_module(vm, var_toobj(v), be_builtin_count(vm)); m_solidify_module(vm, str_literal, var_toobj(v), be_builtin_count(vm));
} else { } else {
be_raise(vm, "value_error", "unsupported type"); be_raise(vm, "value_error", "unsupported type");
} }