Commit 05a6350b authored by Camil Staps's avatar Camil Staps 🚀

Merge branch 'unrelocated-serialization' into 'master'

Add deserialization support for the WebAssembly interpreter

See merge request !103
parents 404cc93b c0f4ecb2
Pipeline #20593 passed with stages
in 17 minutes and 51 seconds
...@@ -63,6 +63,17 @@ defaultDeserializationSettings :: DeserializationSettings ...@@ -63,6 +63,17 @@ defaultDeserializationSettings :: DeserializationSettings
*/ */
serialize :: a !String !*World -> *(!Maybe SerializedGraph, !*World) serialize :: a !String !*World -> *(!Maybe SerializedGraph, !*World)
/**
* Serialize an expression for unrelocated interpretation. This is a mode of
* interpretation where the code and data addresses are fixed. It is useful for
* the WebAssembly interpreter where memory always starts at index 0.
*
* @param The value to serialize.
* @param The path to the executable's bytecode (set by the `ByteCode` option in the project file).
* @result The result may be `Nothing` if the bytecode could not be parsed.
*/
serialize_for_unrelocated_interpretation :: a !String !String !*World -> *(!Maybe String, !*World)
/** /**
* Deserialize an expression using the ABC interpreter. * Deserialize an expression using the ABC interpreter.
* This version copies nodes as soon as they are in head normal form. * This version copies nodes as soon as they are in head normal form.
......
implementation module ABC.Interpreter implementation module ABC.Interpreter
import StdArray import StdArray
import StdBool
import StdClass import StdClass
import StdFile import StdFile
import StdInt import StdInt
...@@ -42,20 +43,20 @@ defaultDeserializationSettings = ...@@ -42,20 +43,20 @@ defaultDeserializationSettings =
serialize :: a !String !*World -> *(!Maybe SerializedGraph, !*World) serialize :: a !String !*World -> *(!Maybe SerializedGraph, !*World)
serialize graph bcfile w serialize graph bcfile w
# (graph,descs,mods) = copy_to_string_with_names graph # (graph,descinfo,modules) = copy_to_string_with_names graph
# (bytecode,w) = readFile bcfile w # (bytecode,w) = readFile bcfile w
| isNothing bytecode = (Nothing, w) | isNothing bytecode = (Nothing, w)
# bytecode = fromJust bytecode # bytecode = fromJust bytecode
#! (len,bytecodep) = strip_bytecode bytecode {#symbol_name di mods \\ di <-: descs} #! (len,bytecodep) = strip_bytecode bytecode {#symbol_name di modules \\ di <-: descinfo}
#! bytecode = derefCharArray bytecodep len #! bytecode = derefCharArray bytecodep len
| free_to_false bytecodep = (Nothing, w) | free_to_false bytecodep = (Nothing, w)
# rec = # rec =
{ graph = graph { graph = graph
, descinfo = descs , descinfo = descinfo
, modules = mods , modules = modules
, bytecode = bytecode , bytecode = bytecode
} }
= (Just rec, w) = (Just rec, w)
...@@ -74,6 +75,144 @@ where ...@@ -74,6 +75,144 @@ where
ccall strip_bytecode "sA:VIp" ccall strip_bytecode "sA:VIp"
} }
serialize_for_unrelocated_interpretation :: a !String !String !*World -> *(!Maybe String, !*World)
serialize_for_unrelocated_interpretation graph bcfile thisexe w
# (host_syms,w) = accFiles (read_symbols thisexe) w
# (graph,descinfo,modules) = copy_to_string_with_names graph
# (bytecode,w) = readFile bcfile w
| isNothing bytecode = (Nothing, w)
# bytecode = fromJust bytecode
# pgm = parse host_syms bytecode
| isNothing pgm = (Nothing, w)
# pgm = fromJust pgm
# code_start = get_code pgm
# int_syms = {#s \\ s <- getInterpreterSymbols pgm}
# int_syms = {#predef_or_lookup_symbol code_start d modules int_syms \\ d <-: descinfo}
# graph = replace_desc_numbers_by_descs 0 graph int_syms 0 code_start // relocate relative to beginning of code segment
= (Just graph, w)
where
get_code :: !Pointer -> Pointer
get_code pgm = code {
ccall get_code "p:p"
}
predef_or_lookup_symbol :: !Int !DescInfo !{#String} !{#Symbol} -> Int
predef_or_lookup_symbol code_start di mods syms = case di.di_name of
"_ARRAY_" -> code_start-1*8+2
"_STRING_" -> code_start-2*8+2
"BOOL" -> code_start-3*8+2
"CHAR" -> code_start-4*8+2
"REAL" -> code_start-5*8+2
"INT" -> code_start-6*8+2
"dINT" -> code_start-6*8+2
_ -> lookup_symbol_value di mods syms
// This is like the function with the same name in GraphCopy's
// graph_copy_with_names, but it assigns even negative descriptor numbers
// to predefined symbols so that it matches predef_or_lookup_symbol above.
replace_desc_numbers_by_descs :: !Int !*{#Char} !{#Int} !Int !Int -> *{#Char}
replace_desc_numbers_by_descs i s symbol_a symbol_offset array_desc
| i>=size s
| i==size s = s
| otherwise = abort "error in replace_desc_numbers_by_descs\n"
#! desc=get_word_from_string s i
| desc<0
= replace_desc_numbers_by_descs (i+IF_INT_64_OR_32 8 4) s symbol_a symbol_offset array_desc
# desc = symbol_a.[desc-1]
# desc=desc+symbol_offset
# s=store_int_in_string s i (desc-array_desc)
| desc bitand 2==0
# d = get_thunk_n_non_pointers desc
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 8 4)+(d<<(IF_INT_64_OR_32 3 2))) s symbol_a symbol_offset array_desc
# (d,not_array) = get_descriptor_n_non_pointers_and_not_array desc
| not_array
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 8 4)+(d<<(IF_INT_64_OR_32 3 2))) s symbol_a symbol_offset array_desc
| d==0 // _STRING_
#! l = get_word_from_string s (i+IF_INT_64_OR_32 8 4)
# l = IF_INT_64_OR_32 ((l+7) bitand -8) ((l+3) bitand -4)
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 16 8)+l) s symbol_a symbol_offset array_desc
| d==1 // _ARRAY_
#! d = get_word_from_string s (i+IF_INT_64_OR_32 16 8)
| d==0
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 24 12)) s symbol_a symbol_offset array_desc
# d = symbol_a.[d-1]
# d = d+symbol_offset
# s=store_int_in_string s (i+IF_INT_64_OR_32 16 8) (d-array_desc)
#! l = get_word_from_string s (i+IF_INT_64_OR_32 8 4)
| d==array_desc-5*8+2 // REAL
# l = l << IF_INT_64_OR_32 3 2
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 24 12)+l) s symbol_a symbol_offset array_desc
| d==array_desc-6*8+2 // INT
# l = l << 3
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 24 12)+l) s symbol_a symbol_offset array_desc
| d==array_desc-3*8+2 // BOOL
# l = IF_INT_64_OR_32 ((l+7) bitand -8) ((l+3) bitand -4)
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 24 12)+l) s symbol_a symbol_offset array_desc
# arity = get_D_node_arity d
| arity>=256
# record_a_arity = get_D_record_a_arity d
# record_b_arity = arity-256-record_a_arity
# l = (l * record_b_arity) << IF_INT_64_OR_32 3 2
= replace_desc_numbers_by_descs (i+(IF_INT_64_OR_32 24 12)+l) s symbol_a symbol_offset array_desc
= abort (toString l+++" "+++toString d)
where
get_word_from_string :: !{#Char} !Int -> Int // get_D_from_string_64
get_word_from_string s i = code inline {
push_a_b 0
pop_a 1
addI
load_i 16
}
store_int_in_string :: !*{#Char} !Int !Int -> *{#Char} // 64-bit variant
store_int_in_string s i n =
{s & [i]=toChar n,[i+1]=toChar (n>>8),[i+2]=toChar (n>>16),[i+3]=toChar (n>>24),
[i+4]=toChar (n >> 32),[i+5]=toChar (n>>40),[i+6]=toChar (n>>48),[i+7]=toChar (n>>56)}
get_thunk_n_non_pointers:: !Int -> Int
get_thunk_n_non_pointers d
# arity = get_thunk_arity d
| arity<256
= 0
# b_size = arity>>8
= b_size
where
get_thunk_arity :: !Int -> Int // 64-bit version
get_thunk_arity a = code {
load_si32 -4
}
get_descriptor_n_non_pointers_and_not_array :: !Int -> (!Int,!Bool)
get_descriptor_n_non_pointers_and_not_array d
| d<array_desc
| d==array_desc-1*8+2 = (1,False) // _ARRAY_
| d==array_desc-2*8+2 = (0,False) // _STRING_
| d==array_desc-3*8+2 = (1,True) // BOOL
| d==array_desc-4*8+2 = (1,True) // CHAR
| d==array_desc-5*8+2 = (IF_INT_64_OR_32 1 2,True) // REAL
| d==array_desc-6*8+2 = (1,True) // INT/dINT
| otherwise = abort "internal error in serialize_for_unrelocated_interpretation\n"
# arity = get_D_node_arity d
| arity<256 = (0,True)
# record_a_arity = get_D_record_a_arity d
# record_b_arity = arity-256-record_a_arity
= (record_b_arity,True)
get_D_node_arity :: !Int -> Int
get_D_node_arity d = code inline {
load_si16 -2
}
get_D_record_a_arity :: !Int -> Int
get_D_record_a_arity d = code inline {
load_si16 0
}
deserialize :: !DeserializationSettings !SerializedGraph !String !*World -> *(!Maybe a, !*World) deserialize :: !DeserializationSettings !SerializedGraph !String !*World -> *(!Maybe a, !*World)
deserialize dsets graph thisexe w = deserialize` False dsets graph thisexe w deserialize dsets graph thisexe w = deserialize` False dsets graph thisexe w
......
...@@ -95,6 +95,29 @@ intp = new Uint8Array(intp); ...@@ -95,6 +95,29 @@ intp = new Uint8Array(intp);
{ {
clean: { clean: {
memory: memory, memory: memory,
debug: function(what,a,b,c) {
switch (what) {
case 0:
console.log('loop',a,'/',b,'; hp at',c);
break;
case 1:
console.log('desc',a);
break;
case 2:
console.log('arity',a);
break;
case 3:
console.log('unimplemented:',['large hnf','thunk'][a]);
break;
case 4:
console.log('redirect',a,c,'(from',b,')');
break;
case 5:
console.log('a arity',a);
break;
}
}
} }
} }
); );
...@@ -192,6 +215,20 @@ intp = new Uint8Array(intp); ...@@ -192,6 +215,20 @@ intp = new Uint8Array(intp);
}); });
} }
var i=scriptArgs.indexOf('--graph');
if (i >= 0) {
var graph = os.file.readFile(scriptArgs[i+1], 'binary');
graph = new Uint32Array(graph.buffer);
var unused_semispace = util.instance.exports.get_unused_semispace();
for (var i=0; i<graph.length; i++)
membuffer[unused_semispace/4+i] = graph[i];
var node = hp;
hp = util.instance.exports.copy_from_string(unused_semispace,graph.length/2,asp,bsp,hp,code_offset*8);
asp+=8;
membuffer[asp/4] = node;
start+=32; /* skip bootstrap to build start node; jump to _print_graph */
}
var time_start=new Date().getTime(); var time_start=new Date().getTime();
var r=intp.instance.exports.interpret(start, asp, bsp, csp, hp, heap_size/8); var r=intp.instance.exports.interpret(start, asp, bsp, csp, hp, heap_size/8);
......
This diff is collapsed.
...@@ -8,8 +8,6 @@ ...@@ -8,8 +8,6 @@
void free_program(struct program *pgm) { void free_program(struct program *pgm) {
if (pgm->code != NULL) if (pgm->code != NULL)
free(pgm->code); free(pgm->code);
if (pgm->data != NULL)
free(pgm->data);
if (pgm->symbol_table != NULL) if (pgm->symbol_table != NULL)
free(pgm->symbol_table); free(pgm->symbol_table);
if (pgm->symbols != NULL) if (pgm->symbols != NULL)
...@@ -47,6 +45,10 @@ struct symbol *get_symbol_table(struct program *pgm) { ...@@ -47,6 +45,10 @@ struct symbol *get_symbol_table(struct program *pgm) {
return pgm->symbol_table; return pgm->symbol_table;
} }
BC_WORD *get_code(struct program *pgm) {
return pgm->code;
}
struct host_symbol *find_host_symbol_by_name(struct program *pgm, char *name) { struct host_symbol *find_host_symbol_by_name(struct program *pgm, char *name) {
int start = 0; int start = 0;
int end = pgm->host_symbols_n - 1; int end = pgm->host_symbols_n - 1;
......
...@@ -90,11 +90,15 @@ void free_program(struct program *pgm); ...@@ -90,11 +90,15 @@ void free_program(struct program *pgm);
# ifdef LINK_CLEAN_RUNTIME # ifdef LINK_CLEAN_RUNTIME
int get_symbol_table_size(struct program *pgm); int get_symbol_table_size(struct program *pgm);
struct symbol *get_symbol_table(struct program *pgm); struct symbol *get_symbol_table(struct program *pgm);
BC_WORD *get_code(struct program *pgm);
/* Assumes the symbols are sorted by name */ /* Assumes the symbols are sorted by name */
struct host_symbol *find_host_symbol_by_name(struct program *pgm, char *name); struct host_symbol *find_host_symbol_by_name(struct program *pgm, char *name);
/* Assumes the symbols are sorted by (host) address */ /* Assumes the symbols are sorted by (host) address */
struct host_symbol *find_host_symbol_by_address(struct program *pgm, void *addr); struct host_symbol *find_host_symbol_by_address(struct program *pgm, void *addr);
void sort_host_symbols_by_location(struct program *pgm); void sort_host_symbols_by_location(struct program *pgm);
struct host_symbol *add_extra_host_symbol(struct program *pgm); struct host_symbol *add_extra_host_symbol(struct program *pgm);
# endif # endif
......
...@@ -210,22 +210,23 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -210,22 +210,23 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
} }
/* not a basic type */ /* not a basic type */
a_arity=arity;
if (arity>256) { if (arity>256) {
a_arity=((int16_t*)desc)[0]; a_arity=((int16_t*)desc)[0];
arity-=256; arity-=256;
} }
ie->hp[0]=desc;
if (arity==0) { if (arity==0) {
desc-=10; desc-=10;
**ptr_stack--=desc; **ptr_stack--=desc;
s[i]=desc; s[i]=desc;
*--a_size_stack=0; *--a_size_stack=0;
continue; continue;
} else if (arity==1) { }
**ptr_stack--=(BC_WORD)ie->hp;
**ptr_stack--=(BC_WORD)ie->hp;
ie->hp[0]=desc;
if (arity==1) {
if (a_arity==1) if (a_arity==1)
*++ptr_stack=&ie->hp[1]; *++ptr_stack=&ie->hp[1];
else else
...@@ -234,7 +235,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -234,7 +235,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
*--a_size_stack=1; *--a_size_stack=1;
continue; continue;
} else if (arity==2) { } else if (arity==2) {
**ptr_stack--=(BC_WORD)ie->hp;
if (a_arity==2) { if (a_arity==2) {
ptr_stack[2]=&ie->hp[1]; ptr_stack[2]=&ie->hp[1];
ptr_stack[1]=&ie->hp[2]; ptr_stack[1]=&ie->hp[2];
...@@ -253,7 +253,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -253,7 +253,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
} }
/* large hnf */ /* large hnf */
**ptr_stack--=(BC_WORD)ie->hp;
ie->hp[2]=(BC_WORD)&ie->hp[3]; ie->hp[2]=(BC_WORD)&ie->hp[3];
if (a_arity==0) { if (a_arity==0) {
...@@ -273,9 +272,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -273,9 +272,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
ie->hp+=arity+2; ie->hp+=arity+2;
*--a_size_stack = a_arity; *--a_size_stack = a_arity;
} else { /* thunk */ } else { /* thunk */
**ptr_stack--=(BC_WORD)ie->hp;
ie->hp[0]=desc;
int32_t arity=((int32_t*)desc)[-1]; int32_t arity=((int32_t*)desc)[-1];
int16_t a_arity=arity; int16_t a_arity=arity;
...@@ -286,8 +282,12 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -286,8 +282,12 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
arity&=0xff; arity&=0xff;
} }
**ptr_stack--=(BC_WORD)ie->hp;
ie->hp[0]=desc;
ie->hp++; ie->hp++;
*--a_size_stack = a_arity;
for (int a=0; a<a_arity; a++) for (int a=0; a<a_arity; a++)
ptr_stack[a_arity-a]=&ie->hp[a]; ptr_stack[a_arity-a]=&ie->hp[a];
ptr_stack+=a_arity; ptr_stack+=a_arity;
...@@ -296,8 +296,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env ...@@ -296,8 +296,6 @@ BC_WORD *string_to_interpreter(uint64_t *clean_string, struct interpretation_env
ie->hp[b]=s[++i]; ie->hp[b]=s[++i];
ie->hp+=arity<3 ? 2 : arity; ie->hp+=arity<3 ? 2 : arity;
*--a_size_stack = a_arity;
} }
} }
......
...@@ -215,7 +215,7 @@ int parse_program(struct parser *state, struct char_provider *cp) { ...@@ -215,7 +215,7 @@ int parse_program(struct parser *state, struct char_provider *cp) {
switch (state->state) { switch (state->state) {
case PS_init: case PS_init:
{ {
uint32_t header_length; uint32_t header_length,code_size;
if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0) if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0)
return 1; return 1;
...@@ -235,11 +235,10 @@ int parse_program(struct parser *state, struct char_provider *cp) { ...@@ -235,11 +235,10 @@ int parse_program(struct parser *state, struct char_provider *cp) {
return 1; return 1;
header_length-=4; header_length-=4;
#ifdef LINKER #ifdef LINKER
state->code_size = elem32; state->code_size = code_size = elem32;
#else #else
state->program->code_size = elem32; state->program->code_size = code_size = elem32;
#endif #endif
state->program->code = safe_malloc(sizeof(BC_WORD) * elem32);
if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0) if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0)
return 1; return 1;
...@@ -273,7 +272,10 @@ int parse_program(struct parser *state, struct char_provider *cp) { ...@@ -273,7 +272,10 @@ int parse_program(struct parser *state, struct char_provider *cp) {
# else # else
state->program->data_size = elem32; state->program->data_size = elem32;
# endif # endif
state->program->data = safe_malloc(sizeof(BC_WORD) * state->program->data_size); /* The unrelocator writes data size between code and data segment, so reserve this space.
* TODO: better would be to use a different file format in the unrelocator. */
state->program->code = safe_malloc(sizeof(BC_WORD) * (code_size+state->program->data_size+1));
state->program->data = state->program->code + code_size + 1;
#endif #endif
if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0) if (provide_chars(&elem32, sizeof(elem32), 1, cp) < 0)
......
...@@ -316,18 +316,18 @@ where ...@@ -316,18 +316,18 @@ where
, "(func $clean_expR (import \"clean\" \"expR\") (param f64) (result f64))" , "(func $clean_expR (import \"clean\" \"expR\") (param f64) (result f64))"
, "(func $clean_lnR (import \"clean\" \"lnR\") (param f64) (result f64))" , "(func $clean_lnR (import \"clean\" \"lnR\") (param f64) (result f64))"
, "(func $clean_log10R (import \"clean\" \"log10R\") (param f64) (result f64))" , "(func $clean_log10R (import \"clean\" \"log10R\") (param f64) (result f64))"
, "(func $clean_debug_instr (import \"clean\" \"debug_instr\") (param i32 i32))" , if debug_instructions "(func $clean_debug_instr (import \"clean\" \"debug_instr\") (param i32 i32))" ""
, "(func $clean_illegal_instr (import \"clean\" \"illegal_instr\") (param i32 i32))" , "(func $clean_illegal_instr (import \"clean\" \"illegal_instr\") (param i32 i32))"
, "(func $clean_out_of_memory (import \"clean\" \"out_of_memory\"))" , "(func $clean_out_of_memory (import \"clean\" \"out_of_memory\"))"
, "(func $clean_gc (import \"clean\" \"gc\") (param i32) (result i64))" , "(func $clean_gc (import \"clean\" \"gc\") (param i32) (result i64))"
, "(func $clean_halt (import \"clean\" \"halt\") (param i32 i32 i32))" , "(func $clean_halt (import \"clean\" \"halt\") (param i32 i32 i32))"
, "(global $pc (export \"pc\") (mut i64) (i64.const 0))" , "(global $pc (mut i64) (i64.const 0))"
, "(global $asp (export \"asp\") (mut i64) (i64.const 0))" , "(global $asp (mut i64) (i64.const 0))"
, "(global $bsp (export \"bsp\") (mut i64) (i64.const 0))" , "(global $bsp (mut i64) (i64.const 0))"
, "(global $csp (export \"csp\") (mut i64) (i64.const 0))" , "(global $csp (mut i64) (i64.const 0))"
, "(global $hp (export \"hp\") (mut i64) (i64.const 0))" , "(global $hp (mut i64) (i64.const 0))"
, "(global $hp_size (export \"hp_size\") (mut i64) (i64.const 0))" , "(global $hp_size (mut i64) (i64.const 0))"
, "(global $hp_free (export \"hp_free\") (mut i64) (i64.const 0))" , "(global $hp_free (mut i64) (i64.const 0))"
, "(func (export \"get_asp\") (result i32) (i32.wrap_i64 (global.get $asp)))" , "(func (export \"get_asp\") (result i32) (i32.wrap_i64 (global.get $asp)))"
, "(func (export \"get_bsp\") (result i32) (i32.wrap_i64 (global.get $bsp)))" , "(func (export \"get_bsp\") (result i32) (i32.wrap_i64 (global.get $bsp)))"
, "(func (export \"get_csp\") (result i32) (i32.wrap_i64 (global.get $csp)))" , "(func (export \"get_csp\") (result i32) (i32.wrap_i64 (global.get $csp)))"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment