forked from iainkfraser/TooLegitToJit
/
xlogue.c
485 lines (375 loc) · 16.5 KB
/
xlogue.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
/*
* (C) Iain Fraser - GPLv3
*
* Lua function prologue and epilogue code generation. The code is a bit
* of a mess because I moved the naive inline version to the function
* calling version and didn't delete the original, but instead commented it
* out using the preprocessor.
*/
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>
#include "frame.h"
#include "machine.h"
#include "lopcodes.h"
#include "synthetic.h"
#include "lua.h"
#include "stack.h"
#include "jitfunc.h"
/*
 * Size of a stack frame holding nr_locals locals: 8 units per local plus a
 * fixed 4 * 4 for the bookkeeping slots ( ra, closure, return position,
 * nr results ). Units look like bytes - TODO confirm.
 */
static int stack_frame_size( int nr_locals ){
	enum { FIXED_SLOTS = 4, SLOT_WIDTH = 4, LOCAL_WIDTH = 8 };
	const int fixed = FIXED_SLOTS * SLOT_WIDTH;

	return fixed + LOCAL_WIDTH * nr_locals;
}
/*
 * Fill reg[0..n) with register operands, handing out the non-temporary
 * registers ( m->reg[ nr_temp_regs .. nr_reg ) ) first and only then falling
 * back to acquire_temp(). Spilling is disabled once all n are assigned; pair
 * every call with prefer_nontemp_release_reg() using the same n.
 */
static void prefer_nontemp_acquire_reg( struct machine_ops* mop, struct emitter* e, struct machine* m,
					int n, operand reg[n] ){
	// everything must fit into registers, nothing may live on the stack
	assert( n <= m->nr_reg );

	const int first_nontemp = m->nr_temp_regs;
	const int nontemp_count = m->nr_reg - first_nontemp;
	int slot = 0;

	// pass 1: non-temporaries, in machine order
	for( ; slot < n && slot < nontemp_count; slot++ )
		reg[ slot ] = OP_TARGETREG( m->reg[ first_nontemp + slot ] );

	// pass 2: whatever is still missing comes from the temp pool
	for( ; slot < n; slot++ )
		reg[ slot ] = OP_TARGETREG( acquire_temp( mop, e, m ) );

	// from here on the acquired registers must stay resident
	disable_spill( m );
}
/*
 * Undo prefer_nontemp_acquire_reg( ..., n, ... ): call release_temp() once for
 * every slot beyond the non-temporary registers, then re-enable spilling.
 */
static void prefer_nontemp_release_reg( struct machine_ops* mop, struct emitter* e, struct machine* m, int n ){
	const int nontemp_count = m->nr_reg - m->nr_temp_regs;
	int temps_held = n - nontemp_count;	// <= 0 when only non-temps were handed out

	while( temps_held-- > 0 )
		release_temp( mop, e, m );

	enable_spill( m );
}
// in order of nontemporary register assignment priority
// Indices into the two-element register set built by precall and do_ret;
// RA_COUNT is the number of entries.
enum REGARGS { RA_NR_ARGS, RA_BASE, RA_COUNT };
//enum { RA_DST, RA_SRC, RA_EXIST, RA_EXPECT, RA_SIZE };
// Indices into the four-element register set used by the copy/postcall
// jfunctions; RA_SIZE is the number of entries. RA_EXIST and RA_SRC have the
// same values ( 0 and 1 ) as RA_NR_ARGS and RA_BASE above.
enum { RA_EXIST, RA_SRC, RA_DST, RA_EXPECT, RA_SIZE };
static void precall( struct machine_ops* mop, struct emitter* e, struct frame* f, int vregbase, int narg, int nret ){
// new frame assumes no temporaries have been used yet
assert( temps_accessed( f->m ) == 0 );
assert( RA_COUNT + 1 <= f->m->nr_reg ); // regargs are passed by register NOT stack
vreg_operand clive = vreg_to_operand( f, vregbase, false );
vreg_operand cstack = vreg_to_operand( f, vregbase, true );
assert( cstack.value.tag == OT_DIRECTADDR );
// TODO: verify its a closure using clive
// get arg passing registers
operand rargs[ RA_COUNT ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_COUNT, rargs );
// calculate number of args
if( narg > 0 )
mop->move( e, f->m, rargs[ RA_NR_ARGS ], OP_TARGETIMMED( narg - 1 ) );
else{
// calculate total by subtracting basereg address from stack.
// 2 becuase 8 for (ebp,closure) another 8 for the function being called ( rem actual args = args - 1 )
mop->add( e, f->m, rargs[ RA_NR_ARGS ], OP_TARGETREG( f->m->fp ), OP_TARGETIMMED( -8 * ( 2 + vregbase ) ) );
mop->sub( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_NR_ARGS ], OP_TARGETREG( f->m->sp ) );
mop->udiv( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_NR_ARGS ], OP_TARGETIMMED( 8 ) );
}
// calcualte base address
mop->add( e, f->m, rargs[ RA_BASE ], OP_TARGETREG( cstack.value.base ), OP_TARGETIMMED( cstack.value.offset ) );
// call function without spilling any temps
#if 0
bool prior = disable_spill( f->m );
mop->call( e, f->m, LBL_ABS( clive.value ) );
restore_spill( f->m, prior );
#else
jfunc_call( mop, e, f->m, JF_PROLOGUE, 0, JFUNC_UNLIMITED_STACK, 2, rargs[ RA_EXIST ], rargs[ RA_SRC ] );
#endif
// release temps used in call
prefer_nontemp_release_reg( mop, e, f->m, RA_COUNT );
}
/*
 * Emit the caller-side code that runs after the callee has returned.
 *
 * With USE_JFUNC_FOR_VARRES defined ( it is, a few lines below ):
 *	nret == 0 - call the JF_VARRES_POSTCALL jfunction
 *	otherwise - load RA_EXPECT with nret - 1 and call JF_ARG_RES_CPY
 *
 * RA_SRC, RA_DST and RA_EXIST are not written here; presumably they are still
 * live from the callee's return sequence - TODO confirm.
 * vregbase only feeds basestack, which is referenced solely by compiled-out
 * code; narg is not used.
 */
static void postcall( struct machine_ops* mop, struct emitter* e, struct frame* f, int vregbase, int narg, int nret ){
// only referenced by the "#if 0" block below
vreg_operand basestack = vreg_to_operand( f, vregbase, true );
operand rargs[ RA_SIZE ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_SIZE, rargs );
// max stack clobber
const int maxstack = 3; // prior frame has buffer of pushed return addr, frame pointer and closure
// NOTE: defined inside the function body, but as a preprocessor macro it
// stays defined for the remainder of the translation unit.
#define USE_JFUNC_FOR_VARRES
// set dst and expect
#if 0
mop->add( e, f->m, rargs[ RA_DST ], OP_TARGETREG( basestack.value.base ), OP_TARGETIMMED( basestack.value.offset ) );
#endif
if( nret == 0 ){
#ifdef USE_JFUNC_FOR_VARRES
// variable number of results: handled by a dedicated jfunction
jfunc_call( mop, e, f->m, JF_VARRES_POSTCALL, 0, maxstack, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
#else
// inline alternative, not compiled while USE_JFUNC_FOR_VARRES is defined
mop->move( e, f->m, rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
// update stack position
mop->mul( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_NR_ARGS ], OP_TARGETIMMED( 8 ) ); // in word units
mop->add( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_NR_ARGS ], OP_TARGETIMMED( 8 + 8 * vregbase ) );
mop->sub( e, f->m, OP_TARGETREG( f->m->sp ), OP_TARGETREG( f->m->fp ), rargs[ RA_NR_ARGS ] );
mop->move( e, f->m, rargs[ RA_EXIST ], rargs[ RA_EXPECT ] );
#endif
} else {
// fixed number of results: expect exactly nret - 1 values
mop->move( e, f->m, rargs[ RA_EXPECT ], OP_TARGETIMMED( nret - 1 ) );
#ifdef USE_JFUNC_FOR_VARRES
jfunc_call( mop, e, f->m, JF_ARG_RES_CPY, 0, maxstack, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
#endif
}
#ifndef USE_JFUNC_FOR_VARRES
// shared copy call for both branches of the inline alternative
jfunc_call( mop, e, f->m, JF_ARG_RES_CPY, 0, maxstack, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
#endif
prefer_nontemp_release_reg( mop, e, f->m, RA_SIZE );
}
/*
 * Emit a complete call sequence for the function held in virtual register
 * vregbase: precall() followed by postcall(), both given the same arguments.
 */
void do_call( struct machine_ops* mop, struct emitter* e, struct frame* f,
		int vregbase, int narg, int nret ){
	// before the callee runs
	precall( mop, e, f, vregbase, narg, nret );

	// after the callee has returned
	postcall( mop, e, f, vregbase, narg, nret );
}
/*
 * Emit a function return. Load the following and branch to the shared
 * epilogue jfunction ( JF_EPILOGUE ):
 *	r[ RA_NR_ARGS ] = number of results
 *	r[ RA_BASE ] = start address of results
 *
 * vregbase is the virtual register holding the first result. When nret > 0
 * the result count is the constant nret - 1; otherwise it is computed by the
 * emitted code as ( base - sp ) / 8.
 */
void do_ret( struct machine_ops* mop, struct emitter* e, struct frame* f, int vregbase, int nret ){
	assert( RA_COUNT <= f->m->nr_reg );	// regargs are passed by register NOT stack

	vreg_operand basestack = vreg_to_operand( f, vregbase, true );

	/*
	 * if nret == 0 then prev instruction was call and calls save vregs on stack. Therefore
	 * only when nret > 0 do we need to save live regs onto stack. Do it first so it can
	 * use as many temps as it wants before we reserve them for return procedure.
	 */
	if( nret > 0 ){
#if 1
		save_frame_limit( mop, e, f, vregbase, nret - 1 );
#else
		// TODO: the above is from vregbase - so need to think about this one
		jfunc_call( mop, e, f->m, JF_STORE_LOCALS, jf_storelocal_offset( f->m, nret - 1 ), JFUNC_UNLIMITED_STACK, 0 );
#endif
	}

	operand rargs[ RA_COUNT ];
	prefer_nontemp_acquire_reg( mop, e, f->m, RA_COUNT, rargs );

	// start address of the results
	mop->add( e, f->m, rargs[ RA_BASE ], OP_TARGETREG( basestack.value.base ), OP_TARGETIMMED( basestack.value.offset ) );

	if( nret > 0 ) {
		mop->move( e, f->m, rargs[ RA_NR_ARGS ], OP_TARGETIMMED( nret - 1 ) );
	} else {
		// everything between the base slot and sp is a result, 8 per value
		mop->sub( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_BASE ], OP_TARGETREG( f->m->sp ) );
		mop->udiv( e, f->m, rargs[ RA_NR_ARGS ], rargs[ RA_NR_ARGS ], OP_TARGETIMMED( 8 ) );
	}

	mop->b( e, f->m, LBL_ABS( OP_TARGETIMMED( (uintptr_t)jfunc_addr( e, JF_EPILOGUE ) ) ) );

	/*
	 * Balance the acquire above. Without this the machine state stays at
	 * "registers held, spilling disabled" for all code emitted after the
	 * return. Released after the branch, as jinit_pro and jinit_epi do after
	 * their terminal branch/ret.
	 */
	prefer_nontemp_release_reg( mop, e, f->m, RA_COUNT );
}
/*
 * Emit the function-specific part of a Lua function's prologue.
 *
 * Active ( #if 1 ) variant: lower sp by 8 * f->nr_locals and, when the function
 * has parameters, set RA_EXPECT to the parameter count, call JF_ARG_RES_CPY and
 * then JF_LOAD_LOCALS. RA_SRC, RA_DST and RA_EXIST are not written here;
 * presumably they arrive already set by the shared prologue jfunction - TODO
 * confirm.
 *
 * The #else variant is the earlier fully inlined prologue and is not compiled.
 */
void prologue( struct machine_ops* mop, struct emitter* e, struct frame* f ){
#if 1
const operand sp = OP_TARGETREG( f->m->sp );
// fp is not referenced in this variant
const operand fp = OP_TARGETREG( f->m->fp );
const int nparams = f->nr_params;
operand rargs[ RA_SIZE ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_SIZE, rargs );
// reserve 8 per local below the current stack pointer
mop->add( e, f->m, sp, sp, OP_TARGETIMMED( -( 8 * f->nr_locals ) ) );
if( nparams ){
// set nparams
mop->move( e, f->m, rargs[ RA_EXPECT ], OP_TARGETIMMED( nparams ) );
// do argument cpy
jfunc_call( mop, e, f->m, JF_ARG_RES_CPY, 0, JFUNC_UNLIMITED_STACK, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
// do call
jfunc_call( mop, e, f->m, JF_LOAD_LOCALS, jf_loadlocal_offset( f->m, nparams ), JFUNC_UNLIMITED_STACK, 0 );
}
prefer_nontemp_release_reg( mop, e, f->m, RA_SIZE );
#else
// ---- compiled-out inline variant ----
// new frame assumes no temporaries have been used yet
assert( temps_accessed( f->m ) == 0 );
const operand sp = OP_TARGETREG( f->m->sp );
const operand fp = OP_TARGETREG( f->m->fp );
const int nparams = f->nr_params;
operand rargs[ RA_SIZE ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_SIZE, rargs );
// push old frame pointer, closure addr / result start addr, expected nr or results
if( f->m->is_ra )
pushn( mop, e, f->m, 3, OP_TARGETREG( f->m->ra), fp, rargs[ RA_SRC ] );
else
pushn( mop, e, f->m, 2, fp, rargs[ RA_SRC ] );
// set ebp and update stack
mop->add( e, f->m, fp, sp, OP_TARGETIMMED( 4 ) ); // point to ebp so add 4
mop->add( e, f->m, sp, sp, OP_TARGETIMMED( -( 8 * f->nr_locals ) ) );
if( nparams ) {
const vreg_operand basestack = vreg_to_operand( f, 0, true ); // destination is first local
const int maxstack = JFUNC_UNLIMITED_STACK;
// set src ( always start after closure see Lua VM for reason )
mop->add( e, f->m, rargs[ RA_SRC ], rargs[ RA_SRC ], OP_TARGETIMMED( -8 ) );
// set dst and expect
mop->add( e, f->m, rargs[ RA_DST ], OP_TARGETREG( basestack.value.base ), OP_TARGETIMMED( basestack.value.offset ) );
mop->move( e, f->m, rargs[ RA_EXPECT ], OP_TARGETIMMED( nparams ) );
jfunc_call( mop, e, f->m, JF_ARG_RES_CPY, 0, maxstack, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
// NOTE(review): in this variant the release only happens when nparams != 0
prefer_nontemp_release_reg( mop, e, f->m, RA_SIZE );
#if 0
load_frame_limit( mop, e, f, 0, nparams ); // load locals living in registers
#else
jfunc_call( mop, e, f->m, JF_LOAD_LOCALS, jf_loadlocal_offset( f->m, nparams ), maxstack, 0 );
#endif
}
#endif
}
/*
 * Per-function epilogue hook. Currently emits nothing: the whole body is
 * compiled out because the epilogue is generated once as a shared jfunction
 * ( do_ret branches to JF_EPILOGUE ).
 */
void epilogue( struct machine_ops* mop, struct emitter* e, struct frame* f ){
#if 0 // epilogue is now JFunction - so shared amongst all functions
const operand sp = OP_TARGETREG( f->m->sp );
const operand fp = OP_TARGETREG( f->m->fp );
// reset stack
mop->move( e, f->m, sp, fp );
pop( mop, e, f->m, fp );
mop->ret( e, f->m );
#endif
}
/*
 * Emit the copy loop shared by argument and result passing. Spills must be
 * kept to a minimum so the previous frame's locals are not clobbered.
 *
 * Emitted code: while iter != limit, copy the two words at offsets 0 and -4
 * from [src] to [dst], step both pointers down by 8 and increment iter.
 */
static void do_copying( struct machine_ops* mop, struct emitter* e, struct machine* m,
		operand iter, operand limit, operand dst, operand src ){
	const operand slot_step = OP_TARGETIMMED( -8 );
	const operand one = OP_TARGETIMMED( 1 );

	// loop head; leave as soon as iter reaches limit
	e->ops->label_local( e, 0 );
	mop->beq( e, m, iter, limit, LBL_NEXT( 0 ) );	// TODO: bgt is equiv to beq so swap?

	// one slot = two words
	mop->move( e, m, OP_TARGETDADDR( dst.reg, 0 ), OP_TARGETDADDR( src.reg, 0 ) );
	mop->move( e, m, OP_TARGETDADDR( dst.reg, -4 ), OP_TARGETDADDR( src.reg, -4 ) );

	// advance both cursors, then the counter
	mop->add( e, m, dst, dst, slot_step );
	mop->add( e, m, src, src, slot_step );
	mop->add( e, m, iter, iter, one );

	mop->b( e, m, LBL_PREV( 0 ) );

	// loop exit
	e->ops->label_local( e, 0 );
}
/*
 * Emits exactly the same slot-copy loop as do_copying(), so simply forward to
 * it. Despite the name nothing is set to nil here; the nil-fill loop lives in
 * do_nilling().
 */
static void do_niling( struct machine_ops* mop, struct emitter* e, struct machine* m,
		operand iter, operand limit, operand dst, operand src ){
	do_copying( mop, e, m, iter, limit, dst, src );
}
/*
 * Emit the nil-fill loop: while iter != limit, store LUA_TNIL into the word
 * at offset -4 of [dst], step dst down by 8 and increment iter. The word at
 * offset 0 is left untouched.
 */
static void do_nilling( struct machine_ops* mop, struct emitter* e, struct machine* m,
		operand iter, operand limit, operand dst ){
	const operand slot_step = OP_TARGETIMMED( -8 );
	const operand one = OP_TARGETIMMED( 1 );

	// loop head; leave as soon as iter reaches limit
	e->ops->label_local( e, 0 );
	mop->beq( e, m, iter, limit, LBL_NEXT( 0 ) );	// TODO: bgt is equiv to beq so swap?

	// mark the slot as nil
	mop->move( e, m, OP_TARGETDADDR( dst.reg, -4 ), OP_TARGETIMMED( LUA_TNIL ) );

	// advance the cursor, then the counter
	mop->add( e, m, dst, dst, slot_step );
	mop->add( e, m, iter, iter, one );

	mop->b( e, m, LBL_PREV( 0 ) );

	// loop exit
	e->ops->label_local( e, 0 );
}
/*
 * Generate the jfunction that copies arguments / results between frames
 * ( presumably the one registered as JF_ARG_RES_CPY - TODO confirm ).
 *
 * Register arguments, indexed by the RA_* enum:
 *	RA_EXIST  - number of values available at the source
 *	RA_SRC    - address of the first source slot
 *	RA_DST    - address of the first destination slot
 *	RA_EXPECT - number of values the destination wants
 *
 * Emitted code copies min( exist, expect ) slots and then nil-fills the
 * destination until expect slots have been written. RA_DST, RA_SRC and
 * RA_EXIST are modified by the emitted code.
 */
void jinit_cpy_arg_res( struct JFunc* jf, struct machine_ops* mop, struct emitter* e, struct machine* m ){
	operand rargs[ RA_SIZE ];
	prefer_nontemp_acquire_reg( mop, e, m, RA_SIZE, rargs );

	// never copy more than the destination expects
	syn_min( mop, e, m, rargs[ RA_EXIST ], rargs[ RA_EXIST ], rargs[ RA_EXPECT ] );

	// init iterator
	operand iter = OP_TARGETREG( acquire_temp( mop, e, m ) );
	mop->move( e, m, iter, OP_TARGETIMMED( 0 ) );

	do_copying( mop, e, m, iter, rargs[ RA_EXIST ], rargs[ RA_DST ], rargs[ RA_SRC ] );

	/*
	 * TODO: Below logic is incorrect the first is most likely to get spilled. So change enum order. BUT
	 * then you have to think about prefer saved reg and the order there.
	 * TODO: add error prefer_nontemps to error when not all live simultaneously
	 *
	 * RA_EXPECT is the last register, therefore it is the most likely to be
	 * spilled. To avoid a repeated spill/unspill inside the loop, move it into
	 * RA_EXIST ( no longer needed after the copy ) and use that as the limit.
	 *
	 * The previous code used rargs[ RA_SIZE ] as the destination, which is one
	 * past the end of rargs, and left the limit equal to iter so the nil-fill
	 * loop never ran.
	 */
	mop->move( e, m, rargs[ RA_EXIST ], rargs[ RA_EXPECT ] );
	do_nilling( mop, e, m, iter, rargs[ RA_EXIST ], rargs[ RA_DST ] );

	release_temp( mop, e, m );	// iter
	prefer_nontemp_release_reg( mop, e, m, RA_SIZE );

	mop->ret( e, m );
}
/*
 * Generate the shared epilogue jfunction: rewind sp to fp - 4, pop the saved
 * values ( into RA_DST, fp and, on machines with a return-address register,
 * ra ) and return. jf is not used.
 */
void jinit_epi( struct JFunc* jf, struct machine_ops* mop, struct emitter* e, struct machine* m ){
	const operand stack_ptr = OP_TARGETREG( m->sp );
	const operand frame_ptr = OP_TARGETREG( m->fp );

	operand rargs[ RA_SIZE ];
	prefer_nontemp_acquire_reg( mop, e, m, RA_SIZE, rargs );

	// reset stack: jinit_pro set fp = sp + 4 right after its pushes
	mop->add( e, m, stack_ptr, frame_ptr, OP_TARGETIMMED( -4 ) );

	// restore what was pushed on entry
	if( !m->is_ra )
		popn( mop, e, m, 2, rargs[ RA_DST ], frame_ptr );
	else
		popn( mop, e, m , 3, rargs[ RA_DST ], frame_ptr, OP_TARGETREG( m->ra ) );

	mop->ret( e, m );

	prefer_nontemp_release_reg( mop, e, m, RA_SIZE );
}
/*
* Do the majority ( function independent ) part of the prologue. That is: store the frame section,
* update src and dst pointers and call the memcpy.
*
* The function specific code needs to set the number of params, update the stack ( requires # of locals )
* and then unspill params.
*/
void jinit_pro( struct JFunc* jf, struct machine_ops* mop, struct emitter* e, struct machine* m ){
// phoney frame
struct frame F = { .m = m, .nr_locals = 1, .nr_params = 0 };
struct frame *f = &F;
const operand sp = OP_TARGETREG( f->m->sp );
const operand fp = OP_TARGETREG( f->m->fp );
const vreg_operand basestack = vreg_to_operand( f, 0, true ); // destination is first local
const int maxstack = JFUNC_UNLIMITED_STACK;
operand rargs[ RA_SIZE ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_SIZE, rargs );
// push old frame pointer, closure addr / result start addr, expected nr or results
if( f->m->is_ra )
pushn( mop, e, f->m, 3, OP_TARGETREG( f->m->ra), fp, rargs[ RA_SRC ] );
else
pushn( mop, e, f->m, 2, fp, rargs[ RA_SRC ] );
// set ebp and update stack
mop->add( e, f->m, fp, sp, OP_TARGETIMMED( 4 ) ); // point to ebp so add 4
// set src ( always start after closure see Lua VM for reason )
mop->add( e, f->m, rargs[ RA_SRC ], rargs[ RA_SRC ], OP_TARGETIMMED( -8 ) );
mop->add( e, f->m, rargs[ RA_DST ], OP_TARGETREG( basestack.value.base ), OP_TARGETIMMED( basestack.value.offset ) );
/*
* Call the actual function, which is the closure. On RISC this will clobber
* temp hopefully this isn't a live reg or we will get exception. On CISC
* there is probably indirect direct address jmp instruction ( x86 does 0 ).
*/
mop->b( e, f->m, LBL_ABS( OP_TARGETDADDR( rargs[ RA_SRC ].reg, 8 ) ) );
prefer_nontemp_release_reg( mop, e, f->m, RA_SIZE );
}
/*
* The number of results is not know before hand. Need to update stack for future
* calls.
*/
void jinit_vresult_postcall( struct JFunc* jf, struct machine_ops* mop, struct emitter* e, struct machine* m ){
// phoney frame
struct frame F = { .m = m, .nr_locals = 1, .nr_params = 0 };
struct frame *f = &F;
operand rargs[ RA_SIZE ];
prefer_nontemp_acquire_reg( mop, e, f->m, RA_SIZE, rargs );
// max stack clobber
const int maxstack = 3; // prior frame has buffer of pushed return addr, frame pointer and closure
// consume as many results as available
mop->move( e, f->m, rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
// if register based remember return address
if( f->m->is_ra )
pushn( mop, e, f->m, 1, OP_TARGETREG( f->m->ra) ); // not safe cause of stack
// copy args across
jfunc_call( mop, e, f->m, JF_ARG_RES_CPY, 0, maxstack, 4, rargs[ RA_SRC ], rargs[ RA_DST ],
rargs[ RA_EXPECT ], rargs[ RA_EXIST ] );
if( f->m->is_ra )
popn( mop, e, f->m, 1, OP_TARGETREG( f->m->ra) );
/*
* this depends heavily on copy arg implementation, it assumes ptrs will point to
* the top of the stack after copying i.e. the last result copied.
*/
mop->add( e, m, OP_TARGETREG( f->m->sp ), rargs[ RA_DST ], OP_TARGETIMMED( 0 ) );
prefer_nontemp_release_reg( mop, e, f->m, RA_SIZE );
// return
mop->ret( e, m );
}