/*
 * Entry point: compiles the source file named by argv[1].
 *
 * The output file name is the input name up to (not including) its first
 * '.', with ".lc3" appended; if the name has no '.', ".lc3" is appended to
 * the whole name.
 *
 * Returns 0 on success and 1 on any failure (missing argument, symbol-table
 * failure, over-long file name, file open/read/write error).
 */
int main(int argc, char *argv[])
{
    // make sure the filename is specified
    if (argc < 2) {
        printf("Please specify the name of the file to compile\n");
        return 1;
    }

    char *name = argv[1];
    char funName[20];
    char newFileName[FILENAME_MAX];
    char line[110];
    int status = 0;

    // create the symbol table
    if (create_symbol_table(name, funName) == 0) {
        return 1;
    }

    // create the target file
    // Stop at the first '.' OR at the terminator, so a name without an
    // extension cannot run the scan off the end of the string.
    int stemLen = 0;
    while (name[stemLen] != '\0' && name[stemLen] != '.') {
        stemLen++;
    }

    // snprintf bounds the write and always NUL-terminates; a return value
    // >= the buffer size means the name would have been truncated.
    int written = snprintf(newFileName, sizeof newFileName, "%.*s.lc3",
                           stemLen, name);
    if (written < 0 || (size_t) written >= sizeof newFileName) {
        fprintf(stderr, "File name is too long: %s\n", name);
        return 1;
    }

    FILE *output = fopen(newFileName, "w");
    if (output == NULL) {
        fprintf(stderr, "Cannot create %s\n", newFileName);
        return 1;
    }

    //compile the file
    FILE *infile = open_file(name);
    if (infile == NULL) {
        fclose(output);
        return 1;
    }

    // The first line is read and dropped before the prologue is emitted; it
    // is never passed to compile_line.
    // NOTE(review): presumably the function header that create_symbol_table
    // already consumed into funName - confirm.
    if (fgets(line, sizeof line, infile) == NULL) {
        line[0] = '\0';
    }

    generate_prologue(funName, output);

    // Loop on fgets' own result: feof() only becomes true after a read has
    // already failed, so testing it first would compile the last line twice.
    while (fgets(line, sizeof line, infile) != NULL) {
        compile_line(line, output);
    }

    generate_epilogue(output);

    if (ferror(infile)) {
        fprintf(stderr, "Error while reading %s\n", name);
        status = 1;
    }
    fclose(infile);

    // fclose flushes buffered output, so a failure here means lost data.
    if (fclose(output) != 0) {
        fprintf(stderr, "Error while writing %s\n", newFileName);
        status = 1;
    }

    return status;
}
/*
 * Emits the machine code for one transform into the buffer at
 * p->transform_base and returns its entry point.
 *
 *   p       plan; read for transform_base, i0, i1, i2, ws_is and the
 *           offsets of its ee_ws/eo_ws/oe_ws/ws members. Under HAVE_SSE its
 *           `constants` member is written.
 *   N       transform size; used for the leaf offset tables and parity test.
 *   leaf_N  leaf size; compared against each sub-transform size.
 *   sign    selects the constant table (HAVE_SSE) and whether the copied ARM
 *           templates are patched.
 *
 * Returns the pointer produced by generate_prologue() cast to
 * transform_func_t, or NULL if the work-list allocation fails.
 *
 * NOTE(review): nothing here bounds-checks writes through `fp`; the caller
 * presumably sizes p->transform_base - confirm.
 * NOTE(review): `len` is never used, and `count` is only written after its
 * initial use for the allocation size (the remaining readers are in
 * commented-out debug code).
 */
transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) {
    /* Per-leaf offset tables, all multiples of N. */
    uint32_t offsets[8] = {0, 4*N, 2*N, 6*N, N, 5*N, 7*N, 3*N};
    uint32_t offsets_o[8] = {0, 4*N, 2*N, 6*N, 7*N, 3*N, N, 5*N};

    /* Values used for the previously emitted sub-transform call; each new
     * call only emits the difference from these. */
    int32_t pAddr = 0;
    int32_t pN = 0;
    int32_t pLUT = 0;

    insns_t *fp;        /* write cursor into the code buffer */
    insns_t *start;     /* entry point returned to the caller */
    insns_t *x_4_addr;  /* call target: size-4 base case */
    insns_t *x_8_addr;  /* call target: size-8 base case */
    uint32_t loop_count;
    int count;
    ptrdiff_t len;
    size_t *ps;         /* start of the work list (kept for free()) */
    size_t *pps;        /* cursor into the work list */

    /* Two size_t slots per counted entry, plus one extra pair. */
    count = ffts_tree_count(N, leaf_N, 0) + 1;
    ps = pps = malloc(2 * count * sizeof(*ps));
    if (!ps) {
        return NULL;
    }

    /* pps is passed by address, so the callee may advance it; the zero pair
     * stored next is the terminator that the `while (pps[0])` loops below
     * stop on. pps is then rewound to the start of the list. */
    ffts_elaborate_tree(&pps, N, leaf_N, 0);
    pps[0] = 0;
    pps[1] = 0;
    pps = ps;

#ifdef HAVE_SSE
    if (sign < 0) {
        p->constants = (const void*) sse_constants;
    } else {
        p->constants = (const void*) sse_constants_inv;
    }
#endif

    fp = (insns_t*) p->transform_base;

    /* generate base cases */
    x_4_addr = generate_size4_base_case(&fp, sign);
    x_8_addr = generate_size8_base_case(&fp, sign);

#ifdef __arm__
    start = generate_prologue(&fp, p);

#ifdef HAVE_NEON
    /* Copy the template between the neon_ee and neon_oo symbols, then patch
     * fixed word indices inside the copy when sign < 0.
     * NOTE(review): the XOR presumably flips one encoding bit per instruction
     * to reverse the transform direction - confirm against the template. */
    memcpy(fp, neon_ee, neon_oo - neon_ee);
    if (sign < 0) {
        fp[33] ^= 0x00200000;
        fp[37] ^= 0x00200000;
        fp[38] ^= 0x00200000;
        fp[39] ^= 0x00200000;
        fp[40] ^= 0x00200000;
        fp[41] ^= 0x00200000;
        fp[44] ^= 0x00200000;
        fp[45] ^= 0x00200000;
        fp[46] ^= 0x00200000;
        fp[47] ^= 0x00200000;
        fp[48] ^= 0x00200000;
        fp[57] ^= 0x00200000;
    }
    /* The same difference is a byte count for memcpy and is divided by 4 to
     * advance fp - assumes 4-byte insns_t on this path, TODO confirm. */
    fp += (neon_oo - neon_ee) / 4;
#else
    /* VFP variant: same copy-and-patch scheme, but patched when sign > 0. */
    memcpy(fp, vfp_e, vfp_o - vfp_e);
    if (sign > 0) {
        fp[64] ^= 0x00000040;
        fp[65] ^= 0x00000040;
        fp[68] ^= 0x00000040;
        fp[75] ^= 0x00000040;
        fp[76] ^= 0x00000040;
        fp[79] ^= 0x00000040;
        fp[80] ^= 0x00000040;
        fp[83] ^= 0x00000040;
        fp[84] ^= 0x00000040;
        fp[87] ^= 0x00000040;
        fp[91] ^= 0x00000040;
        fp[93] ^= 0x00000040;
    }
    fp += (vfp_o - vfp_e) / 4;
#endif
#else
    /* generate functions */
    start = generate_prologue(&fp, p);

    loop_count = 4 * p->i0;
    generate_leaf_init(&fp, loop_count);

    /* The order in which leaf kinds are emitted depends on whether
     * ffts_ctzl(N) is odd or even. */
    if (ffts_ctzl(N) & 1) {
        generate_leaf_ee(&fp, offsets, p->i1 ? 6 : 0);

        if (p->i1) {
            loop_count += 4 * p->i1;
            generate_leaf_oo(&fp, loop_count, offsets_o, 7);
        }

        loop_count += 4;
        generate_leaf_oe(&fp, offsets_o);
    } else {
        generate_leaf_ee(&fp, offsets, N >= 256 ? 2 : 8);

        loop_count += 4;
        generate_leaf_eo(&fp, offsets);

        if (p->i1) {
            loop_count += 4 * p->i1;
            generate_leaf_oo(&fp, loop_count, offsets_o, N >= 256 ? 4 : 7);
        }
    }

    if (p->i1) {
        uint32_t offsets_oe[8] = {7*N, 3*N, N, 5*N, 0, 4*N, 6*N, 2*N};

        loop_count += 4 * p->i1;

        /* align loop/jump destination */
        /* NOTE(review): the x86_ and x64_ emitters take `fp` by name rather
         * than by address, so they are presumably macros that advance it -
         * confirm before refactoring. */
#ifdef _M_X64
        x86_mov_reg_imm(fp, X86_EBX, loop_count);
#else
        x86_mov_reg_imm(fp, X86_ECX, loop_count);
        ffts_align_mem16(&fp, 9);
#endif

        generate_leaf_ee(&fp, offsets_oe, 0);
    }

    generate_transform_init(&fp);

    /* generate subtransform calls */
    /* Walk the work list in (pps[0], pps[1]) pairs until the zero pair. */
    count = 2;
    while (pps[0]) {
        size_t ws_is;

        if (!pN) {
            /* No previous size recorded: load the value outright. */
#ifdef _M_X64
            x86_mov_reg_imm(fp, X86_EBX, pps[0]);
#else
            x86_mov_reg_imm(fp, X86_ECX, pps[0] / 4);
#endif
        } else {
            /* Only adjust by the difference from the previous call. */
            int offset = (4 * pps[1]) - pAddr;
            if (offset) {
#ifdef _M_X64
                x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8);
#else
                x64_alu_reg_imm_size(fp, X86_ADD, X64_RDX, offset, 8);
#endif
            }

            if (pps[0] > leaf_N && pps[0] - pN) {
                /* Rescale by shifting by the difference in trailing-zero
                 * counts between the new and previous size. */
                int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN);

#ifdef _M_X64
                if (factor > 0) {
                    x86_shift_reg_imm(fp, X86_SHL, X86_EBX, factor);
                } else {
                    x86_shift_reg_imm(fp, X86_SHR, X86_EBX, -factor);
                }
#else
                if (factor > 0) {
                    x86_shift_reg_imm(fp, X86_SHL, X86_ECX, factor);
                } else {
                    x86_shift_reg_imm(fp, X86_SHR, X86_ECX, -factor);
                }
#endif
            }
        }

        /* Table offset for this size, applied as a delta from pLUT. */
        ws_is = 8 * p->ws_is[ffts_ctzl(pps[0] / leaf_N) - 1];
        if (ws_is != pLUT) {
            int offset = (int) (ws_is - pLUT);

#ifdef _M_X64
            x64_alu_reg_imm_size(fp, X86_ADD, X64_R9, offset, 8);
#else
            x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8);
#endif
        }

        /* Size 2*leaf_N calls the size-4 base case, everything else the
         * size-8 base case. */
        if (pps[0] == 2 * leaf_N) {
            x64_call_code(fp, x_4_addr);
        } else {
            x64_call_code(fp, x_8_addr);
        }

        pAddr = 4 * pps[1];
        /* pN is only updated for sizes above leaf_N on this path. */
        if (pps[0] > leaf_N) {
            pN = pps[0];
        }

        pLUT = ws_is;//LUT_offset(pps[0], leafN);
        //fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
        count += 4;
        pps += 2;
    }
#endif

#ifdef __arm__
#ifdef HAVE_NEON
    /* Each ADDI triple below has the form (2<-a), (a<-b), (b<-2) with a zero
     * immediate; NOTE(review): presumably a register swap through r2 -
     * confirm against the ADDI emitter. */
    if (ffts_ctzl(N) & 1) {
        ADDI(&fp, 2, 7, 0);
        ADDI(&fp, 7, 9, 0);
        ADDI(&fp, 9, 2, 0);

        ADDI(&fp, 2, 8, 0);
        ADDI(&fp, 8, 10, 0);
        ADDI(&fp, 10, 2, 0);

        if(p->i1) {
            MOVI(&fp, 11, p->i1);
            memcpy(fp, neon_oo, neon_eo - neon_oo);
            if(sign < 0) {
                fp[12] ^= 0x00200000;
                fp[13] ^= 0x00200000;
                fp[14] ^= 0x00200000;
                fp[15] ^= 0x00200000;
                fp[27] ^= 0x00200000;
                fp[29] ^= 0x00200000;
                fp[30] ^= 0x00200000;
                fp[31] ^= 0x00200000;
                fp[46] ^= 0x00200000;
                fp[47] ^= 0x00200000;
                fp[48] ^= 0x00200000;
                fp[57] ^= 0x00200000;
            }
            fp += (neon_eo - neon_oo) / 4;
        }

        /* The LDRI immediate is the byte offset of oe_ws within *p. */
        *fp = LDRI(11, 1, ((uint32_t)&p->oe_ws) - ((uint32_t)p));
        fp++;

        memcpy(fp, neon_oe, neon_end - neon_oe);
        if(sign < 0) {
            fp[19] ^= 0x00200000;
            fp[20] ^= 0x00200000;
            fp[22] ^= 0x00200000;
            fp[23] ^= 0x00200000;
            fp[37] ^= 0x00200000;
            fp[38] ^= 0x00200000;
            fp[40] ^= 0x00200000;
            fp[41] ^= 0x00200000;
            fp[64] ^= 0x00200000;
            fp[65] ^= 0x00200000;
            fp[66] ^= 0x00200000;
            fp[67] ^= 0x00200000;
        }
        fp += (neon_end - neon_oe) / 4;
    } else {
        /* The LDRI immediate is the byte offset of eo_ws within *p. */
        *fp = LDRI(11, 1, ((uint32_t)&p->eo_ws) - ((uint32_t)p));
        fp++;

        memcpy(fp, neon_eo, neon_oe - neon_eo);
        if(sign < 0) {
            fp[10] ^= 0x00200000;
            fp[11] ^= 0x00200000;
            fp[13] ^= 0x00200000;
            fp[14] ^= 0x00200000;
            fp[31] ^= 0x00200000;
            fp[33] ^= 0x00200000;
            fp[34] ^= 0x00200000;
            fp[35] ^= 0x00200000;
            fp[59] ^= 0x00200000;
            fp[60] ^= 0x00200000;
            fp[61] ^= 0x00200000;
            fp[62] ^= 0x00200000;
        }
        fp += (neon_oe - neon_eo) / 4;

        ADDI(&fp, 2, 7, 0);
        ADDI(&fp, 7, 9, 0);
        ADDI(&fp, 9, 2, 0);

        ADDI(&fp, 2, 8, 0);
        ADDI(&fp, 8, 10, 0);
        ADDI(&fp, 10, 2, 0);

        if(p->i1) {
            MOVI(&fp, 11, p->i1);
            memcpy(fp, neon_oo, neon_eo - neon_oo);
            if(sign < 0) {
                fp[12] ^= 0x00200000;
                fp[13] ^= 0x00200000;
                fp[14] ^= 0x00200000;
                fp[15] ^= 0x00200000;
                fp[27] ^= 0x00200000;
                fp[29] ^= 0x00200000;
                fp[30] ^= 0x00200000;
                fp[31] ^= 0x00200000;
                fp[46] ^= 0x00200000;
                fp[47] ^= 0x00200000;
                fp[48] ^= 0x00200000;
                fp[57] ^= 0x00200000;
            }
            fp += (neon_eo - neon_oo) / 4;
        }
    }

    if(p->i1) {
        ADDI(&fp, 2, 3, 0);
        ADDI(&fp, 3, 7, 0);
        ADDI(&fp, 7, 2, 0);

        ADDI(&fp, 2, 4, 0);
        ADDI(&fp, 4, 8, 0);
        ADDI(&fp, 8, 2, 0);

        ADDI(&fp, 2, 5, 0);
        ADDI(&fp, 5, 9, 0);
        ADDI(&fp, 9, 2, 0);

        ADDI(&fp, 2, 6, 0);
        ADDI(&fp, 6, 10, 0);
        ADDI(&fp, 10, 2, 0);

        ADDI(&fp, 2, 9, 0);
        ADDI(&fp, 9, 10, 0);
        ADDI(&fp, 10, 2, 0);

        /* The LDRI immediate is the byte offset of ee_ws within *p. */
        *fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p));
        fp++;

        MOVI(&fp, 11, p->i1);
        /* Second copy of the neon_ee template, patched at the same word
         * indices as the copy emitted right after the prologue. */
        memcpy(fp, neon_ee, neon_oo - neon_ee);
        if(sign < 0) {
            fp[33] ^= 0x00200000;
            fp[37] ^= 0x00200000;
            fp[38] ^= 0x00200000;
            fp[39] ^= 0x00200000;
            fp[40] ^= 0x00200000;
            fp[41] ^= 0x00200000;
            fp[44] ^= 0x00200000;
            fp[45] ^= 0x00200000;
            fp[46] ^= 0x00200000;
            fp[47] ^= 0x00200000;
            fp[48] ^= 0x00200000;
            fp[57] ^= 0x00200000;
        }
        fp += (neon_oo - neon_ee) / 4;
    }
#else
    /* VFP variant: unlike the NEON path these blocks are emitted
     * unconditionally, with the MOVI counts clamped to at least 1. */
    ADDI(&fp, 2, 7, 0);
    ADDI(&fp, 7, 9, 0);
    ADDI(&fp, 9, 2, 0);

    ADDI(&fp, 2, 8, 0);
    ADDI(&fp, 8, 10, 0);
    ADDI(&fp, 10, 2, 0);

    MOVI(&fp, 11, (p->i1>0) ? p->i1 : 1);
    memcpy(fp, vfp_o, vfp_x4 - vfp_o);
    if(sign > 0) {
        fp[22] ^= 0x00000040;
        fp[24] ^= 0x00000040;
        fp[25] ^= 0x00000040;
        fp[26] ^= 0x00000040;
        fp[62] ^= 0x00000040;
        fp[64] ^= 0x00000040;
        fp[65] ^= 0x00000040;
        fp[66] ^= 0x00000040;
    }
    fp += (vfp_x4 - vfp_o) / 4;

    ADDI(&fp, 2, 3, 0);
    ADDI(&fp, 3, 7, 0);
    ADDI(&fp, 7, 2, 0);

    ADDI(&fp, 2, 4, 0);
    ADDI(&fp, 4, 8, 0);
    ADDI(&fp, 8, 2, 0);

    ADDI(&fp, 2, 5, 0);
    ADDI(&fp, 5, 9, 0);
    ADDI(&fp, 9, 2, 0);

    ADDI(&fp, 2, 6, 0);
    ADDI(&fp, 6, 10, 0);
    ADDI(&fp, 10, 2, 0);

    ADDI(&fp, 2, 9, 0);
    ADDI(&fp, 9, 10, 0);
    ADDI(&fp, 10, 2, 0);

    /* The LDRI immediate is the byte offset of ee_ws within *p. */
    *fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p));
    fp++;

    MOVI(&fp, 11, (p->i2>0) ? p->i2 : 1);
    memcpy(fp, vfp_e, vfp_o - vfp_e);
    if(sign > 0) {
        fp[64] ^= 0x00000040;
        fp[65] ^= 0x00000040;
        fp[68] ^= 0x00000040;
        fp[75] ^= 0x00000040;
        fp[76] ^= 0x00000040;
        fp[79] ^= 0x00000040;
        fp[80] ^= 0x00000040;
        fp[83] ^= 0x00000040;
        fp[84] ^= 0x00000040;
        fp[87] ^= 0x00000040;
        fp[91] ^= 0x00000040;
        fp[93] ^= 0x00000040;
    }
    fp += (vfp_o - vfp_e) / 4;
#endif

    /* The LDRI immediate is the byte offset of ws within *p. */
    *fp = LDRI(2, 1, ((uint32_t)&p->ws) - ((uint32_t)p));
    fp++; // load offsets into r12
    //ADDI(&fp, 2, 1, 0);
    MOVI(&fp, 1, 0);

    // args: r0 - out
    //       r1 - N
    //       r2 - ws
    // ADDI(&fp, 3, 1, 0); // put N into r3 for counter

    /* Walk the work list in (pps[0], pps[1]) pairs until the zero pair,
     * emitting only the deltas from the previous call's values. */
    count = 2;
    while(pps[0]) {
        // fprintf(stderr, "size %zu at %zu - diff %zu\n", pps[0], pps[1]*4, (pps[1]*4) - pAddr);
        if(!pN) {
            MOVI(&fp, 1, pps[0]);
        } else {
            if((pps[1]*4)-pAddr) ADDI(&fp, 0, 0, (pps[1] * 4)- pAddr);
            if(pps[0] - pN) ADDI(&fp, 1, 1, pps[0] - pN);
        }

        if (p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8 - pLUT) {
            ADDI(&fp, 2, 2, p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8 - pLUT);
        }

        if(pps[0] == 2 * leaf_N) {
            *fp = BL(fp+2, x_4_addr);
            fp++;
        } else if(!pps[2]) {
            /* pps[2] is the first slot of the next pair, so a zero there
             * means this is the last entry before the terminator. With NEON
             * the neon_x8_t template is copied inline instead of a call. */
            //uint32_t *x_8_t_addr = fp;
#ifdef HAVE_NEON
            memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
            if(sign < 0) {
                fp[31] ^= 0x00200000;
                fp[32] ^= 0x00200000;
                fp[33] ^= 0x00200000;
                fp[34] ^= 0x00200000;
                fp[65] ^= 0x00200000;
                fp[66] ^= 0x00200000;
                fp[70] ^= 0x00200000;
                fp[74] ^= 0x00200000;
                fp[97] ^= 0x00200000;
                fp[98] ^= 0x00200000;
                fp[102] ^= 0x00200000;
                fp[104] ^= 0x00200000;
            }
            fp += (neon_ee - neon_x8_t) / 4;
            //*fp++ = BL(fp+2, x_8_t_addr);
#else
            *fp = BL(fp+2, x_8_addr);
            fp++;
#endif
        } else {
            *fp = BL(fp+2, x_8_addr);
            fp++;
        }

        /* Unlike the x86 path, pN is updated unconditionally here. */
        pAddr = pps[1] * 4;
        pN = pps[0];
        pLUT = p->ws_is[ffts_ctzl(pps[0]/leaf_N)-1]*8;//LUT_offset(pps[0], leafN);
        // fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
        count += 4;
        pps += 2;
    }

    /* Raw instruction word followed by POP_LR().
     * NOTE(review): 0xecbd8b10 looks like a VFP register pop matching a push
     * in the prologue - confirm against generate_prologue. */
    *fp++ = 0xecbd8b10;
    *fp++ = POP_LR();
    count++;
#else
    generate_epilogue(&fp);
#endif

    // *fp++ = B(14); count++;

    //for(int i=0;i<(neon_x8 - neon_x4)/4;i++)
    // fprintf(stderr, "%08x\n", x_4_addr[i]);
    //fprintf(stderr, "\n");
    //for(int i=0;i<count;i++)
    //fprintf(stderr, "size of transform %u = %d\n", N, (fp - x_8_addr) * sizeof(*fp));

    /* Free through ps: pps has been advanced by the loops above. */
    free(ps);

#if defined(_MSC_VER)
#pragma warning(push)

    /* disable type cast warning from data pointer to function pointer */
#pragma warning(disable : 4055)
#endif

    return (transform_func_t) start;

#if defined(_MSC_VER)
#pragma warning(pop)
#endif
}