/*
 * dsp32_vect_realmul_kernel_ext - unrolled inline-assembly kernel that
 * multiplies the elements of a 32-bit vector by a single 32-bit value.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, vect2 in r11, size in r10 and real in r9 (the
 * mnemonics look like AVR32) -- confirm against the ABI in use.
 *
 * What the asm does, in order:
 *  - Pushes r0-r7 and lr, then uses lr as an element index starting at 0.
 *  - Subtracts 3 from r10 and skips the loop when lr >= r10, so the loop
 *    body only runs while lr + 3 is below the original r10.
 *  - Each pass handles four elements as two ld.d / st.d pairs, adding 2 to
 *    lr after each pair. Every loaded word is multiplied by r9 with muls.d
 *    into r4 (r5 is consumed as the companion result register). The output
 *    word is r5 shifted left by (32 - DSP32_QB); only when DSP_OPTIMIZATION
 *    has the DSP_OPTI_ACCURACY bit set is it OR-ed with r4 shifted right by
 *    DSP32_QB. Without that bit the low-order contribution is dropped.
 *  - After the loop lr is copied into r12 and the saved registers are
 *    popped with pc as the final target.
 *    NOTE(review): this looks like a direct return from inside the asm with
 *    the number of processed elements in r12, which would leave the trailing
 *    "return 0;" unreachable -- confirm.
 *
 * NOTE(review): the loop bound is tested with cp.h, which appears to be a
 * halfword compare; verify the behavior for sizes that do not fit in a
 * signed 16-bit value.
 */
static int dsp32_vect_realmul_kernel_ext(dsp32_t *vect1, dsp32_t *vect2, int size, dsp32_t real) { __asm__ __volatile__ ( "pushm r0-r7, lr\n\t" "mov lr, 0\n\t" "sub r10, 3\n\t" "cp.h lr, r10\n\t" ASM_INSTRUCT_COMPACKED("brge __dsp32_realmul_ext_end_loop")"\n" "__dsp32_realmul_ext_loop:\n\t" "ld.d r0, r11[lr << 2]\n\t" "muls.d r4, r1, r9\n\t" "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY) "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" #endif "muls.d r4, r0, r9\n\t" "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY) "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" #endif "st.d r12[lr << 2], r2\n\t" "sub lr, -2\n\t" "ld.d r0, r11[lr << 2]\n\t" "muls.d r4, r1, r9\n\t" \ "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY) "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" #endif "muls.d r4, r0, r9\n\t" "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY) "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" #endif "st.d r12[lr << 2], r2\n\t" "sub lr, -2\n\t" "cp.h lr, r10\n\t" ASM_INSTRUCT_COMPACKED("brlt __dsp32_realmul_ext_loop")"\n" "__dsp32_realmul_ext_end_loop:\n\t" "mov r12, lr\n\t" "popm r0-r7, pc\n\t" ); return 0; }
/*
 * dsp16_vect_dotdiv_kernel_ext - unrolled inline-assembly kernel that divides
 * two 16-bit vectors element by element.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, vect2 in r11, vect3 in r10 and size in r9 (the
 * mnemonics look like AVR32) -- confirm against the ABI in use.
 *
 * What the asm does, in order:
 *  - Pushes r0-r7 and lr, then uses lr as the output element index from 0.
 *  - Subtracts 3 from r9 and skips the loop when lr >= r9, so the loop body
 *    only runs while lr + 3 is below the original r9.
 *  - Each pass handles four elements. For byte offsets 0x0, 0x2, 0x4 and 0x6
 *    it loads a signed halfword from r11 and one from r10, shifts the r11
 *    value left by DSP16_QB and divides it by the r10 value with divs.
 *    Results are collected in r2 and r4 and stored two at a time with
 *    sthh.w at r12[lr << 1], adding 2 to lr after each store.
 *  - The inputs are walked by advancing r10 and r11 by 8 bytes per pass,
 *    while the output is addressed through the lr index.
 *  - After the loop lr is copied into r12 and the saved registers are
 *    popped with pc as the final target.
 *    NOTE(review): this looks like a direct return from inside the asm with
 *    the number of processed elements in r12, which would leave the trailing
 *    "return 0;" unreachable -- confirm.
 *
 * No check for a zero divisor is visible in this kernel.
 * NOTE(review): the loop bound is tested with cp.h, which appears to be a
 * halfword compare; verify the behavior for sizes that do not fit in a
 * signed 16-bit value.
 */
static int dsp16_vect_dotdiv_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size) { __asm__ __volatile__ ( \ "pushm r0-r7, lr\n\t" \ \ "mov lr, 0\n\t" \ "sub r9, 3\n\t" \ \ "cp.h lr, r9\n\t" \ ASM_INSTRUCT_COMPACKED("brge __dsp16_dotdiv_ext_end_loop")"\n" \ \ "__dsp16_dotdiv_ext_loop:\n\t" \ \ "ld.sh r0, r11[0x0]\n\t" \ "ld.sh r1, r10[0x0]\n\t" \ "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \ "divs r2, r0, r1\n\t" \ \ "ld.sh r0, r11[0x2]\n\t" \ "ld.sh r1, r10[0x2]\n\t" \ "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \ "divs r4, r0, r1\n\t" \ \ "sthh.w r12[lr << 1], r2:b, r4:b\n\t" \ "sub lr, -2\n\t" \ \ "ld.sh r0, r11[0x4]\n\t" \ "ld.sh r1, r10[0x4]\n\t" \ "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \ "divs r2, r0, r1\n\t" \ \ "ld.sh r0, r11[0x6]\n\t" \ "ld.sh r1, r10[0x6]\n\t" \ "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \ "divs r4, r0, r1\n\t" \ \ "sub r10, -8\n\t" \ "sub r11, -8\n\t" \ \ "sthh.w r12[lr << 1], r2:b, r4:b\n\t" \ "sub lr, -2\n\t" \ \ "cp.h lr, r9\n\t" \ ASM_INSTRUCT_COMPACKED("brlt __dsp16_dotdiv_ext_loop")"\n" \ \ "__dsp16_dotdiv_ext_end_loop:\n\t" \ \ "mov r12, lr\n\t" \ "popm r0-r7, pc\n\t" \ ); \ return 0; }
/*
 * dsp16_vect_dotmul_kernel_ext - unrolled inline-assembly kernel that
 * multiplies two 16-bit vectors element by element.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, vect2 in r11, vect3 in r10 and size in r9 (the
 * mnemonics look like AVR32) -- confirm against the ABI in use.
 *
 * What the asm does, in order:
 *  - Pushes r0-r7 and lr, then uses lr as the element index starting at 0.
 *  - Subtracts 3 from r9 and skips the loop when lr >= r9, so the loop body
 *    only runs while lr + 3 is below the original r9.
 *  - Each pass loads one double word from r11[lr << 1] into r0/r1 and one
 *    from r10[lr << 1] into r2/r3, i.e. four halfwords from each input.
 *  - The bottom and top halfwords of r1 and r3 are multiplied with mulhh.w,
 *    each product is shifted right by DSP16_QB with lsr, and the two low
 *    halfwords are stored with sthh.w at r12[lr << 1]. lr is advanced by 2
 *    and the same sequence is repeated for r0 and r2, followed by another
 *    advance of 2.
 *  - After the loop lr is copied into r12 and the saved registers are
 *    popped with pc as the final target.
 *    NOTE(review): this looks like a direct return from inside the asm with
 *    the number of processed elements in r12, which would leave the trailing
 *    "return 0;" unreachable -- confirm.
 *
 * NOTE(review): the loop bound is tested with cp.h, which appears to be a
 * halfword compare; verify the behavior for sizes that do not fit in a
 * signed 16-bit value.
 */
static int dsp16_vect_dotmul_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size) { __asm__ __volatile__ ( \ "pushm r0-r7, lr\n\t" \ \ "mov lr, 0\n\t" \ "sub r9, 3\n\t" \ \ "cp.h lr, r9\n\t" \ ASM_INSTRUCT_COMPACKED("brge __dsp16_dotmul_ext_end_loop")"\n" \ \ "__dsp16_dotmul_ext_loop:\n\t" \ \ "ld.d r0, r11[lr << 1]\n\t" \ "ld.d r2, r10[lr << 1]\n\t" \ \ "mulhh.w r4, r1:b, r3:b\n\t" \ "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \ "mulhh.w r5, r1:t, r3:t\n\t" \ "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \ "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \ \ "sub lr, -2\n\t" \ "mulhh.w r4, r0:b, r2:b\n\t" \ "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \ "mulhh.w r5, r0:t, r2:t\n\t" \ "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \ "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \ \ "sub lr, -2\n\t" \ \ "cp.h lr, r9\n\t" \ ASM_INSTRUCT_COMPACKED("brlt __dsp16_dotmul_ext_loop")"\n" \ \ "__dsp16_dotmul_ext_end_loop:\n\t" \ \ "mov r12, lr\n\t" \ "popm r0-r7, pc\n\t" \ ); \ return 0; }
/*
 * iirpart_kernel_opti_den_small - inline-assembly kernel that, for each
 * output sample, accumulates a sum of 16-bit products over a short tap loop
 * and subtracts the scaled sum from a halfword in memory.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, vect2 in r11, vect1_size in r10, vect3 in r9 and
 * vect3_size in r8 (the mnemonics look like AVR32) -- confirm against the
 * ABI in use.
 *
 * Setup:
 *  - Pushes r0-r7 and lr, then reserves 12 bytes of stack for three spill
 *    slots (sp[0x00], sp[0x04], sp[0x08]).
 *  - Swaps r11 and r12 through r1.
 *  - Loads r1 from sp[48].
 *    NOTE(review): 48 equals the 9 pushed registers (36 bytes, assuming
 *    4-byte registers) plus the 12 reserved bytes, so this presumably
 *    fetches the stack-passed prediv argument -- confirm.
 *  - Applies rsub to r1 with DSP16_QB.
 *    NOTE(review): presumably r1 = DSP16_QB - prediv; confirm the rsub
 *    operand order.
 *  - Computes r9 + (r8 << 1) and keeps it in sp[0x08]; the tap loop walks
 *    downwards from this address.
 *
 * Outer loop (__dsp16_iirpart_loop_main):
 *  - Spills r11 and r10, reloads r9 from sp[0x08], clears r0 and lr.
 *  - Tap loop (__dsp16_iirpart_loop_tap): loads a halfword from r9 with
 *    pre-decrement and one from r12[lr << 1], accumulates the product of
 *    their bottom halves into r0 with machh.w, and increments lr until it
 *    is no longer below r8 (tested with cp.h). The test is at the bottom,
 *    so the tap body runs at least once.
 *  - Shifts r0 right arithmetically by r1, reloads r11, loads the signed
 *    halfword at r11, subtracts r0 from it and stores it back with
 *    post-increment.
 *  - Advances r12 by 2 bytes, reloads r10, decrements it and repeats while
 *    the result is greater than zero. This test is also at the bottom, so
 *    the outer body runs at least once.
 *
 * Exit: releases the 12 reserved bytes and pops the saved registers with pc
 * as the final target.
 * NOTE(review): this looks like a direct return from inside the asm --
 * confirm.
 */
void iirpart_kernel_opti_den_small(dsp16_t *vect1, dsp16_t *vect2, int vect1_size, dsp16_t *vect3, int vect3_size, int prediv) { __asm__ __volatile__ ( "pushm r0-r7, lr\n\t" "sub sp, 12\n\t" "mov r1, r11\n\t" "mov r11, r12\n\t" "mov r12, r1\n\t" "lddsp r1, sp[48]\n\t" "rsub r1, r1, "ASTRINGZ(DSP16_QB)"\n\t" "add r9, r9, r8 << 1\n\t" "stdsp sp[0x08], r9\n" "__dsp16_iirpart_loop_main:\n\t" "stdsp sp[0x04], r11\n\t" "stdsp sp[0x00], r10\n\t" "lddsp r9, sp[0x08]\n\t" "mov r0, 0\n\t" "mov lr, r0\n" "__dsp16_iirpart_loop_tap:\n\t" "ld.uh r4, --r9\n\t" "ld.uh r6, r12[lr << 1]\n\t" "machh.w r0, r4:b, r6:b\n\t" "sub lr, -1\n\t" "cp.h lr, r8\n\t" ASM_INSTRUCT_COMPACKED("brlt __dsp16_iirpart_loop_tap")"\n\t" "asr r0, r0, r1\n\t" "lddsp r11, sp[0x04]\n\t" "ld.sh r2, r11[0x0]\n\t" "sub r2, r0\n\t" "st.h r11++, r2\n\t" "sub r12, -2\n\t" "lddsp r10, sp[0x00]\n\t" "sub r10, 1\n\t" "brgt __dsp16_iirpart_loop_main\n\t" "sub sp, -12\n\t" "popm r0-r7, pc\n\t" ); }
/*
 * dsp32_vect_neg_kernel_ext - unrolled inline-assembly kernel that negates
 * the elements of a 32-bit vector.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, vect2 in r11 and size in r10 (the mnemonics look
 * like AVR32) -- confirm against the ABI in use.
 *
 * What the asm does, in order:
 *  - Pushes r0-r7 and lr, then uses lr as the element index starting at 0.
 *  - Subtracts 3 from r10 and skips the loop when lr >= r10, so the loop
 *    body only runs while lr + 3 is below the original r10.
 *  - Each pass handles four elements as two identical groups: load a double
 *    word from r11[lr << 2] into r0/r1, negate both registers, store the
 *    pair to r12[lr << 2], and add 2 to lr.
 *  - After the loop lr is copied into r12 and the saved registers are
 *    popped with pc as the final target.
 *    NOTE(review): this looks like a direct return from inside the asm with
 *    the number of processed elements in r12, which would leave the trailing
 *    "return 0;" unreachable -- confirm.
 *
 * NOTE(review): the loop bound is tested with cp.h, which appears to be a
 * halfword compare; verify the behavior for sizes that do not fit in a
 * signed 16-bit value.
 */
static int dsp32_vect_neg_kernel_ext(dsp32_t *vect1, dsp32_t *vect2, int size) { __asm__ __volatile__ ( "pushm r0-r7, lr\n\t" "mov lr, 0\n\t" "sub r10, 3\n\t" "cp.h lr, r10\n\t" ASM_INSTRUCT_COMPACKED("brge __dsp32_neg_ext_end_loop")"\n" "__dsp32_neg_ext_loop:\n\t" "ld.d r0, r11[lr << 2]\n\t" "neg r0\n\t" "neg r1\n\t" "st.d r12[lr << 2], r0\n\t" "sub lr, -2\n\t" "ld.d r0, r11[lr << 2]\n\t" "neg r0\n\t" "neg r1\n\t" "st.d r12[lr << 2], r0\n\t" "sub lr, -2\n\t" "cp.h lr, r10\n\t" ASM_INSTRUCT_COMPACKED("brlt __dsp32_neg_ext_loop")"\n" "__dsp32_neg_ext_end_loop:\n\t" "mov r12, lr\n\t" "popm r0-r7, pc\n\t" ); return 0; }
/*
 * dsp32_vect_max_kernel_ext - unrolled inline-assembly kernel that folds the
 * elements of a 32-bit vector into a running maximum held in memory.
 *
 * The body is one asm statement that addresses hard-coded registers rather
 * than the C parameters.
 * NOTE(review): this presumably relies on the target calling convention
 * placing vect1 in r12, size in r11 and max in r10 (the mnemonics look like
 * AVR32) -- confirm against the ABI in use.
 *
 * What the asm does, in order:
 *  - Pushes r0-r7 and lr, then seeds the running maximum r2 from the word at
 *    r10[0x00]. The value already stored there therefore takes part in the
 *    result and must be meaningful on entry.
 *  - Sets lr to r11 - 4 and skips the loop when that is negative.
 *  - Each pass loads two double words (at offsets 0x00 and 0x08 from r12),
 *    applies max between r2 and each of the four loaded words, advances r12
 *    by 16 bytes and subtracts 4 from lr, repeating while lr stays >= 0.
 *  - After the loop r2 is written back to r10[0x00], 4 is added back to lr,
 *    and r12 is set to r11 - lr before the saved registers are popped with
 *    pc as the final target.
 *    NOTE(review): r11 - lr looks like the number of elements consumed by
 *    the loop, returned directly from inside the asm in r12, which would
 *    leave the trailing "return 0;" unreachable -- confirm.
 */
static int dsp32_vect_max_kernel_ext(dsp32_t *vect1, int size, dsp32_t *max) { __asm__ __volatile__ ( "pushm r0-r7, lr\n\t" "ld.w r2, r10[0x00]\n\t" "sub lr, r11, 4\n\t" ASM_INSTRUCT_COMPACKED("brlt __dsp32_max_ext_end_loop")"\n" "__dsp32_max_ext_loop:\n\t" "ld.d r0, r12[0x00]\n\t" "max r2, r2, r0\n\t" "max r2, r2, r1\n\t" "ld.d r0, r12[0x08]\n\t" "max r2, r2, r0\n\t" "max r2, r2, r1\n\t" "sub r12, -16\n\t" "sub lr, 4\n\t" ASM_INSTRUCT_COMPACKED("brge __dsp32_max_ext_loop")"\n" "__dsp32_max_ext_end_loop:\n\t" "st.w r10[0x00], r2\n\t" "sub lr, -4\n\t" "sub r12, r11, lr\n\t" "popm r0-r7, pc\n\t" ); return 0; }