From 24be24082552aa41f6342fb3803fa5c6593eae12 Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Tue, 2 Jul 2024 18:02:52 +0800 Subject: [PATCH] rv64v: fix buggy or missing exception checks * add exception check for vector load/store, remove alignment check when accessing vector registers * move vector load/store instruction info generation from decode stage to execute stage * fix illegal vtype check * when vstart is not zero and it is an arithmetic vector instruction, raise illegal instruction exception * support sew check for zext and sext instructions --- src/isa/riscv64/instr/rvf/decode.h | 16 -- src/isa/riscv64/instr/rvv/vcfg.h | 14 +- src/isa/riscv64/instr/rvv/vcommon.c | 27 +++- src/isa/riscv64/instr/rvv/vcommon.h | 1 + src/isa/riscv64/instr/rvv/vcompute.h | 12 +- src/isa/riscv64/instr/rvv/vcompute_impl.c | 28 ++-- src/isa/riscv64/instr/rvv/vldst.h | 33 ++++ src/isa/riscv64/instr/rvv/vldst_impl.c | 187 +++++++++++++++++----- src/isa/riscv64/instr/rvv/vreg_impl.c | 4 +- src/isa/riscv64/system/priv.c | 1 + 10 files changed, 226 insertions(+), 97 deletions(-) diff --git a/src/isa/riscv64/instr/rvf/decode.h b/src/isa/riscv64/instr/rvf/decode.h index 1765880a1..e935f7bfd 100644 --- a/src/isa/riscv64/instr/rvf/decode.h +++ b/src/isa/riscv64/instr/rvf/decode.h @@ -115,14 +115,6 @@ def_THelper(vstore_mmu) { def_THelper(fload) { print_Dop(id_src1->str, OP_STR_SIZE, "%ld(%s)", id_src2->imm, reg_name(s->isa.instr.i.rs1, 4)); - #ifdef CONFIG_RVV - const int table [8] = {1, 0, 0, 0, 0, 2, 4, 8}; - s->vm = s->isa.instr.v_opv.v_vm; //1 for without mask; 0 for with mask - s->v_width = table[s->isa.instr.vldfp.v_width]; - s->v_nf = s->isa.instr.vldfp.v_nf; - s->v_lsumop = s->isa.instr.vldfp.v_lsumop; - #endif // CONFIG_RVV - int mmu_mode = isa_mmu_state(); if (mmu_mode == MMU_DIRECT) { if (fp_enable()) { @@ -163,14 +155,6 @@ def_THelper(fload) { def_THelper(fstore) { print_Dop(id_src1->str, OP_STR_SIZE, "%ld(%s)", id_src2->imm, reg_name(s->isa.instr.i.rs1, 4)); - #ifdef 
CONFIG_RVV - const int table [8] = {1, 0, 0, 0, 0, 2, 4, 8}; - s->vm = s->isa.instr.v_opv.v_vm; //1 for without mask; 0 for with mask - s->v_width = table[s->isa.instr.vldfp.v_width]; - s->v_nf = s->isa.instr.vldfp.v_nf; - s->v_lsumop = s->isa.instr.vldfp.v_lsumop; - #endif // CONFIG_RVV - int mmu_mode = isa_mmu_state(); if (mmu_mode == MMU_DIRECT) { #ifndef CONFIG_FPU_NONE diff --git a/src/isa/riscv64/instr/rvv/vcfg.h b/src/isa/riscv64/instr/rvv/vcfg.h index 5e480a011..4f6e03a79 100644 --- a/src/isa/riscv64/instr/rvv/vcfg.h +++ b/src/isa/riscv64/instr/rvv/vcfg.h @@ -49,20 +49,18 @@ void set_vtype_vl(Decode *s, int mode) { if(vl_num == (uint64_t)-1 || check_vlmul_sew_illegal(id_src2->val)) { vtype->val = error; + + // if vtype illegal, set vl = 0, vd = 0 + vl->val = 0; + rtl_sr(s, id_dest->reg, &vl->val, 8); + return; } else { vtype->val = id_src2->val; } - // if vtype illegal,set vl = 0 ,vd = 0 - if(check_vlmul_sew_illegal(id_src2->val)){ - vl->val = 0; - - rtl_sr(s, id_dest->reg, &vl->val, 8/*4*/); - return; - } vl->val = vl_num; - rtl_sr(s, id_dest->reg, &vl_num, 8/*4*/); + rtl_sr(s, id_dest->reg, &vl_num, 8); vstart->val = 0; } diff --git a/src/isa/riscv64/instr/rvv/vcommon.c b/src/isa/riscv64/instr/rvv/vcommon.c index f3fcc7726..c4f6a1d1f 100644 --- a/src/isa/riscv64/instr/rvv/vcommon.c +++ b/src/isa/riscv64/instr/rvv/vcommon.c @@ -3,6 +3,8 @@ #include #include "vcommon.h" +#include + uint8_t check_vstart_ignore(Decode *s) { if(vstart->val >= vl->val) { if(vstart->val > 0) { @@ -15,13 +17,28 @@ uint8_t check_vstart_ignore(Decode *s) { return 0; } -bool check_vlmul_sew_illegal(rtlreg_t vtype_req){ - vtype_t vt = (vtype_t )vtype_req; +uint8_t check_vstart_exception(Decode *s) { + if(vstart->val > 0) { + longjmp_exception(EX_II); + } + if (vl->val == 0) { + return 1; + } + return 0; +} + +bool check_vlmul_sew_illegal(rtlreg_t vtype_req) { + vtype_t vt = (vtype_t) vtype_req; int vlmul = vt.vlmul; - int vsew = vt.vsew; if (vlmul > 4) vlmul -= 8; - if((vlmul < vsew 
+ 3 - log2(MAXELEN)) || vlmul == 4) return true; // vmul < sew/ELEN || vlmul == 100 - return false; + int vsew = 8 << vt.vsew; + float vflmul = vlmul >= 0 ? 1 << vlmul : 1.0 / (1 << -vlmul); + float min_vflmul = vflmul < 1.0f ? vflmul : 1.0f; + int vill = !(vflmul >= 0.125 && vflmul <= 8) + || vsew > min_vflmul * 64 + || (vtype_req >> 8) != 0 + || vsew > 64; + return vill == 1; } void set_NAN(rtlreg_t* fpreg, uint64_t vsew){ diff --git a/src/isa/riscv64/instr/rvv/vcommon.h b/src/isa/riscv64/instr/rvv/vcommon.h index c8f601bbb..5440cb77b 100644 --- a/src/isa/riscv64/instr/rvv/vcommon.h +++ b/src/isa/riscv64/instr/rvv/vcommon.h @@ -9,6 +9,7 @@ #include "../local-include/rtl.h" uint8_t check_vstart_ignore(Decode *s); +uint8_t check_vstart_exception(Decode *s); bool check_vlmul_sew_illegal(rtlreg_t vtype_req); void set_NAN(rtlreg_t* fpreg, uint64_t vsew); bool check_isFpCanonicalNAN(rtlreg_t* fpreg, uint64_t vsew); diff --git a/src/isa/riscv64/instr/rvv/vcompute.h b/src/isa/riscv64/instr/rvv/vcompute.h index 8fdcf27da..dad60cca0 100644 --- a/src/isa/riscv64/instr/rvv/vcompute.h +++ b/src/isa/riscv64/instr/rvv/vcompute.h @@ -26,13 +26,11 @@ def_EHelper(vadd) { def_EHelper(vsub) { Assert(s->src_vmode != SRC_VI, "vsub.vi not supported\n"); ARTHI(SUB, SIGNED) - // print_asm_template3(vsub); } def_EHelper(vrsub) { Assert(s->src_vmode != SRC_VV, "vrsub.vv not supported\n"); ARTHI(RSUB, SIGNED) - // print_asm_template3(vrsub); } def_EHelper(vminu) { @@ -378,7 +376,7 @@ def_EHelper(vmvnr) { def_EHelper(vpopc) { require_vector(true); if(vstart->val != 0) - check_vstart_ignore(s); + check_vstart_exception(s); rtl_li(s, s1, 0); for(int idx = vstart->val; idx < vl->val; idx ++) { @@ -400,7 +398,7 @@ def_EHelper(vpopc) { def_EHelper(vfirst) { require_vector(true); if(vstart->val != 0) - check_vstart_ignore(s); + check_vstart_exception(s); int pos = -1; for(int idx = vstart->val; idx < vl->val; idx ++) { @@ -571,7 +569,7 @@ def_EHelper(viota) { require_aligned(id_dest->reg, vflmul); 
require_noover(id_dest->reg, vflmul, id_src2->reg, 1); - if(!check_vstart_ignore(s)) { + if(!check_vstart_exception(s)) { rtl_li(s, s1, 0); for(int idx = vstart->val; idx < vl->val; idx ++) { rtlreg_t mask = get_mask(0, idx, vtype->vsew, vtype->vlmul); @@ -617,7 +615,7 @@ def_EHelper(vid) { double vflmul = compute_vflmul(); require_aligned(id_dest->reg, vflmul); - if(!check_vstart_ignore(s)) { + if(!check_vstart_exception(s)) { for(int idx = 0; idx < vl->val; idx ++) { // mask rtlreg_t mask = get_mask(0, idx, vtype->vsew, vtype->vlmul); @@ -787,7 +785,7 @@ def_EHelper(vcompress) { longjmp_exception(EX_II); } require_noover(id_dest->reg, vflmul, id_src->reg, 1); - if(!check_vstart_ignore(s)) { + if(!check_vstart_exception(s)) { rtl_li(s, s1, 0); for(int idx = vstart->val; idx < vl->val; idx ++) { diff --git a/src/isa/riscv64/instr/rvv/vcompute_impl.c b/src/isa/riscv64/instr/rvv/vcompute_impl.c index cc122fafa..d5607718f 100644 --- a/src/isa/riscv64/instr/rvv/vcompute_impl.c +++ b/src/isa/riscv64/instr/rvv/vcompute_impl.c @@ -318,7 +318,6 @@ void vector_slide_check(Decode *s, bool is_over) { } void arthimetic_instr(int opcode, int is_signed, int widening, int narrow, int dest_mask, Decode *s) { - if(check_vstart_ignore(s)) return; require_vector(true); int vlmax = get_vlmax(vtype->vsew, vtype->vlmul); int idx; @@ -369,7 +368,12 @@ void arthimetic_instr(int opcode, int is_signed, int widening, int narrow, int d } else { vector_vwv_check(s, false); } + } else if (narrow < 0) { + if (vtype->vsew + narrow < 0) { + longjmp_exception(EX_II); + } } + if(check_vstart_exception(s)) return; for(idx = vstart->val; idx < vl->val; idx ++) { // mask rtlreg_t mask = get_mask(0, idx, vtype->vsew, vtype->vlmul); @@ -417,7 +421,7 @@ void arthimetic_instr(int opcode, int is_signed, int widening, int narrow, int d switch (opcode) { case VEXT: eew = vtype->vsew + narrow; - emul = vtype->vlmul - ((vtype->vsew) - (vtype->vsew + narrow)); + emul = vtype->vlmul + narrow; break; default: eew 
= vtype->vsew + narrow; @@ -833,8 +837,7 @@ void arthimetic_instr(int opcode, int is_signed, int widening, int narrow, int d * because the illegal instruction exception is handled in vcompute.h for vrgather and vslide instruction */ void permutaion_instr(int opcode, Decode *s) { - if(check_vstart_ignore(s)) return; - require_vector(true); + if(check_vstart_exception(s)) return; int vlmax = get_vlmax(vtype->vsew, vtype->vlmul); int idx; for(idx = vstart->val; idx < vl->val; idx ++) { @@ -978,7 +981,6 @@ void permutaion_instr(int opcode, Decode *s) { } void floating_arthimetic_instr(int opcode, int is_signed, int widening, int dest_mask, Decode *s) { - if(check_vstart_ignore(s)) return; require_vector(true); if (dest_mask) { if (s->src_vmode == SRC_VV) { @@ -1013,17 +1015,13 @@ void floating_arthimetic_instr(int opcode, int is_signed, int widening, int dest vector_wwv_check(s, false); } } + if(check_vstart_exception(s)) return; int idx; word_t FPCALL_TYPE = FPCALL_W64; // fpcall type switch (vtype->vsew) { case 0 : - switch (widening) { - case vdNarrow : FPCALL_TYPE = FPCALL_W16; break; - case vdWidening : FPCALL_TYPE = FPCALL_W8; break; - default : Loge("f8 not supported"); longjmp_exception(EX_II); break; - } - break; + Loge("f8 not supported"); longjmp_exception(EX_II); break; case 1 : switch (widening) { case vsdWidening : FPCALL_TYPE = FPCALL_W16_to_32; break; @@ -1215,7 +1213,7 @@ void mask_instr(int opcode, Decode *s) { if (s->vm == 0) { longjmp_exception(EX_II); } - if(check_vstart_ignore(s)) return; + if(check_vstart_exception(s)) return; int idx; for(idx = vstart->val; idx < vl->val; idx++) { // operand - vs2 @@ -1268,8 +1266,8 @@ vector register, not a vector register group, so any vector register can be the scalar source or destination of a vector reduction regardless of LMUL setting. 
*/ void reduction_instr(int opcode, int is_signed, int wide, Decode *s) { - if(check_vstart_ignore(s)) return; vector_reduction_check(s, wide); + if(check_vstart_exception(s)) return; // operand - vs1 get_vreg(id_src->reg, 0, s1, vtype->vsew+wide, vtype->vlmul, is_signed, 0); if(is_signed) rtl_sext(s, s1, s1, 1 << (vtype->vsew+wide)); @@ -1311,8 +1309,8 @@ void reduction_instr(int opcode, int is_signed, int wide, Decode *s) { } void float_reduction_instr(int opcode, int widening, Decode *s) { - if(check_vstart_ignore(s)) return; vector_reduction_check(s, widening); + if(check_vstart_exception(s)) return; if (widening) get_vreg(id_src->reg, 0, s1, vtype->vsew+1, vtype->vlmul, 0, 1); else @@ -1448,8 +1446,8 @@ void float_reduction_step1(uint64_t src1, uint64_t src2, Decode *s) { } void float_reduction_computing(Decode *s) { - if(check_vstart_ignore(s)) return; vector_reduction_check(s, false); + if(check_vstart_exception(s)) return; word_t FPCALL_TYPE = FPCALL_W64; int idx; diff --git a/src/isa/riscv64/instr/rvv/vldst.h b/src/isa/riscv64/instr/rvv/vldst.h index 11a69d3ce..bb7d862ff 100644 --- a/src/isa/riscv64/instr/rvv/vldst.h +++ b/src/isa/riscv64/instr/rvv/vldst.h @@ -19,23 +19,40 @@ #include "vldst_impl.h" #include "vcompute_impl.h" +// if we decode some information in decode stage +// when running in opt mode, these information will not be generated because +// it only runs the exec functions +void predecode_vls(Decode *s) { +#ifdef CONFIG_RVV + const int table [8] = {1, 0, 0, 0, 0, 2, 4, 8}; + s->vm = s->isa.instr.v_opv.v_vm; //1 for without mask; 0 for with mask + s->v_width = table[s->isa.instr.vldfp.v_width]; + s->v_nf = s->isa.instr.vldfp.v_nf; + s->v_lsumop = s->isa.instr.vldfp.v_lsumop; +#endif +} + def_EHelper(vle) { //unit-strided + predecode_vls(s); require_vector(true); VLD(MODE_UNIT, UNSIGNED, s, MMU_DIRECT) } def_EHelper(vlm) { //mask + predecode_vls(s); require_vector(true); VLD(MODE_MASK, UNSIGNED, s, MMU_DIRECT) } def_EHelper(vlr) { // whole 
register + predecode_vls(s); require_vector(false); VLR(MODE_UNIT, UNSIGNED, s, MMU_DIRECT) } def_EHelper(vlse) { //strided unsigned + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -43,6 +60,7 @@ def_EHelper(vlse) { //strided unsigned } def_EHelper(vlxe) { + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -50,21 +68,25 @@ def_EHelper(vlxe) { } def_EHelper(vse) { + predecode_vls(s); require_vector(true); VST(MODE_UNIT, MMU_DIRECT) } def_EHelper(vsm) { + predecode_vls(s); require_vector(true); VST(MODE_MASK, MMU_DIRECT) } def_EHelper(vsr) { + predecode_vls(s); require_vector(false); VSR(MODE_UNIT, MMU_DIRECT) } def_EHelper(vsse) { + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -72,6 +94,7 @@ def_EHelper(vsse) { } def_EHelper(vsxe) { + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -79,21 +102,25 @@ def_EHelper(vsxe) { } def_EHelper(vle_mmu) { //unit-strided + predecode_vls(s); require_vector(true); VLD(MODE_UNIT, UNSIGNED, s, MMU_TRANSLATE) } def_EHelper(vlm_mmu) { //mask + predecode_vls(s); require_vector(true); VLD(MODE_MASK, UNSIGNED, s, MMU_TRANSLATE) } def_EHelper(vlr_mmu) { //whple register + predecode_vls(s); require_vector(false); VLR(MODE_UNIT, UNSIGNED, s, MMU_TRANSLATE) } def_EHelper(vlse_mmu) { //strided unsigned + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -101,6 +128,7 @@ def_EHelper(vlse_mmu) { //strided unsigned } def_EHelper(vlxe_mmu) { + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); @@ -108,26 +136,31 @@ def_EHelper(vlxe_mmu) { } def_EHelper(vse_mmu) { + predecode_vls(s); require_vector(true); VST(MODE_UNIT, 
MMU_TRANSLATE) } def_EHelper(vsm_mmu) { + predecode_vls(s); require_vector(true); VST(MODE_MASK, MMU_TRANSLATE) } def_EHelper(vsr_mmu) { + predecode_vls(s); require_vector(false); VSR(MODE_UNIT, MMU_TRANSLATE) } def_EHelper(vsse_mmu) { + predecode_vls(s); require_vector(true); VST(MODE_STRIDED, MMU_TRANSLATE) } def_EHelper(vsxe_mmu) { + predecode_vls(s); require_vector(true); s->src2.reg = s->isa.instr.fp.rs2; rtl_lr(s, &(s->src2.val), s->src2.reg, 4); diff --git a/src/isa/riscv64/instr/rvv/vldst_impl.c b/src/isa/riscv64/instr/rvv/vldst_impl.c index 9fc4879ed..71d0ecb49 100644 --- a/src/isa/riscv64/instr/rvv/vldst_impl.c +++ b/src/isa/riscv64/instr/rvv/vldst_impl.c @@ -18,11 +18,12 @@ #include #include "vldst_impl.h" +#include "vcompute_impl.h" #include "../local-include/intr.h" // reference: v_ext_macros.h in riscv-isa-sim -void isa_emul_check(int emul, int nfields) { +static void isa_emul_check(int emul, int nfields) { if (emul > 3) { Log("vector EMUL > 8 happen: EMUL:%d\n", (1 << emul)); longjmp_exception(EX_II); @@ -42,11 +43,108 @@ void isa_emul_check(int emul, int nfields) { } } +static void vstore_check(int mode, Decode *s) { + int eew = 0; + switch(s->v_width) { + case 1: eew = 0; break; + case 2: eew = 1; break; + case 4: eew = 2; break; + case 8: eew = 3; break; + default: Loge("illegal v_width: %d\n", s->v_width); + longjmp_exception(EX_II); break; + } + uint64_t veew = mode == MODE_MASK ? 1 : 8 << eew; + uint64_t vsew = 8 << vtype->vsew; + double vflmul = compute_vflmul(); + float vemul = mode == MODE_MASK ? 1 : ((float)veew / vsew * vflmul); + uint64_t emul = vemul < 1 ? 
1 : vemul; + if (!(vemul >= 0.125 && vemul <= 8)) { + Loge("illegal EMUL: %f\n", vemul); + longjmp_exception(EX_II); + } + require_aligned(id_dest->reg, vemul); + uint64_t nf = s->v_nf + 1; + if (!((nf * emul <= 8) && (id_dest->reg + nf * emul <= 32))) { + Loge("illegal NFIELDS: %lu EMUL: %lu\n", nf, emul); + longjmp_exception(EX_II); + } +} + +static void vload_check(int mode, Decode *s) { + vstore_check(mode, s); + require_vm(s); +} + +static void index_vstore_check(int mode, Decode *s) { + int eew = vtype->vsew; + int elt_width = 0; + switch(s->v_width) { + case 1: elt_width = 0; break; + case 2: elt_width = 1; break; + case 4: elt_width = 2; break; + case 8: elt_width = 3; break; + default: break; + } + double vflmul = compute_vflmul(); + float vemul = (float)(8 << elt_width) / (8 << eew) * vflmul; + if (!(vemul >= 0.125 && vemul <= 8)) { + Loge("illegal EMUL: %f\n", vemul); + longjmp_exception(EX_II); + } + + uint64_t flmul = vflmul < 1 ? 1 : vflmul; + + require_aligned(id_dest->reg, vflmul); + require_aligned(id_src2->reg, vemul); + + uint64_t nf = s->v_nf + 1; + if (!((nf * flmul <= 8) && (id_dest->reg + nf * flmul <= 32))) { + Loge("illegal NFIELDS: %lu LMUL: %lu\n", nf, flmul); + longjmp_exception(EX_II); + } +} + +static void index_vload_check(int mode, Decode *s) { + index_vstore_check(mode, s); + int eew = vtype->vsew; + int elt_width = 0; + switch(s->v_width) { + case 1: elt_width = 0; break; + case 2: elt_width = 1; break; + case 4: elt_width = 2; break; + case 8: elt_width = 3; break; + default: break; + } + uint64_t nf = s->v_nf + 1; + double vflmul = compute_vflmul(); + float vemul = (float)(8 << elt_width) / (8 << eew) * vflmul; + uint64_t flmul = vflmul < 1 ? 
1 : vflmul; + for (uint64_t idx = 0; idx < nf; idx++) { + uint64_t seg_vd = id_dest->reg + idx * flmul; + if (elt_width > eew) { + if (seg_vd != id_src2->reg) { + require_noover(seg_vd, vflmul, id_src2->reg, vemul); + } + } else if (elt_width < eew) { + if (vemul < 1) { + require_noover(seg_vd, vflmul, id_src2->reg, vemul); + } else { + require_noover_widen(seg_vd, vflmul, id_src2->reg, vemul); + } + } + if (nf >= 2) { + require_noover(seg_vd, vflmul, id_src2->reg, vemul); + } + } + require_vm(s); +} + void vld(int mode, int is_signed, Decode *s, int mmu_mode) { + vload_check(mode, s); if(check_vstart_ignore(s)) return; word_t idx; uint64_t nf, fn, vl_val, base_addr, vd, addr; - int eew, emul, emul_coding, stride, is_stride; + int eew, emul, stride, is_unit_stride; // s->v_width is the bytes of a unit // eew is the coding like vsew @@ -58,17 +156,17 @@ void vld(int mode, int is_signed, Decode *s, int mmu_mode) { case 8: eew = 3; break; default: break; } - emul_coding = vtype->vlmul > 4 ? vtype->vlmul - 8 + eew - vtype->vsew : vtype->vlmul + eew - vtype->vsew; - isa_emul_check(mode == MODE_MASK ? 1 : emul_coding, 1); - emul_coding = emul_coding < 0 ? 0 : emul_coding; - emul = 1 << emul_coding; + emul = vtype->vlmul > 4 ? vtype->vlmul - 8 + eew - vtype->vsew : vtype->vlmul + eew - vtype->vsew; + isa_emul_check(mode == MODE_MASK ? 1 : emul, 1); + emul = emul < 0 ? 0 : emul; + emul = 1 << emul; if (mode == MODE_STRIDED) { stride = id_src2->val; - is_stride = 0; + is_unit_stride = 0; } else { stride = 0; - is_stride = 1; + is_unit_stride = 1; } // previous decode does not load vals for us rtl_lr(s, &(s->src1.val), s->src1.reg, 4); @@ -84,24 +182,24 @@ void vld(int mode, int is_signed, Decode *s, int mmu_mode) { if (RVV_AGNOSTIC && vtype->vma) { tmp_reg[1] = (uint64_t) -1; for (fn = 0; fn < nf; fn++) { - set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, emul_coding, mode == MODE_MASK ? 
0 : 1); + set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, 0, 0); } } continue; } for (fn = 0; fn < nf; fn++) { - addr = base_addr + idx * stride + (idx * nf * is_stride + fn) * s->v_width; + addr = base_addr + idx * stride + (idx * nf * is_unit_stride + fn) * s->v_width; rtl_lm(s, &tmp_reg[1], &addr, 0, s->v_width, mmu_mode); - set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, emul_coding, mode == MODE_MASK ? 0 : 1); + set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, 0, 0); } } if (RVV_AGNOSTIC && (mode == MODE_MASK || vtype->vta)) { // set tail of vector register to 1 - int vlmax = mode == MODE_MASK ? VLEN / 8 : get_vlen_max(eew, emul_coding, 0); + int vlmax = mode == MODE_MASK ? VLEN / 8 : get_vlen_max(vtype->vsew, vtype->vlmul, 0); for(idx = vl_val; idx < vlmax; idx++) { tmp_reg[1] = (uint64_t) -1; for (fn = 0; fn < nf; fn++) { - set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, emul_coding, mode == MODE_MASK ? 0 : 1); + set_vreg(vd + fn * emul, idx, tmp_reg[1], eew, 0, 0); } } } @@ -115,22 +213,22 @@ void vldx(int mode, int is_signed, Decode *s, int mmu_mode) { // 5 -> 16 1 -> 16 // 6 -> 32 2 -> 32 // 7 -> 64 3 -> 64 + index_vload_check(mode, s); if(check_vstart_ignore(s)) return; word_t idx; uint64_t nf = s->v_nf + 1, fn, vl_val, base_addr, vd, index, addr; - int eew, lmul, index_width, data_length; + int eew, lmul, index_width, data_width; index_width = 0; eew = vtype->vsew; - s->v_width = s->isa.instr.vldfp.v_width; switch(s->v_width) { - case 0: index_width = 0; break; - case 5: index_width = 1; break; - case 6: index_width = 2; break; - case 7: index_width = 3; break; + case 1: index_width = 0; break; + case 2: index_width = 1; break; + case 4: index_width = 2; break; + case 8: index_width = 3; break; default: break; } - data_length = 1 << eew; + data_width = 1 << eew; lmul = vtype->vlmul > 4 ? vtype->vlmul - 8 : vtype->vlmul; isa_emul_check(lmul, nf); lmul = lmul < 0 ? 
0 : lmul; @@ -149,31 +247,31 @@ void vldx(int mode, int is_signed, Decode *s, int mmu_mode) { if (RVV_AGNOSTIC && vtype->vma) { tmp_reg[1] = (uint64_t) -1; for (fn = 0; fn < nf; fn++) { - set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, vtype->vlmul, 1); + set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, 0, 0); } } continue; } for (fn = 0; fn < nf; fn++) { // read index - get_vreg(id_src2->reg, idx, &tmp_reg[2], index_width, vtype->vlmul, 0, 1); + get_vreg(id_src2->reg, idx, &tmp_reg[2], index_width, 0, 0, 0); index = tmp_reg[2]; // read data in memory - addr = base_addr + index + fn * data_length; + addr = base_addr + index + fn * data_width; s->v_is_vx = 1; - rtl_lm(s, &tmp_reg[1], &addr, 0, data_length, mmu_mode); + rtl_lm(s, &tmp_reg[1], &addr, 0, data_width, mmu_mode); s->v_is_vx = 0; - set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, vtype->vlmul, 1); + set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, 0, 0); } } if (RVV_AGNOSTIC && vtype->vta) { // set tail of vector register to 1 - int vlmax = get_vlen_max(eew, vtype->vlmul, 0); + int vlmax = get_vlen_max(vtype->vsew, vtype->vlmul, 0); for(idx = vl->val; idx < vlmax; idx++) { tmp_reg[1] = (uint64_t) -1; for (fn = 0; fn < nf; fn++) { - set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, vtype->vlmul, 1); + set_vreg(vd + fn * lmul, idx, tmp_reg[1], eew, 0, 0); } } } @@ -184,10 +282,11 @@ void vldx(int mode, int is_signed, Decode *s, int mmu_mode) { } void vst(int mode, Decode *s, int mmu_mode) { + vstore_check(mode, s); if(check_vstart_ignore(s)) return; word_t idx; uint64_t nf, fn, vl_val, base_addr, vd, addr; - int eew, emul, stride, is_stride; + int eew, emul, stride, is_unit_stride; eew = 0; switch(s->v_width) { @@ -204,10 +303,10 @@ void vst(int mode, Decode *s, int mmu_mode) { if (mode == MODE_STRIDED) { stride = id_src2->val; - is_stride = 0; + is_unit_stride = 0; } else { stride = 0; - is_stride = 1; + is_unit_stride = 1; } // previous decode does not load vals for us rtl_lr(s, &(s->src1.val), s->src1.reg, 4); @@ 
-223,8 +322,8 @@ void vst(int mode, Decode *s, int mmu_mode) { continue; } for (fn = 0; fn < nf; fn++) { - get_vreg(vd + fn * emul, idx, &tmp_reg[1], eew, vtype->vlmul, 0, mode == MODE_MASK ? 0 : 1); - addr = base_addr + idx * stride + (idx * nf * is_stride + fn) * s->v_width; + get_vreg(vd + fn * emul, idx, &tmp_reg[1], eew, 0, 0, 0); + addr = base_addr + idx * stride + (idx * nf * is_unit_stride + fn) * s->v_width; rtl_sm(s, &tmp_reg[1], &addr, 0, s->v_width, mmu_mode); } } @@ -234,22 +333,22 @@ void vst(int mode, Decode *s, int mmu_mode) { } void vstx(int mode, Decode *s, int mmu_mode) { + index_vload_check(mode, s); if(check_vstart_ignore(s)) return; word_t idx; uint64_t nf = s->v_nf + 1, fn, vl_val, base_addr, vd, index, addr; - int eew, lmul, index_width, data_length; + int eew, lmul, index_width, data_width; index_width = 0; eew = vtype->vsew; - s->v_width = s->isa.instr.vldfp.v_width; switch(s->v_width) { - case 0: index_width = 0; break; - case 5: index_width = 1; break; - case 6: index_width = 2; break; - case 7: index_width = 3; break; + case 1: index_width = 0; break; + case 2: index_width = 1; break; + case 4: index_width = 2; break; + case 8: index_width = 3; break; default: break; } - data_length = 1 << eew; + data_width = 1 << eew; lmul = vtype->vlmul > 4 ? vtype->vlmul - 8 : vtype->vlmul; isa_emul_check(lmul, nf); lmul = lmul < 0 ? 
0 : lmul; @@ -269,14 +368,14 @@ void vstx(int mode, Decode *s, int mmu_mode) { } for (fn = 0; fn < nf; fn++) { // read index - get_vreg(id_src2->reg, idx, &tmp_reg[2], index_width, vtype->vlmul, 0, 1); + get_vreg(id_src2->reg, idx, &tmp_reg[2], index_width, 0, 0, 0); index = tmp_reg[2]; // read data in vector register - get_vreg(vd + fn * lmul, idx, &tmp_reg[1], eew, vtype->vlmul, 0, 1); - addr = base_addr + index + fn * data_length; + get_vreg(vd + fn * lmul, idx, &tmp_reg[1], eew, 0, 0, 0); + addr = base_addr + index + fn * data_width; s->v_is_vx = 1; - rtl_sm(s, &tmp_reg[1], &addr, 0, data_length, mmu_mode); + rtl_sm(s, &tmp_reg[1], &addr, 0, data_width, mmu_mode); s->v_is_vx = 0; } } @@ -286,7 +385,7 @@ void vstx(int mode, Decode *s, int mmu_mode) { vp_set_dirty(); } -void isa_whole_reg_check(uint64_t vd, uint64_t nfields) { +static void isa_whole_reg_check(uint64_t vd, uint64_t nfields) { if (nfields != 1 && nfields != 2 && nfields != 4 && nfields != 8) { Log("illegal NFIELDS for whole register instrs: NFIELDS:%lu", nfields); longjmp_exception(EX_II); diff --git a/src/isa/riscv64/instr/rvv/vreg_impl.c b/src/isa/riscv64/instr/rvv/vreg_impl.c index 512a3082f..87f3025b1 100644 --- a/src/isa/riscv64/instr/rvv/vreg_impl.c +++ b/src/isa/riscv64/instr/rvv/vreg_impl.c @@ -40,7 +40,7 @@ rtlreg_t check_vsetvl(rtlreg_t vtype_req, rtlreg_t vl_req, int mode) { if (mode == 1) { return VLMAX; } else if (mode == 2) { - return old_vl; + return old_vl < VLMAX ? old_vl : VLMAX; } else { if (vt.vsew > 3) { //check if max-len supported return (uint64_t)-1; //return 0 means error, including vl_req is 0, for vl_req should not be 0. 
@@ -102,7 +102,7 @@ int get_idx(uint64_t reg, int idx, uint64_t vsew) { void isa_misalign_vreg_check(uint64_t reg, uint64_t vlmul, int needAlign) { if (needAlign && vlmul < 4) { if (reg % (1 << vlmul) != 0) { - Log("vector register group misaligned happen: reg:x%lu vlmul:0x%lx needAlign:%d", reg, vlmul, needAlign); + Loge("vector register group misaligned happen: reg:x%lu vlmul:0x%lx needAlign:%d", reg, vlmul, needAlign); longjmp_exception(EX_II); } } diff --git a/src/isa/riscv64/system/priv.c b/src/isa/riscv64/system/priv.c index bc40c512d..b753eabcd 100644 --- a/src/isa/riscv64/system/priv.c +++ b/src/isa/riscv64/system/priv.c @@ -674,6 +674,7 @@ static inline void csr_write(word_t *dest, word_t src) { else if (is_write(vcsr)) { *dest = src & 0b111; vxrm->val = (src >> 1) & 0b11; vxsat->val = src & 0b1; } else if (is_write(vxrm)) { *dest = src & 0b11; vcsr->val = (vxrm->val) << 1 | vxsat->val; } else if (is_write(vxsat)) { *dest = src & 0b1; vcsr->val = (vxrm->val) << 1 | vxsat->val; } + else if (is_write(vstart)) { *dest = src & (VLEN - 1); } #endif #ifdef CONFIG_MISA_UNCHANGEABLE else if (is_write(misa)) { /* do nothing */ }