我已经在 Spike 上模拟过裸机的通用(标量)RISC-V 程序。现在我想在 Spike 上运行裸机向量程序,但我认为,用于交叉编译并通过 Spike 生成日志文件的 C 程序,其构建方式会与通用程序不同。然而我找不到任何关于在 Spike 上模拟向量扩展的文档。有人可以指导我完成这个吗?
我已经为 rv64gcv 配置了工具链和 Spike,并尝试编写了一个使用向量内建函数(intrinsics)的主程序。尽管工具链生成的汇编文件中包含向量指令,但我的日志文件中只有通用(标量)指令。
这是我的C程序:
#include <stddef.h>
#include <riscv_vector.h>
/*
 * Host-communication words. Each is a 64-bit object placed, via the section
 * attribute, into its own named linker section (".tohost" / ".fromhost").
 * They are volatile so that every read and write is actually performed on
 * memory rather than being cached in a register or optimized away.
 *
 * NOTE(review): presumably these are the mailbox symbols the simulator's host
 * interface looks up in the ELF -- confirm against the linker script in use.
 * NOTE(review): uint64_t is not declared by <stddef.h>; this appears to rely
 * on <riscv_vector.h> pulling in <stdint.h> -- confirm or include it directly.
 */
volatile uint64_t tohost __attribute__((section(".tohost")));
volatile uint64_t fromhost __attribute__((section(".fromhost")));
/*
 * saxpy - vectorized AXPY on doubles: y[i] = a * x[i] + y[i] for i in [0, n).
 *
 * n: number of elements to process; the loop body is skipped when n == 0.
 * a: scalar multiplier.
 * x: input array of at least n doubles (read only).
 * y: input/output array of at least n doubles, updated in place.
 *
 * Side effect: stores 1 to the global `tohost` once the loop has finished.
 */
void saxpy(size_t n, const double a, const double *x, double *y) {
    /* Strip-mining: each pass handles the `vl` elements granted by vsetvl. */
    while (n > 0) {
        const size_t vl = vsetvl_e64m8(n);

        vfloat64m8_t vx = vle64_v_f64m8(x, vl);
        vfloat64m8_t vy = vle64_v_f64m8(y, vl);

        /*
         * vfmacc accumulates: vy = a * vx + vy, which is the AXPY update.
         * The earlier vfmsac computed a * vx - vy, i.e. it negated y.
         */
        vy = vfmacc_vf_f64m8(vy, a, vx, vl);
        vse64_v_f64m8(y, vy, vl);

        x += vl;
        y += vl;
        n -= vl;
    }

    /* Signal completion through the "tohost" word after the loop. */
    tohost = 1;
}
/*
 * main - build two 8-element test vectors and run saxpy on them.
 *
 * x is filled with 1.0, 2.0, ..., 8.0 and y with 10.0, 20.0, ..., 80.0;
 * saxpy is then called with the scalar 2.0. Always returns 0.
 *
 * NOTE(review): nothing in this function sets up the stack pointer or enables
 * the floating-point / vector units; presumably the startup code has to do
 * that before main runs on bare metal -- confirm.
 */
int main(void) {
    /*
     * An enum constant is an integer constant expression, so x and y are
     * ordinary fixed-size arrays. A `const size_t` object is not one in C,
     * which made these variable-length arrays sized on the stack at run time.
     */
    enum { array_size = 8 };
    const double a = 2.0;
    double x[array_size];
    double y[array_size];

    /* Initialize arrays x and y with data. */
    for (size_t i = 0; i < array_size; i++) {
        x[i] = (double)(i + 1);        /* 1.0, 2.0, 3.0, ..., 8.0 */
        y[i] = (double)((i + 1) * 10); /* 10.0, 20.0, 30.0, ..., 80.0 */
    }

    saxpy(array_size, a, x, y);
    return 0;
}
这是工具链生成的汇编代码:
.file "vector.c"
.option nopic
.attribute arch, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.globl tohost
.section .tohost,"aw"
.align 3
.type tohost, @object
.size tohost, 8
tohost:
.zero 8
.globl fromhost
.section .fromhost,"aw"
.align 3
.type fromhost, @object
.size fromhost, 8
fromhost:
.zero 8
.text
.align 1
.globl saxpy
.type saxpy, @function
saxpy:
addi sp,sp,-64
sd s0,56(sp)
addi s0,sp,64
csrr t0,vlenb
slli t1,t0,4
sub sp,sp,t1
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
sd a0,0(a5)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-32
addi a5,a5,-16
add a5,a5,s0
sd a1,0(a5)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
sd a2,0(a5)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
sd a3,0(a5)
j .L2
.L3:
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
vsetvli a5,a5,e64,m8,ta,mu
sd a5,-24(s0)
csrr a5,vlenb
neg a5,a5
slli a5,a5,3
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
ld a2,0(a5)
ld a4,-24(s0)
vsetvli zero,a4,e64,m8,ta,mu
vle64.v v24,(a2)
vs8r.v v24,0(a3)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
ld a2,0(a5)
ld a4,-24(s0)
vle64.v v24,(a2)
vs8r.v v24,0(a3)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a2,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-32
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,3
addi a5,a5,-16
addi a5,a5,-16
add a5,a5,s0
vl8re64.v v24,0(a2)
fld fa5,0(a4)
vl8re64.v v8,0(a5)
ld a4,-24(s0)
vsetvli zero,a4,e64,m8,tu,mu
vfmsac.vf v24,fa5,v8
vs8r.v v24,0(a3)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a5,a5,s0
ld a3,0(a4)
vl8re64.v v24,0(a5)
ld a4,-24(s0)
vsetvli zero,a4,e64,m8,ta,mu
vse64.v v24,(a3)
ld a5,-24(s0)
slli a3,a5,3
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
add a5,a5,a3
sd a5,0(a4)
ld a5,-24(s0)
slli a3,a5,3
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
add a5,a5,a3
sd a5,0(a4)
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a3,0(a5)
ld a5,-24(s0)
sub a5,a3,a5
sd a5,0(a4)
.L2:
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
bne a5,zero,.L3
lui a5,%hi(tohost)
li a4,1
sd a4,%lo(tohost)(a5)
nop
csrr t0,vlenb
slli t1,t0,4
add sp,sp,t1
ld s0,56(sp)
addi sp,sp,64
jr ra
.size saxpy, .-saxpy
.align 1
.globl main
.type main, @function
main:
addi sp,sp,-112
sd ra,104(sp)
sd s0,96(sp)
sd s1,88(sp)
sd s2,80(sp)
sd s3,72(sp)
addi s0,sp,112
mv t0,sp
mv s1,t0
li t0,8
sd t0,-64(s0)
lui t0,%hi(.LC0)
fld fa5,%lo(.LC0)(t0)
fsd fa5,-72(s0)
ld t0,-64(s0)
addi t0,t0,-1
sd t0,-80(s0)
ld t0,-64(s0)
mv s2,t0
li s3,0
srli t0,s2,58
slli a7,s3,6
or a7,t0,a7
slli a6,s2,6
ld a6,-64(s0)
mv t5,a6
li t6,0
srli a6,t5,58
slli a1,t6,6
or a1,a6,a1
slli a0,t5,6
ld a1,-64(s0)
slli a1,a1,3
addi a1,a1,15
srli a1,a1,4
slli a1,a1,4
sub sp,sp,a1
mv a1,sp
addi a1,a1,7
srli a1,a1,3
slli a1,a1,3
sd a1,-88(s0)
ld a1,-64(s0)
addi a1,a1,-1
sd a1,-96(s0)
ld a1,-64(s0)
mv t3,a1
li t4,0
srli a1,t3,58
slli a3,t4,6
or a3,a1,a3
slli a2,t3,6
ld a3,-64(s0)
mv t1,a3
li t2,0
srli a3,t1,58
slli a5,t2,6
or a5,a3,a5
slli a4,t1,6
ld a5,-64(s0)
slli a5,a5,3
addi a5,a5,15
srli a5,a5,4
slli a5,a5,4
sub sp,sp,a5
mv a5,sp
addi a5,a5,7
srli a5,a5,3
slli a5,a5,3
sd a5,-104(s0)
sd zero,-56(s0)
j .L5
.L6:
ld a5,-56(s0)
addi a5,a5,1
fcvt.d.lu fa5,a5
ld a4,-88(s0)
ld a5,-56(s0)
slli a5,a5,3
add a5,a4,a5
fsd fa5,0(a5)
ld a5,-56(s0)
addi a4,a5,1
mv a5,a4
slli a5,a5,2
add a5,a5,a4
slli a5,a5,1
fcvt.d.lu fa5,a5
ld a4,-104(s0)
ld a5,-56(s0)
slli a5,a5,3
add a5,a4,a5
fsd fa5,0(a5)
ld a5,-56(s0)
addi a5,a5,1
sd a5,-56(s0)
.L5:
ld a4,-56(s0)
ld a5,-64(s0)
bltu a4,a5,.L6
ld a3,-104(s0)
ld a2,-88(s0)
ld a1,-72(s0)
ld a0,-64(s0)
call saxpy
li a5,0
mv sp,s1
mv a0,a5
addi sp,s0,-112
ld ra,104(sp)
ld s0,96(sp)
ld s1,88(sp)
ld s2,80(sp)
ld s3,72(sp)
addi sp,sp,112
jr ra
.size main, .-main
.section .rodata
.align 3
.LC0:
.word 0
.word 1073741824
.ident "GCC: () 12.0.1 20220505 (prerelease)"
这是 Spike 生成的日志文件:
core 0: 0x0000000000001000 (0x00000297) auipc t0, 0x0
core 0: 3 0x0000000000001000 (0x00000297) x5 0x0000000000001000
core 0: 0x0000000000001004 (0x02028593) addi a1, t0, 32
core 0: 3 0x0000000000001004 (0x02028593) x11 0x0000000000001020
core 0: 0x0000000000001008 (0xf1402573) csrr a0, mhartid
core 0: 3 0x0000000000001008 (0xf1402573) x10 0x0000000000000000
core 0: 0x000000000000100c (0x0182b283) ld t0, 24(t0)
core 0: 3 0x000000000000100c (0x0182b283) x5 0x0000000000010116 mem 0x0000000000001018
core 0: 0x0000000000001010 (0x00028067) jr t0
core 0: 3 0x0000000000001010 (0x00028067)
core 0: >>>> _start
core 0: 0x0000000000010116 (0x00002197) auipc gp, 0x2
core 0: 3 0x0000000000010116 (0x00002197) x3 0x0000000000012116
core 0: 0x000000000001011a (0xeea18193) addi gp, gp, -278
core 0: 3 0x000000000001011a (0xeea18193) x3 0x0000000000012000
core 0: 0x000000000001011e (0xf7018513) addi a0, gp, -144
core 0: 3 0x000000000001011e (0xf7018513) x10 0x0000000000011f70
core 0: 0x0000000000010122 (0xfa818613) addi a2, gp, -88
core 0: 3 0x0000000000010122 (0xfa818613) x12 0x0000000000011fa8
core 0: 0x0000000000010126 (0x00008e09) c.sub a2, a0
core 0: 3 0x0000000000010126 (0x8e09) x12 0x0000000000000038
core 0: 0x0000000000010128 (0x00004581) c.li a1, 0
core 0: 3 0x0000000000010128 (0x4581) x11 0x0000000000000000
core 0: 0x000000000001012a (0x464000ef) jal pc + 0x464
core 0: 3 0x000000000001012a (0x464000ef) x1 0x000000000001012e
core 0: >>>> memset
core 0: 0x000000000001058e (0x0000433d) c.li t1, 15
core 0: 3 0x000000000001058e (0x433d) x6 0x000000000000000f
core 0: 0x0000000000010590 (0x0000872a) c.mv a4, a0
core 0: 3 0x0000000000010590 (0x872a) x14 0x0000000000011f70
core 0: 0x0000000000010592 (0x02c37163) bgeu t1, a2, pc + 34
core 0: 3 0x0000000000010592 (0x02c37163)
core 0: 0x0000000000010596 (0x00f77793) andi a5, a4, 15
core 0: 3 0x0000000000010596 (0x00f77793) x15 0x0000000000000000
core 0: 0x000000000001059a (0x0000e3c1) c.bnez a5, pc + 128
core 0: 3 0x000000000001059a (0xe3c1)
core 0: 0x000000000001059c (0x0000e1bd) c.bnez a1, pc + 102
core 0: 3 0x000000000001059c (0xe1bd)
core 0: 0x000000000001059e (0xff067693) andi a3, a2, -16
core 0: 3 0x000000000001059e (0xff067693) x13 0x0000000000000030
core 0: 0x00000000000105a2 (0x00008a3d) c.andi a2, 15
core 0: 3 0x00000000000105a2 (0x8a3d) x12 0x0000000000000008
core 0: 0x00000000000105a4 (0x000096ba) c.add a3, a4
core 0: 3 0x00000000000105a4 (0x96ba) x13 0x0000000000011fa0
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f70 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f78 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011f80
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f80 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f88 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011f90
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f90 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f98 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011fa0
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105b0 (0x0000e211) c.bnez a2, pc + 4
core 0: 3 0x00000000000105b0 (0xe211)
core 0: 0x00000000000105b4 (0x40c306b3) sub a3, t1, a2
core 0: 3 0x00000000000105b4 (0x40c306b3) x13 0x0000000000000007
core 0: 0x00000000000105b8 (0x0000068a) c.slli a3, 2
core 0: 3 0x00000000000105b8 (0x068a) x13 0x000000000000001c
core 0: 0x00000000000105ba (0x00000297) auipc t0, 0x0
core 0: 3 0x00000000000105ba (0x00000297) x5 0x00000000000105ba
core 0: 0x00000000000105be (0x00009696) c.add a3, t0
core 0: 3 0x00000000000105be (0x9696) x13 0x00000000000105d6
core 0: 0x00000000000105c0 (0x00a68067) jalr zero, a3, 10
core 0: 3 0x00000000000105c0 (0x00a68067)
core 0: 0x00000000000105e0 (0x00b703a3) sb a1, 7(a4)
core 0: 3 0x00000000000105e0 (0x00b703a3) mem 0x0000000000011fa7 0x00
core 0: 0x00000000000105e4 (0x00b70323) sb a1, 6(a4)
core 0: 3 0x00000000000105e4 (0x00b70323) mem 0x0000000000011fa6 0x00
core 0: 0x00000000000105e8 (0x00b702a3) sb a1, 5(a4)
core 0: 3 0x00000000000105e8 (0x00b702a3) mem 0x0000000000011fa5 0x00
core 0: 0x00000000000105ec (0x00b70223) sb a1, 4(a4)
core 0: 3 0x00000000000105ec (0x00b70223) mem 0x0000000000011fa4 0x00
core 0: 0x00000000000105f0 (0x00b701a3) sb a1, 3(a4)
core 0: 3 0x00000000000105f0 (0x00b701a3) mem 0x0000000000011fa3 0x00
core 0: 0x00000000000105f4 (0x00b70123) sb a1, 2(a4)
core 0: 3 0x00000000000105f4 (0x00b70123) mem 0x0000000000011fa2 0x00
core 0: 0x00000000000105f8 (0x00b700a3) sb a1, 1(a4)
core 0: 3 0x00000000000105f8 (0x00b700a3) mem 0x0000000000011fa1 0x00
core 0: 0x00000000000105fc (0x00b70023) sb a1, 0(a4)
core 0: 3 0x00000000000105fc (0x00b70023) mem 0x0000000000011fa0 0x00
core 0: 0x0000000000010600 (0x00008082) ret
core 0: 3 0x0000000000010600 (0x8082)
core 0: 0x000000000001012e (0x00000517) auipc a0, 0x0
core 0: 3 0x000000000001012e (0x00000517) x10 0x000000000001012e
core 0: 0x0000000000010132 (0x60050513) addi a0, a0, 1536
core 0: 3 0x0000000000010132 (0x60050513) x10 0x000000000001072e
core 0: 0x0000000000010136 (0x0000c519) c.beqz a0, pc + 14
core 0: 3 0x0000000000010136 (0xc519)
core 0: 0x0000000000010138 (0x00000517) auipc a0, 0x0
core 0: 3 0x0000000000010138 (0x00000517) x10 0x0000000000010138
core 0: 0x000000000001013c (0x5c250513) addi a0, a0, 1474
core 0: 3 0x000000000001013c (0x5c250513) x10 0x00000000000106fa
core 0: 0x0000000000010140 (0x5ee000ef) jal pc + 0x5ee
core 0: 3 0x0000000000010140 (0x5ee000ef) x1 0x0000000000010144
core 0: >>>> atexit
core 0: 0x000000000001072e (0x000085aa) c.mv a1, a0
core 0: 3 0x000000000001072e (0x85aa) x11 0x00000000000106fa
core 0: 0x0000000000010730 (0x00004681) c.li a3, 0
core 0: 3 0x0000000000010730 (0x4681) x13 0x0000000000000000
core 0: 0x0000000000010732 (0x00004601) c.li a2, 0
core 0: 3 0x0000000000010732 (0x4601) x12 0x0000000000000000
core 0: 0x0000000000010734 (0x00004501) c.li a0, 0
core 0: 3 0x0000000000010734 (0x4501) x10 0x0000000000000000
core 0: 0x0000000000010736 (0x0000a009) c.j pc + 2
core 0: 3 0x0000000000010736 (0xa009)
core 0: >>>> __register_exitproc
core 0: 0x0000000000010738 (0xf581b703) ld a4, -168(gp)
core 0: 3 0x0000000000010738 (0xf581b703) x14 0x0000000000011800 mem 0x0000000000011f58
core 0: 0x000000000001073c (0x1f873783) ld a5, 504(a4)
core 0: 3 0x000000000001073c (0x1f873783) x15 0x0000000000000000 mem 0x00000000000119f8
core 0: 0x0000000000010740 (0x0000c3b1) c.beqz a5, pc + 68
core 0: 3 0x0000000000010740 (0xc3b1)
core 0: 0x0000000000010784 (0x20070793) addi a5, a4, 512
core 0: 3 0x0000000000010784 (0x20070793) x15 0x0000000000011a00
core 0: 0x0000000000010788 (0x1ef73c23) sd a5, 504(a4)
core 0: 3 0x0000000000010788 (0x1ef73c23) mem 0x00000000000119f8 0x0000000000011a00
core 0: 0x000000000001078c (0x0000bf5d) c.j pc - 74
core 0: 3 0x000000000001078c (0xbf5d)
core 0: 0x0000000000010742 (0x00004798) c.lw a4, 8(a5)
core 0: 3 0x0000000000010742 (0x4798) x14 0x0000000000000000 mem 0x0000000000011a08
core 0: 0x0000000000010744 (0x0000487d) c.li a6, 31
core 0: 3 0x0000000000010744 (0x487d) x16 0x000000000000001f
core 0: 0x0000000000010746 (0x06e84263) blt a6, a4, pc + 100
core 0: 3 0x0000000000010746 (0x06e84263)
core 0: 0x000000000001074a (0x0000c505) c.beqz a0, pc + 40
core 0: 3 0x000000000001074a (0xc505)
core 0: 0x0000000000010772 (0x00270693) addi a3, a4, 2
core 0: 3 0x0000000000010772 (0x00270693) x13 0x0000000000000002
core 0: 0x0000000000010776 (0x0000068e) c.slli a3, 3
core 0: 3 0x0000000000010776 (0x068e) x13 0x0000000000000010
core 0: 0x0000000000010778 (0x00002705) c.addiw a4, 1
core 0: 3 0x0000000000010778 (0x2705) x14 0x0000000000000001
core 0: 0x000000000001077a (0x0000c798) c.sw a4, 8(a5)
core 0: 3 0x000000000001077a (0xc798) mem 0x0000000000011a08 0x00000001
core 0: 0x000000000001077c (0x000097b6) c.add a5, a3
core 0: 3 0x000000000001077c (0x97b6) x15 0x0000000000011a10
core 0: 0x000000000001077e (0x0000e38c) c.sd a1, 0(a5)
core 0: 3 0x000000000001077e (0xe38c) mem 0x0000000000011a10 0x00000000000106fa
core 0: 0x0000000000010780 (0x00004501) c.li a0, 0
core 0: 3 0x0000000000010780 (0x4501) x10 0x0000000000000000
core 0: 0x0000000000010782 (0x00008082) ret
core 0: 3 0x0000000000010782 (0x8082)
core 0: 0x0000000000010144 (0x3e0000ef) jal pc + 0x3e0
core 0: 3 0x0000000000010144 (0x3e0000ef) x1 0x0000000000010148
core 0: >>>> __libc_init_array
core 0: 0x0000000000010524 (0x00001101) c.addi sp, -32
core 0: 3 0x0000000000010524 (0x1101) x2 0xffffffffffffffe0
core 0: 0x0000000000010526 (0x0000e822) c.sdsp s0, 16(sp)
我的工具链配置是:
../configure --prefix=$HOME/rvv64 --with-arch=rv64gcv --with-abi=lp64 --enable-multilib
我的 Spike 配置是:
../configure --prefix=$HOME/rvv64 --with-varch=vlen:128,elen=32 --with-isa=rv64iv --with-target=riscv64-unknown-elf