Exception 4:取指令/取数据时地址未对齐(Unaligned Address in inst/data fetch):0x100100bb

问题描述 投票:0回答:1

我正试图在MIPS模拟器(QtMips)中使用汇编语言实现4x4矩阵乘法。QtMips报错:Exception 4: Unaligned Address in inst/data fetch: 0x100100bb

这是我单步时出错的地方。

    [00400070] c52b0000  lwc1 $f11, 0($9) ; 80: lwc1 $f11 0($t1) #load float from array1

错误发生在计数器k = 2时,也就是处于第三次循环的时候。我猜测是第三次执行lwc1加载时,地址的32位(4字节)对齐出了问题。

这是我尝试/阅读但不起作用的内容:

  1. 这个回答(This)建议我在.data段中的数组(矩阵)声明之前放置.align 2或.align 4。没有效果。
  2. 另一个回答(This)认为可能是size值(定义在array3之后)的问题。但我是通过lw $s1 size把它加载到$s1的,所以我认为这并不是真正的原因。

我完全不知道该怎么办了。请给我一些指点。

以下是我的全部代码:

    # Matrix data for the program: two input matrices (array1, array2) and a
    # result matrix (array3). Each label is followed by 16 floats, i.e. one
    # 4x4 matrix stored as consecutive 4-byte values.
    .data
    .globl array1
    .globl array2
    .globl array3

    .align 5 # alignment request before array1 (presumably a 2^5 = 32-byte boundary, as in SPIM/MARS -- confirm for the simulator in use)
array1: .float 1.00, 0.00, 3.14, 2.72, 2.72, 1.00, 0.00, 3.14, 1.00, 1.00, 1.00, 1.00, 1.00, 2.00, 3.00, 4.00
    .align 5 # same alignment request before array2
array2: .float 1.00, 1.00, 0.00, 3.14, 0.00, 1.00, 3.14, 2.72, 0.00, 1.00, 1.00, 0.00, 4.00, 3.00, 2.00, 1.00
    .align 5 # same alignment request before array3
array3: .float 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00

size: .word 4 # integer word with the value 4 (matrix dimension); main loads it into $s1 via "lw $s1 size"

    # Program entry: saves $31, prints a title string, then initialises the
    # pointers and counters used by the loops that follow.
    .text
    .globl main
main:
    sw $31 saved_ret_pc # store the return address; NOTE(review): saved_ret_pc is not defined anywhere in this listing -- confirm where it comes from

    # Title string plus two data items; lbd_ and lbd1_ are not referenced
    # by any instruction in this listing.
    .data
lb_:    .asciiz "Vector Multiplication\n"
lbd_:   .byte 1, -1, 0, 128
lbd1_:  .word 0x76543210, 0xfedcba98
    .text
    li $v0 4    # syscall 4 (print_str)
    la $a0 lb_  # $a0 = address of the title string
    syscall

# main program: multiply matrix 1 and 2, store in array3

la $t1 array1 # $t1 = address used for loads from array1
la $t2 array2 # $t2 = address used for loads from array2
la $t3 array3 # $t3 = address used for loads/stores on array3


li $t4 4 # i loop counter, counts down from 4    -> I changed addi to li
li $t5 4 # j loop counter, counts down from 4
li $t6 4 # k loop counter, counts down from 4

lw $s1 size # $s1 = matrix dimension read from the "size" word


# Loop nest of the multiplication. Each pass through k_loop loads one float
# from each of array1/array2/array3, stores array3 + array1*array2 back to
# array3, then advances the three pointers and decrements the counters.
# NOTE(review): $t4/$t5/$t6 are initialised once before i_loop and are not
# re-initialised at j_loop or k_loop, so re-entering an inner loop starts
# with whatever value the counter was left at.
i_loop:
    j j_loop # unconditional jump to the next label
j_loop:
    j k_loop # unconditional jump to the next label
k_loop:
    # $f11 = operand from array1, $f12 = operand from array2
    # $f10 = product of the two operands
    # $f13 = value read from array3, then updated and written back

    lwc1 $f11 0($t1) #load float from array1
    lwc1 $f12 0($t2) #load float from array2
    lwc1 $f13 0($t3) #load float from result array3
    nop 
    mul.s $f10 $f11 $f12 #multiply floats, store result as temp in $f10
    nop

    add.s $f13 $f13 $f10 #add the product to the value read from array3

    swc1 $f13 0($t3) #store the resulting float in array3

#call index_of_A: leaves size*i + k in $s2
    move $s0 $ra    #save return address into s0 (jal overwrites $ra)
    nop
    jal index_of_A  #get addr offset for array1
    nop
    move $ra $s0    #restore return address that was saved into s0

#call index_of_B: leaves size*k + j in $s3
    move $s0 $ra    #save return address into s0 (jal overwrites $ra)
    nop
    jal index_of_B  #get addr offset for array2
    nop
    move $ra $s0    #restore return address that was saved into s0

    # NOTE(review): $s2 and $s3 are element indices (they are never multiplied
    # by the 4-byte float size) and they are added to the previous pointer
    # value rather than to the array base. Once an odd index has been added,
    # $t1/$t2 are no longer multiples of 4 and the next lwc1 uses an
    # unaligned address.
    add $t1 $t1 $s2 # advance the array1 pointer by $s2
    add $t2 $t2 $s3 # advance the array2 pointer by $s3
    addi $t3 $t3 4 # advance the array3 pointer by one float (4 bytes)

    addi $t6 $t6 -1 #decrease k counter
    bne $t6 $0 k_loop #repeat k_loop

    addi $t5 $t5 -1 #decrease j counter
    bne $t5 $0 j_loop #repeat j_loop

    addi $t4 $t4 -1 #decrease i counter
    bne $t4 $0 i_loop #repeat i_loop
    # No jump follows the last branch: when $t4 reaches 0, execution falls
    # through into whatever code comes next in the file.

#used regs: f0-f5, f10-13
index_of_A:
    # Index helper for array1: $s2 = size * i + k.
    #   In:  $s1 = size, $t4 = i counter, $t6 = k counter
    #   Out: $s2 (overwritten); no other register is modified
    # The result counts elements; it is not scaled by the 4-byte float size.
    mul     $s2, $t4, $s1       # $s2 = i * size
    add     $s2, $t6, $s2       # $s2 = k + i * size
    jr      $ra                 # return to the caller


index_of_B:
    # Index helper for array2: $s3 = size * k + j.
    #   In:  $s1 = size, $t6 = k counter, $t5 = j counter
    #   Out: $s3 (overwritten); no other register is modified
    # The result counts elements; it is not scaled by the 4-byte float size.
    mul     $s3, $t6, $s1       # $s3 = k * size
    add     $s3, $t5, $s3       # $s3 = j + k * size
    jr      $ra                 # return to the caller


# Done multiplying...
# Prints a completion message and returns from main.
# NOTE(review): no branch or jump in this listing targets print_and_end, and
# the code placed before it ends with "jr $ra", so it is not reached by
# fall-through either.
    .data
sm: .asciiz "Done multiplying\n"
    .text
print_and_end:
    li $v0 4    # syscall 4 (print_str)
    la $a0 sm   # $a0 = address of the completion message
    syscall

# Done with the program!
    lw $31 saved_ret_pc # reload the return address stored at the top of main; NOTE(review): saved_ret_pc is not defined in this listing
    jr $31      # Return from main

#Terminate the program
# Not reached by fall-through: the "jr $31" above always transfers control away.
    li $v0, 10  # syscall 10 (exit)
    syscall

.end main

但是我不明白哪里出了错,因为同样的代码在我的另一个例子中是可以正常运行的:

assembly mips
1个回答
0
投票

(4x4矩阵乘法)好的,我已经自己弄明白了,所以在这里回答自己的问题。

我沿途学到了很多东西,其中包括

  1. .align并不是运行这段代码所必需的,没有它们代码也能运行。也许只是在这种特殊情况下我并不需要它。
  2. $f12和$f13专门用于打印浮点数。如果把浮点数保存在其他寄存器中,则不会被打印。
  3. 我做的第一个偏移计算是0,这就是为什么我需要在循环的顶部而不是在结尾处添加它。这就是导致所有麻烦的原因。
  4. 务必正确计算索引。看看我代码中的注释,就能知道我做了什么 :)

这是我的代码的最终可用版本!你可以在我的GitHub上查看矩阵乘法的Python、C和汇编实现:https://github.com/leochoo/cmpa

.data

# Matrices: A and B are the inputs, R receives the product. Each label is
# followed by 16 floats, i.e. one 4x4 matrix; the loop code indexes them as
# element (row * matrix_size + column).
    .globl A
    .globl B
    .globl R

    .align 4 # alignment request before A (presumably a 2^4 = 16-byte boundary, as in SPIM/MARS -- confirm for the simulator in use)
    A: .float 1.00, 0.00, 3.14, 2.72, 2.72, 1.00, 0.00, 3.14, 1.00, 1.00, 1.00, 1.00, 1.00, 2.00, 3.00, 4.00
    .align 4 
    B: .float 1.00, 1.00, 0.00, 3.14, 0.00, 1.00, 3.14, 2.72, 0.00, 1.00, 1.00, 0.00, 4.00, 3.00, 2.00, 1.00
    .align 4 
    R: .float 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00


    matrix_size: .word 4 #row and column size
    float_size: .word 4 #size of one float in bytes.
                        #The loop code multiplies an element index by this
                        #value to turn it into a byte offset, so consecutive
                        #elements are 4 bytes apart in memory.

    tempSum: .float 0.00 #constant 0.0 used to initialise and reset the running sum in $f5

    lineBrk: .asciiz "\n"


    #Labels printed by the debugging output inside the loops
    arr_1: .asciiz "A: "
    arr_2: .asciiz " B: "
    arr_3: .asciiz " R: "
    i_:     .asciiz " i:"
    j_:     .asciiz "j:"
    k_:     .asciiz "k:"
    space_: .asciiz " "
    bar_:   .asciiz " | "





#TEXT (MAIN) SECTION - multiply matrix A by matrix B, store the result in R
# Prints a title, then sets up the registers used by the loop nest below:
#   $t1, $t2 = addresses of the current A / B element (recomputed in k_loop)
#   $t3      = address of the current R element
#   $s1      = matrix_size, $s2 = float_size
#   $s6, $s7 = base addresses of A / B
#   $f5      = running sum, starts at 0.0
.text
    .globl main
main:

    #print title
        .data
    lb_:    .asciiz "Vector Multiplication\n"
    lbd_:   .byte 1, -1, 0, 128
    lbd1_:  .word 0x76543210, 0xfedcba98
        .text
    li $v0 4    # syscall 4 (print_str)
    la $a0 lb_
    syscall

#load matrices
la $t1 A
la $t2 B
la $t3 R

#load variables
# (The former "li $s1 0" was a dead store: $s1 is overwritten by the load
#  on the next line and is never used to hold an offset.)
lw $s1 matrix_size # $s1 = matrix_size
lw $s2 float_size # $s2 = float_size
l.s $f5 tempSum # $f5 = 0.0, the running sum for one element of R


#store base addresses
move $s6 $t1 # $s6 = base address of matrix A stored
move $s7 $t2 # $s7 = base address of matrix B stored



# Triple loop computing R = A * B:
#   for i in 0..matrix_size-1:
#       for j in 0..matrix_size-1:
#           for k in 0..matrix_size-1:
#               tempSum += A[i][k] * B[k][j]
#           R[i][j] = tempSum; tempSum = 0
# Registers: $t4 = i, $t5 = j, $t6 = k, $s1 = matrix_size, $s2 = float_size,
#            $s6 / $s7 = base addresses of A / B, $t3 = address of the current
#            R element, $f5 = running sum. Debug output is printed on every
#            k iteration and after every stored element.
    li $t4 0 # i counter
i_loop:
        li $t5 0 # j counter, restarted for every i
    j_loop:
            li $t6 0 # k counter, restarted for every j
        k_loop:
            #update index of A[i:t4][k:t6]
                # $s0 = offset result
                # $s1 = matrix_size: 4
                # $s2 = float_size: 4
                # $s6 = base address of A

                #calculate offset
                mul $s0 $s1 $t4 # s0 = matrix_size*i
                add $s0 $s0 $t6 # s0 = s0 + k
                mul $s0 $s0 $s2 # s0 = float_size*s0 (element index -> byte offset)

                #address = base + offset, recomputed from the base on every
                #iteration (the offset is 0 on the very first iteration)
                add $t1 $s6 $s0 # $t1 = address of A[i][k]

            #update index of B[k:t6][j:t5]
                # $s0 = offset result
                # $s1 = matrix_size: 4
                # $s2 = float_size: 4
                # $s7 = base address of B

                #calculate offset
                mul $s0 $s1 $t6 # s0 = matrix_size*k
                add $s0 $s0 $t5 # s0 = s0 + j
                mul $s0 $s0 $s2 # s0 = float_size*s0 (element index -> byte offset)

                #address = base + offset
                add $t2 $s7 $s0 # $t2 = address of B[k][j]


            #load matrix A and B
            lwc1 $f1 0($t1) #load float from matrix A
            lwc1 $f2 0($t2) #load float from matrix B
            nop
                #print i, j, k (debug output)

                li $v0 4        
                la $a0 i_
                syscall         # "i"

                li $v0 1 
                move $a0 $t4
                syscall         # value of i

                li $v0 4        
                la $a0 j_
                syscall         # "j"

                li $v0 1 
                move $a0 $t5
                syscall         # value of j


                li $v0 4        
                la $a0 k_
                syscall         # "k"

                li $v0 1 
                move $a0 $t6
                syscall         # value of k

                li $v0 4        # " | "
                la $a0 bar_
                syscall 

                #print A and B (debug output); the values are reloaded into
                #$f12 for printing, $f1/$f2 keep the operands
                li $v0 4    
                la $a0 arr_1
                syscall

                lwc1 $f12 0($t1) #A
                li $v0 2
                syscall

                li $v0 4    
                la $a0 arr_2
                syscall

                lwc1 $f12 0($t2) #B
                li $v0 2
                syscall


            #Accumulate: tempSum ($f5) += A[i][k] * B[k][j]
            #### e.g. first result R[0][0]: (1*1)+(0*0)+(3.14*0)+(2.72*4)

            # (A * B)
            nop
            mul.s $f0 $f1 $f2 # $f0 = A[i][k] * B[k][j]
            nop
            #tempSum:$f5 = tempSum + (A * B)
            add.s $f5 $f5 $f0
            nop
                ####after 1st k iteration: (A*B)
                ####after 2nd k iteration: (A*B) + (A*B)         


            #DON'T UPDATE index of R here
            #it only needs to advance 16 times in total, hence in j_loop

        #k_loop end condition
        addi $t6 $t6 1 # k++
        bne $t6 $s1 k_loop #if k != matrix_size, repeat k_loop


    #store R[i][j] = tempSum:$f5
    swc1 $f5 0($t3) #store the finished sum into the current element of R
    nop

    #reset tempSum = 0 for the next element
    l.s $f5 tempSum

    #load and print element in R (debug output)
    li $v0 4    
    la $a0 arr_3 # " R "
    syscall     

    lwc1 $f12 0($t3)
    li $v0 2
    syscall 

    li $v0 4
    la $a0 lineBrk #print( '\n' )
    syscall

    #advance the R pointer by one float; R is filled sequentially, one
    #element per j iteration
    add $t3 $t3 $s2


    #j_loop end condition
    addi $t5 $t5 1 # j++
    bne $t5 $s1 j_loop #if j != matrix_size, repeat j_loop

#i_loop end condition
addi $t4 $t4 1 # i++
bne $t4 $s1 i_loop #if i != matrix_size, repeat i_loop


# Multiplication finished: report completion, then exit.
    .data
sm: .asciiz "Done multiplying\n"
    .text
print_and_end:
    # Print the completion message.
    la      $a0, sm             # $a0 = address of the message
    addiu   $v0, $zero, 4       # $v0 = 4 (print_str)
    syscall

    # End the program.
    addiu   $v0, $zero, 10      # $v0 = 10 (exit)
    syscall

.end main
© www.soinside.com 2019 - 2024. All rights reserved.