原子增量在中断中无法按预期工作

问题描述 投票:0回答:1

我有一个带有 RT1176 SoC(800MHz Cortex-m7 和 400MHz Cortex-m4)的 Google Coral Dev Micro,m7 运行 FreeRTOS,m4 运行裸机,使用 GCC none eabi 9.3.1 进行编译,并带有以下标志:

-Wall -Wno-psabi -mthumb -fno-common -ffunction-sections -fdata-sections -ffreestanding -fno-builtin -mapcs-frame --specs=nano.specs --specs=nosys.specs -u _printf_float -std=gnu99 -g -Os -save-temps -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16 -DNDEBUG

我想使用共享内存在两个内核之间传输定时器中断中生成的数据,并且我在共享内存中创建了自己的无锁 FIFO 缓冲区:

fifo.h:

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>

/// Fixed-size ring buffer intended for exactly one producer and one consumer.
///
/// The producer only ever writes `write_idx`, the consumer only ever writes
/// `read_idx`; each side publishes its index AFTER touching the slot, so the
/// other side never observes a slot that is still being written or read.
/// Both indices point at the most recently written / read slot, i.e. the next
/// slot to use is `(idx + 1) % num_elements`.
///
/// NOTE(review): all operations use the default (sequentially consistent)
/// std::atomic ordering. Whether that is sufficient between heterogeneous
/// cores depends on the platform's barrier/shareability configuration —
/// verify on the target.
///
/// @tparam T             Element type; must be usable with std::atomic.
/// @tparam num_elements  Number of slots in the ring. `put()` always keeps
///                       two slots free, so at most `num_elements - 2`
///                       entries are stored at once.
template <typename T, std::size_t num_elements>
class LockLessFifo {
    // 0 would divide by zero in the index arithmetic; 1 would make put() spin forever.
    static_assert(num_elements >= 2, "LockLessFifo needs at least two slots");

   public:
    /// Creates an empty FIFO with both indices at 0 and zeroed storage.
    LockLessFifo() : read_idx(0), write_idx(0), buffer{} {}

    /// @return Number of entries currently stored.
    ///
    /// Each index is loaded exactly once. Re-reading an index that the other
    /// side may advance (and wrap) between the comparison and the subtraction
    /// could otherwise underflow and report a huge bogus size.
    std::size_t size() const {
        const std::size_t read = read_idx.load();
        const std::size_t write = write_idx.load();
        if (read <= write) {
            return write - read;
        }
        return num_elements - (read - write);
    }

    /// @return The number of slots in the underlying storage (`num_elements`).
    constexpr std::size_t capacity() const { return num_elements; }

    /// Appends `entry`, busy-waiting while fewer than two slots are free.
    /// Must only be called from the single producer context.
    void put(const T& entry) {
        // Wait for space
        while ((num_elements - size()) < 2) {
        }

        const std::size_t new_write_idx = (write_idx.load() + 1) % num_elements;
        buffer[new_write_idx].store(entry);

        // Update the write index AFTER writing the data
        write_idx.store(new_write_idx);
    }

    /// Removes and returns the oldest entry.
    /// Must only be called from the single consumer context.
    ///
    /// @param block  When true (default), busy-wait until an entry is
    ///               available. When false and the FIFO is empty, return the
    ///               sentinel `static_cast<T>(-1)` immediately; callers cannot
    ///               distinguish it from a stored value equal to the sentinel.
    T get(const bool block = true) {
        if (!block && size() == 0) {
            return static_cast<T>(-1);
        }

        while (size() == 0) {
        }

        // Consume
        const std::size_t new_read_idx = (read_idx.load() + 1) % num_elements;
        const T retrieved_value = buffer[new_read_idx].load();

        // Update the read index AFTER reading (to signal availability to the producer)
        read_idx.store(new_read_idx);
        return retrieved_value;
    }

   private:
    std::atomic<std::size_t> read_idx;   // last slot consumed; written by the consumer only
    std::atomic<std::size_t> write_idx;  // last slot produced; written by the producer only
    std::array<std::atomic<T>, num_elements> buffer;
};

shared_fifo.h

#include "fifo.h"

// Element type carried through the FIFO.
using shared_element_t = uint32_t;
// Byte budget used to derive the number of FIFO slots.
constexpr std::size_t SHARED_MEMORY_SIZE = 0x1700;  // Manually calculated free space using map file
// Number of FIFO slots: the byte budget divided by the element size.
constexpr std::size_t SHARED_MEMORY_ELEMENTS = SHARED_MEMORY_SIZE / sizeof(shared_element_t);
// The FIFO instance, placed in the ".noinit.$rpmsg_sh_mem" linker section.
// NOTE(review): this is a definition (not a declaration) in a header, so every
// translation unit that includes it defines the object — presumably each core's
// image includes it exactly once; confirm.
// NOTE(review): the class also holds two index members besides the slot array,
// so the object is presumably slightly larger than SHARED_MEMORY_SIZE — verify
// against the section size in the map file.
LockLessFifo<shared_element_t, SHARED_MEMORY_ELEMENTS> shared_fifo __attribute__((section(".noinit.$rpmsg_sh_mem")));

我并不指望这段 FIFO 代码是完美的,但它确实可以工作。我只是为了完整起见才把它贴出来,以便提供全部背景信息。

从m7读取FIFO如下: main_m7.cpp

#include <cstdio>

#include "libs/base/ipc_m7.h"

#include "shared_fifo.h"

/// M7 entry point: starts the M4 core, then forever pops two values from the
/// shared FIFO and prints them together with the FIFO fill level.
///
/// @param param  Unused.
extern "C" [[noreturn]] void app_main(void* param) {
    (void)param;

    coralmicro::IpcM7::GetSingleton()->StartM4();
    while (true) {
        // Both calls block until the producer has published an entry.
        const uint32_t counter1 = shared_fifo.get();  // Works
        const uint32_t counter2 = shared_fifo.get();  // Always 0

        const std::size_t fifo_size = shared_fifo.size();
        constexpr std::size_t fifo_capacity = shared_fifo.capacity();

        // Cast every argument to unsigned long so the %lu specifiers match the
        // argument types regardless of how uint32_t / size_t are defined on
        // the toolchain (avoids %zu, which reduced printf variants may lack).
        printf("[M7] counter: %lu/%lu size: %lu/%lu\r\n",
               static_cast<unsigned long>(counter1),
               static_cast<unsigned long>(counter2),
               static_cast<unsigned long>(fifo_size),
               static_cast<unsigned long>(fifo_capacity));
    }
}

m4 上的 FIFO 填充如下:

main_m4.cpp

#include <atomic>
#include <cmath>
#include <cstdio>

#include "fsl_pit.h"

#include "shared_fifo.h"


// File-local tick counters: both are incremented in my_pit_irq() and read in app_main().
// counter1 is updated with a separate load() + store(), counter2 with fetch_add().
static std::atomic<std::size_t> counter1 = 0;
static std::atomic<std::size_t> counter2 = 0;

// PIT timer interrupt handler: clears the timer flag, then increments both counters.
void my_pit_irq() {
    // Clear IRQ flag
    PIT_ClearStatusFlags(PIT1, kPIT_Chnl_0, kPIT_TimerFlag);
    
    // Two separate atomic operations (a load followed by a store); the pair is
    // not a single atomic read-modify-write.
    counter1.store(counter1.load() + 1);  // Works
    // Single atomic read-modify-write.
    counter2.fetch_add(1);                // Always 0

    // SDK-provided macro; presumably a barrier so the flag clear completes
    // before the handler returns — confirm in the SDK headers.
    SDK_ISR_EXIT_BARRIER;
}


/// M4 entry point: starts the timer, then endlessly pushes the current value
/// of each counter into the shared FIFO, counter1 first and counter2 second.
///
/// @param param  Unused.
extern "C" [[noreturn]] void app_main(void* param) {
    static_cast<void>(param);

    configure_and_start_timer(); // removed for clarity

    for (;;) {
        // Snapshot each counter immediately before queueing it.
        const std::size_t first_snapshot = counter1.load();
        shared_fifo.put(first_snapshot);   // Works

        const std::size_t second_snapshot = counter2.load();
        shared_fifo.put(second_snapshot);  // Always 0
    }
}

这段代码的输出是:

(...)    
[M7] counter: 71666076/0 size: 1470/1472
[M7] counter: 71666076/0 size: 1470/1472
[M7] counter: 71666077/0 size: 1470/1472
[M7] counter: 71666077/0 size: 1470/1472
[M7] counter: 71666077/0 size: 1470/1472
(...)

为什么不正确的增量写法(counter1)能正常工作,而正确的增量写法(counter2)却不起作用?我查看了反汇编代码:

PIT 中断:

// Compiler output for my_pit_irq(): flag clear, counter1 load/store increment,
// counter2 exclusive-access increment loop, then the exit barrier and return.
_Z10my_pit_irqv:
    
    // Clear IRQ flag
    ldr r3, .L3
    movs r2, #1
    str r2, [r3, #268]

    ldr r2, .L3+4      // Load .LANCHOR0
    dmb ish
    ldr r3, [r2]       // Load value stored at address of .LANCHOR0 (.load())
    dmb ish
    adds r3, r3, #1    //  Increment by 1
    dmb ish
    str r3, [r2]       // Store incremented value (.store())
    

    ldr r3, .L3+8      // Load .LANCHOR1
    dmb ish            
    dmb ish            // Double barrier? (presumably one trails the store above and one leads the fetch_add)
.L2:
    ldrex r2, [r3]      // load (exclusive)
    adds r2, r2, #1     // increment
    strex r1, r2, [r3]  // store (exclusive); r1 receives the status
    cmp r1, #0          // check: 0 means the exclusive store succeeded
    bne .L2             // retry

    dmb ish
    dsb 0xF             // presumably emitted by SDK_ISR_EXIT_BARRIER

    bx lr             
.L4:
    .align  2
.L3:
    .word   1074626560 // = 0x400D8000; the flag write above targets this address + 268 (0x10C)
    .word   .LANCHOR0  // counter1
    .word   .LANCHOR1  // counter2

主函数(app_main):

// Compiler output for the M4 app_main() loop: load each counter, spill it to
// the stack, and pass its address to LockLessFifo::put().
app_main:
    push {r0, r1, r2, lr}
    ldr r6, .L14            // Counter1
    ldr r4, .L14+4          // Address of shared_fifo
    ldr r5, .L14+8          // Counter2
.L13:

    // This sequence is the same as the one for counter2 below
    add r1, sp, #4          // r1 = address of the stack slot passed as `entry`
    ldr r3, [r6]            // Load the value of counter1
    dmb ish
    mov r0, r4              // r0 = this (shared_fifo)
    str r3, [sp, #4]
    bl  _ZN5LockLessFifoImLj1472EE3putERKm  // Put it in the FIFO
    dmb ish

    // This sequence is the same as the one for counter1 above
    add r1, sp, #4
    ldr r3, [r5]            // Load the value of counter2
    dmb ish
    mov r0, r4
    str r3, [sp, #4]
    bl _ZN5LockLessFifoImLj1472EE3putERKm  // Put it in the FIFO
    b .L13
.L15:
    .align  2
.L14:
    .word   .LANCHOR0            // counter1
    .word   _ZN511shared_fifoE
    .word   .LANCHOR1            // counter2

我不明白哪里出了问题。我知道 FIFO 可以正常工作,因为增量正在工作,并且我们看到终端中打印的数字正在上升。生成的程序集非常相似,对我来说看起来是正确的。不幸的是,我(还)无法连接调试器,因为我需要将 JTAG 接头焊接到板上,并且我们需要等待才能进行任何修改。

感谢您抽出时间来查看。

c++ multithreading embedded atomic
1个回答
0
投票

在评论中 @NateEldredge 的帮助下,我找到了答案:C++ 原子在这个硬件平台上无法正确编译。编译器生成

dmb ish
指令,仅同步内部可共享域。

我通过在每个核心上增加共享计数器 1000000 次来验证这一点,结果不等于 2000000。

这可能可以通过手动将

dmb ish
替换为
dmb osh
指令来修复,但下面这种简单粗暴的临时修改并不奏效:

// the attempt that didn't work
// Increments the shared counter one million times, issuing a `dmb osh`
// barrier (with a compiler memory clobber) before and after each increment.
// The barriers order memory accesses but do not make the increment itself
// an indivisible read-modify-write.
for (size_t i = 0; i < 1000000; i++) {
    asm("dmb osh" : : : "memory");
    unica::shared_fifo.shared_counter++;  // not actually atomic wrt. other core
    asm("dmb osh" : : : "memory");
}

不幸的是,我将放弃使用

std::atomic
并重新使用
volatile
并在必要时屏蔽 IRQ

© www.soinside.com 2019 - 2024. All rights reserved.