GCC/LD 位置无关代码:使用指令相对寻址访问数据

问题描述 投票:0回答:1

动机

假设我有:

/* Global with no initializer (zero-initialized); per its name, meant to land in .bss. */
int       some_bss_values[8];
/* Initialized, writable global; per its name, meant to land in .data. */
int       some_data_values[] = {1,2,3,4,5,6,7,8};
/* Initialized, const-qualified global; per its name, meant to land in .rodata. */
int const some_rodata_values[] = {9,10,11,12,13,14,15,16};

/*
 * Demo routine that touches one object from each of bss, data, and rodata.
 *
 * x: index applied to all three 8-element arrays; no bounds check is done,
 *    so the caller must pass a value in 0..7.
 * Returns the value of some_data_values[x] from before the increment.
 */
int some_function(int x) {
   /* Read from the const array, store into the uninitialized array. */
   some_bss_values[x] = some_rodata_values[x];
   /* Post-increment: the stored element grows by one, the old value is returned. */
   return some_data_values[x]++;
}

我的最终目标是将其编译为可以在运行时加载的二进制 blob(这是一个嵌入式系统,因此没有动态链接器,甚至没有 ELF 加载器)。具体来说,我希望能够在任意地址加载此 blob(包括数据),然后跳转到 `some_function`。

我尝试了什么

我尝试编写一个简单的链接器脚本:

/* Lay out the output as three sections, in order: code, writable data, read-only data. */
SECTIONS {
    /* Code: collect the input sections named .text. */
    .text : {
        *(.text);
    }
    /* Writable data; the trailing "=0" sets this output section's fill pattern to zero. */
    .data : { 
        *(.bss); /*Placed here because I explicitly want the BSS to be part of the image*/
        *(.data); 
    } =0
    /* Read-only data: collect the input sections named .rodata. */
    .rodata : { *(.rodata); }
}

我将示例代码编译为 ELF:

# Using -g and -O0 so we can have a readable disassembly. I'm actually using
# a cross-compiler but we'll use regular gcc with x86 for the sake of the question

gcc -Wl,-esome_function -o t.elf -fPIC -nostdlib -T my_linker_script.ld -g -O0 my_code.cpp

然后我生成了一个二进制映像:

objcopy -Obinary -j.text -j.data -j.rodata t.elf t.bin

问题

上面的命令生成了一个二进制 blob 供我使用,包括 BSS 的显式零,但查看反汇编会凸显出一个问题:

objdump -sSxC t.elf

...
Sections:
Idx Name          Size      VMA               LMA               File off  Algn
  0 .note.gnu.build-id 00000024  0000000000000000  0000000000000000  00200000  2**2
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  1 .text         00000053  0000000000000024  0000000000000024  00200024  2**0
                  CONTENTS, ALLOC, LOAD, READONLY, CODE
  2 .data         00000040  0000000000000080  0000000000000080  00200080  2**5
                  CONTENTS, ALLOC, LOAD, DATA
...

Disassembly of section .text:

0000000000000024 <some_function(int)>:
int       some_bss_values[8];
int       some_data_values[] = {1,2,3,4,5,6,7,8};
int const some_rodata_values[] = {9,10,11,12,13,14,15,16};

//Some silly code that shows usage of bss, data, and rodata
int some_function(int x) {
  24:   55                      push   %rbp
  25:   48 89 e5                mov    %rsp,%rbp
  28:   89 7d fc                mov    %edi,-0x4(%rbp)
   some_bss_values[x] = some_rodata_values[x];
  2b:   8b 45 fc                mov    -0x4(%rbp),%eax
  2e:   48 98                   cltq   
  30:   48 8d 14 85 00 00 00    lea    0x0(,%rax,4),%rdx
  37:   00 
  38:   48 8d 05 a1 00 00 00    lea    0xa1(%rip),%rax        # e0 <some_rodata_values>
  3f:   8b 0c 02                mov    (%rdx,%rax,1),%ecx
  42:   48 c7 c0 80 00 00 00    mov    $0x80,%rax
  49:   8b 55 fc                mov    -0x4(%rbp),%edx
  4c:   48 63 d2                movslq %edx,%rdx
  4f:   89 0c 90                mov    %ecx,(%rax,%rdx,4)
   return some_data_values[x]++;
  52:   48 c7 c0 a0 00 00 00    mov    $0xa0,%rax
  59:   8b 55 fc                mov    -0x4(%rbp),%edx
  5c:   48 63 d2                movslq %edx,%rdx
  5f:   8b 04 90                mov    (%rax,%rdx,4),%eax
  62:   8d 70 01                lea    0x1(%rax),%esi
  65:   48 c7 c2 a0 00 00 00    mov    $0xa0,%rdx
  6c:   8b 4d fc                mov    -0x4(%rbp),%ecx
  6f:   48 63 c9                movslq %ecx,%rcx
  72:   89 34 8a                mov    %esi,(%rdx,%rcx,4)
}
  75:   5d                      pop    %rbp
  76:   c3                      retq  

在这里我们看到,从 `.rodata` 读取时正确地使用了指令相对寻址(`lea 0xa1(%rip),%rax`)。然而,对 BSS 的访问似乎使用了硬编码地址 `0x80`(即 `.data` 输出段的起始地址),对数据段的访问则使用了硬编码地址 `0xA0`。

如何指示 gcc/ld 对 BSS 和 `.data` 段中的数据也使用指令相对寻址?

gcc embedded ld
1个回答
0
投票

编译器和链接器如何生成访问变量的代码,其细节在很大程度上取决于您的体系结构,因此我将在本答案中重点关注 RISC-V(64 位)。我基本上只是复制了您正在做的事情,只不过我在链接器脚本中指定了入口点,结果一切正常。最终的汇编代码通过引用全局偏移表(GOT)来访问变量,全局偏移表是目标文件中的一个地址数组。

$ cat test.c
/* Global with no initializer (zero-initialized); per its name, meant to land in .bss. */
int       some_bss_values[8];
/* Initialized, writable global; per its name, meant to land in .data. */
int       some_data_values[] = {1,2,3,4,5,6,7,8};
/* Initialized, const-qualified global; per its name, meant to land in .rodata. */
int const some_rodata_values[] = {9,10,11,12,13,14,15,16};

/*
 * Demo routine that touches one object from each of bss, data, and rodata.
 *
 * x: index applied to all three 8-element arrays; no bounds check is done,
 *    so the caller must pass a value in 0..7.
 * Returns the value of some_data_values[x] from before the increment.
 */
int some_function(int x) {
   /* Read from the const array, store into the uninitialized array. */
   some_bss_values[x] = some_rodata_values[x];
   /* Post-increment: the stored element grows by one, the old value is returned. */
   return some_data_values[x]++;
}

$ cat test.ld
/* Use some_function as the entry symbol of the output file. */
ENTRY(some_function)

/* Lay out the output as three sections, in order: code, writable data, read-only data. */
SECTIONS {
    /* Code: collect the input sections named .text. */
    .text : {
        *(.text);
    }
    /* Writable data; the trailing "=0" sets this output section's fill pattern to zero. */
    .data : {
        *(.bss); /*Placed here because I explicitly want the BSS to be part of the image*/
        *(.data);
    } =0
    /* Read-only data: collect the input sections named .rodata. */
    .rodata : { *(.rodata); }
}

$ riscv64-unknown-elf-gcc -Og -Wall test.c -fPIC -T test.ld -nostdlib && riscv64-unknown-elf-objdump -d a.out
c:/msys64/mingw64/bin/../lib/gcc/riscv64-unknown-elf/12.2.0/../../../../riscv64-unknown-elf/bin/ld.exe: warning: section `.data' type changed to PROGBITS
c:/msys64/mingw64/bin/../lib/gcc/riscv64-unknown-elf/12.2.0/../../../../riscv64-unknown-elf/bin/ld.exe: warning: a.out has a LOAD segment with RWX permissions

a.out:     file format elf64-littleriscv


Disassembly of section .text:

0000000000000000 <some_function>:
   0:   050a                    sll     a0,a0,0x2
   2:   00000797                auipc   a5,0x0
   6:   0867b783                ld      a5,134(a5) # 88 <_GLOBAL_OFFSET_TABLE_+0x18>
   a:   97aa                    add     a5,a5,a0
   c:   4398                    lw      a4,0(a5)
   e:   00000797                auipc   a5,0x0
  12:   06a7b783                ld      a5,106(a5) # 78 <_GLOBAL_OFFSET_TABLE_+0x8>
  16:   97aa                    add     a5,a5,a0
  18:   c398                    sw      a4,0(a5)
  1a:   00000797                auipc   a5,0x0
  1e:   0667b783                ld      a5,102(a5) # 80 <_GLOBAL_OFFSET_TABLE_+0x10>
  22:   97aa                    add     a5,a5,a0
  24:   4388                    lw      a0,0(a5)
  26:   0015071b                addw    a4,a0,1
  2a:   c398                    sw      a4,0(a5)
  2c:   8082                    ret
© www.soinside.com 2019 - 2024. All rights reserved.