在 NASM 汇编中交错合并 5 个字符串的字符

Question

我必须编写一个汇编程序，把用户在键盘上输入的五个不同字符串的字符交错合并（依次从每个字符串中各取一个字符），例如，如果我有：

S1：“Hello”
S2：“Bye”
S3：“Apple”
S4：“Car”
S5：“Tree”

结果是：“HBACTeyparleprelloe”

这是我到目前为止所做的，它可以对长度相同的字符串进行交错合并，但我不知道该怎么做才能让它适用于长度不同的字符串，也不知道是否有更好的实现方法。这是我最早写的汇编程序之一，如能得到帮助我将不胜感激。

; Initialised data: the prompt shown at start-up.
segment .data
; Prompt text followed by a line feed (0x0A).
instruccion db 'Ingrese 5 cadenas de igual longitud no mayores a 20 caracteres:',0x0A
; Byte length of the prompt, computed by the assembler from the current position.
lonI EQU ($-instruccion)

; Uninitialised storage.
segment .bss
contador resb 1     ; 1 byte; not referenced by the code shown in this listing
cad1 resb 20        ; five input buffers of 20 bytes each
cad2 resb 20
cad3 resb 20
cad4 resb 20
cad5 resb 20
cad6 resb 101       ; output buffer: room for 5 * 20 characters plus one line feed

segment .text
    global _start

; ----------------------------------------------------------------------
; _start -- program entry point (32-bit Linux, int 0x80 system calls).
;
; Prints the prompt, reads five lines into cad1..cad5, interleaves them
; character by character into cad6 and prints the result.
;
; Every string is assumed to be as long as the first one. That length is
; the offset of the first line feed in cad1, capped at the 20-byte
; buffer size.
;
; Register roles inside the merge loop (ciclo):
;   EBX = index of the character taken from every string in this pass
;   ECX = outer pass counter; reused as skip counter inside each stanza
;   EDX = copy of the outer pass counter while ECX is being reused
;   EDI = write position in cad6
; ----------------------------------------------------------------------
_start:
    mov edx,lonI
    mov ecx,instruccion
    call imprimir

    mov edx,20d                 ; read size; leer leaves EDX untouched, so it applies to all five reads
    mov ecx,cad1
    call leer

    mov ecx,cad2
    call leer

    mov ecx,cad3
    call leer

    mov ecx,cad4
    call leer

    mov ecx,cad5
    call leer

    ; Length of the first string = offset of its line feed.
    cld
    mov edi,cad1
    mov ecx,20d                 ; never scan past the end of cad1
    mov eax,0Ah
    repne scasb
    jne longitud_lista          ; no line feed inside the buffer: all 20 bytes are text
    inc ecx                     ; the line feed itself is not part of the string
    longitud_lista:
    mov eax,20d
    sub eax,ecx                 ; EAX = characters per string

    mov edi,cad6
    mov ecx,eax
    mov ebx,0
    test ecx,ecx
    jz mezcla_lista             ; empty string: LOOP with ECX = 0 would wrap around instead of stopping
    ciclo:
        ; Stanza 1: skip EBX characters of cad1, then copy one to cad6.
        mov esi,cad1
        cld
        mov edx,ecx             ; keep the outer counter while ECX counts the skip
        mov ecx,ebx
        cmp ebx,0
        jne THEN1
        je ELSE1
        THEN1:
            lodsb               ; advance ESI by one byte (AL is discarded)
            loop THEN1
        ELSE1:
            movsb               ; copy byte [ESI] to [EDI]

        ; Stanza 2: same for cad2.
        mov esi,cad2
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN2
        je ELSE2
        THEN2:
            lodsb
            loop THEN2
        ELSE2:
            movsb

        ; Stanza 3: same for cad3.
        mov esi,cad3
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN3
        je ELSE3
        THEN3:
            lodsb
            loop THEN3
        ELSE3:
            movsb

        ; Stanza 4: same for cad4.
        mov esi,cad4
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN4
        je ELSE4
        THEN4:
            lodsb
            loop THEN4
        ELSE4:
            movsb

        ; Stanza 5: same for cad5.
        mov esi,cad5
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN5
        je ELSE5
        THEN5:
            lodsb
            loop THEN5
        ELSE5:
            movsb

        mov ecx,edx             ; restore the outer counter
        inc ebx                 ; next character index
    loop ciclo

    mezcla_lista:
    mov eax,0Ah
    stosb                       ; terminate the result with a line feed
    mov edx,edi
    sub edx,cad6                ; EDX = bytes actually produced, line feed included
    mov ecx,cad6
    call imprimir

    xor ebx,ebx                 ; exit status 0 (imprimir left EBX = 1)
    mov eax,1           ;system call number (sys_exit)
    int 0x80            ;call kernel

    ; leer -- read from standard input with the Linux int 0x80 interface.
    ; In:      ECX = destination buffer, EDX = maximum number of bytes
    ; Out:     EAX = value returned by the system call
    ; Changes: EBX
    SYS_READ equ 3
    FD_STDIN equ 0
    leer:
        mov eax,SYS_READ
        mov ebx,FD_STDIN
        int 0x80
        ret

    ; imprimir -- write to standard output with the Linux int 0x80 interface.
    ; In:      ECX = address of the bytes to print, EDX = number of bytes
    ; Out:     EAX = value returned by the system call
    ; Changes: EBX
    SYS_WRITE equ 4
    FD_STDOUT equ 1
    imprimir:
        mov eax,SYS_WRITE
        mov ebx,FD_STDOUT
        int 0x80
        ret

Answer 1

结果是：“HBACTeyparleprelloe”

我当然希望这是一个打字错误，否则这确实会成为一个非常讨厌的练习！我将假设为“HBACTeyparleprelleoe”。

它可以对相同长度的字符串进行交错合并

您现在的代码看起来确实是正确的，但为什么要写得如此复杂？
如果当前索引（字符串中的偏移量）为 0，则只需执行

movsb

。如果当前索引不为 0，那么您需要向前跳过，您可以使用（浪费的）

loop

循环，在其中反复执行

lodsb

指令。有时人们想知道为什么允许

rep lodsb

，这里就算是一个用例。不过其实并不需要这样做，因为真正的解决方案是将：

mov esi,cad1        ; start at the beginning of cad1
cld                 ; string instructions move forward
mov ecx,ebx         ; ECX = number of characters to skip (the current index)
cmp ebx,0
jne THEN1           ; index is not 0: skip first
je ELSE1            ; index is 0: copy right away
THEN1:
    lodsb           ; advance ESI by one byte (the byte loaded into AL is discarded)
    loop THEN1      ; repeat ECX times
ELSE1:
    movsb           ; copy byte [ESI] to [EDI]

完全替换为：

lea     esi, [cad1 + ebx]       ; ESI = address of the character at index EBX in cad1
movsb                           ; copy that byte to [EDI]

或者通过：

movzx   eax, byte [cad1 + ebx]  ; AL = character at index EBX in cad1 (upper bits of EAX cleared)
stosb                           ; store AL at [EDI]

我不知道该怎么做才能让它适用于不同大小的字符串

下面我将介绍3个解决方案，都经过测试。

解决方案 1

因为恰好有 5 个输入字符串，所以 32 位 x86 架构的寄存器数量恰到好处，可以将各个指针保存在自己的寄存器中。这种方法提供了最快的代码，但前提是各个字符串的长度相差不大。

; Output buffer followed by the five input strings.
; Each input string ends with a line feed (10), which the code uses as
; its end-of-string marker.
; TIMES is used for the fill because NASM does not accept the bare
; `N dup V` form (its DUP needs a parenthesised list and NASM >= 2.15).
S:      times 43 db 0                   ; 42 characters in total + 1 spare byte
S1:     db      "Hello", 10
S2:     db      "Bye", 10
S3:     db      "AppleADayKeepsTheDoctorAway", 10
S4:     db      "Car", 10
S5:     db      "Tree", 10

        ...

; ----------------------------------------------------------------------
; Interleave S1..S5 into S, keeping one read pointer per input string in
; its own register.
;
;   EBX, ECX, EDX, ESI, EDI = read pointers into S1..S5
;   EBP                     = write pointer into S
;   EAX                     = scratch (current character / saved EBP)
;
; A string stops contributing once its pointer rests on the line feed.
; The loop ends after the first round in which EBP did not move.
; ----------------------------------------------------------------------

; TAKE_PTR reg -- copy one character from [reg] to the output and step
; both pointers, unless [reg] is the line-feed terminator.
%macro  TAKE_PTR 1
        movzx   eax, byte [%1]          ; Fetch the next character
        cmp     al, 10                  ; Terminator reached ?
        je      %%spent                 ; Yes, contribute nothing
        inc     %1                      ; Step the read pointer
        mov     [ebp], al               ; Append to the output
        inc     ebp
%%spent:
%endmacro

Begin:  mov     ebx, S1                 ; One read pointer per input string
        mov     ecx, S2
        mov     edx, S3
        mov     esi, S4
        mov     edi, S5
        mov     ebp, S                  ; Write pointer
.round: push    ebp                     ; Remember where this round started

        TAKE_PTR ebx
        TAKE_PTR ecx
        TAKE_PTR edx
        TAKE_PTR esi
        TAKE_PTR edi

        pop     eax                     ; Write pointer at the start of the round
        cmp     eax, ebp                ; Did the round append anything ?
        jne     .round                  ; Yes, go for another round

解决方案 2

只需稍作修改，就能处理任意数量的输入字符串。这种方法比前一种稍慢，而且需要对字符串进行填充，使它们具有相同的长度（就像你在问题中所做的那样）。

; Output buffer followed by five input strings of equal storage size.
; Every string occupies SLOT bytes: its text, line-feed padding, and a
; final line feed as terminator. The padding length is computed by the
; assembler; a text longer than SLOT - 1 bytes makes the TIMES count
; negative and stops assembly.
; TIMES is used because NASM does not accept the bare `N dup V` form
; (its DUP needs a parenthesised list and NASM >= 2.15).
SLOT    equ     28                      ; longest text (27) + terminator

S:      times 43 db 0                   ; 42 characters in total + 1 spare byte
S1:     db      "Hello"
        times SLOT - 1 - ($ - S1) db 10 ; padding
        db      10                      ; terminator
S2:     db      "Bye"
        times SLOT - 1 - ($ - S2) db 10
        db      10
S3:     db      "AppleADayKeepsTheDoctorAway"
        times SLOT - 1 - ($ - S3) db 10
        db      10
S4:     db      "Car"
        times SLOT - 1 - ($ - S4) db 10
        db      10
S5:     db      "Tree"
        times SLOT - 1 - ($ - S5) db 10
        db      10

        ...

; ----------------------------------------------------------------------
; Interleave S1..S5 into S using one shared offset for all strings.
;
;   EBX = offset of the current character in every input string
;   EBP = write pointer into S
;   EAX = scratch (current character / saved EBP)
;
; A line feed at the current offset means that string contributes
; nothing in this round. The loop ends after the first round in which
; EBP did not move.
; ----------------------------------------------------------------------

; TAKE_AT label -- append the character at [label + EBX] to the output,
; unless it is a line feed.
%macro  TAKE_AT 1
        movzx   eax, byte [%1 + ebx]    ; Fetch the character at the shared offset
        cmp     al, 10                  ; Line feed here ?
        je      %%pad                   ; Yes, contribute nothing
        mov     [ebp], al               ; Append to the output
        inc     ebp
%%pad:
%endmacro

Begin:  xor     ebx, ebx                ; Shared offset starts at 0
        mov     ebp, S                  ; Write pointer
.column:
        push    ebp                     ; Remember where this round started

        TAKE_AT S1
        TAKE_AT S2
        TAKE_AT S3
        TAKE_AT S4
        TAKE_AT S5

        inc     ebx                     ; Same step for every string
        pop     eax                     ; Write pointer at the start of the round
        cmp     eax, ebp                ; Did the round append anything ?
        jne     .column                 ; Yes, go for another round

解决方案 3

这次我们创建一个数组，其中包含指向各个字符串的指针。这些指针连续用于从关联的字符串中检索字符，当我们遇到字符串结束标记 (10) 时，我们只需从数组中删除相关指针。其他解决方案一直在处理一个耗尽的字符串，但这里一个耗尽的字符串从循环中消失了。
因为这种方法需要做更多的簿记工作，所以在非常规整的测试数据上它运行得更慢。然而，一旦给它一个更接近实际的数据集（既有短字符串也有长字符串），它的优势就显现出来了。此外，输入字符串的数量没有限制，不需要填充，也不需要使用相同大小的字符串缓冲区（不像你的程序那样）。

; P is a zero-terminated array of pointers to the input strings; the
; code rewrites it in place while it runs. S is the output buffer.
; Each input string ends with a line feed (10).
; TIMES is used for the fill because NASM does not accept the bare
; `N dup V` form (its DUP needs a parenthesised list and NASM >= 2.15).
P:      dd      S1, S2, S3, S4, S5, 0
S:      times 43 db 0                   ; 42 characters in total + 1 spare byte
S1:     db      "Hello", 10
S2:     db      "Bye", 10
S3:     db      "AppleADayKeepsTheDoctorAway", 10
S4:     db      "Car", 10
S5:     db      "Tree", 10

        ...

; ----------------------------------------------------------------------
; Interleave the strings listed in the pointer array P into S.
;
;   EBX = address of the current entry in P
;   ESI = read pointer taken from that entry
;   EBP = write pointer into S
;   EDI, EAX = scratch while an entry is being removed
;
; Each pass walks P from its first entry to the zero terminator and
; takes one character per string. A string whose next character is the
; line feed has its entry removed by shifting the rest of the array
; (terminator included) down one slot. The code falls out of the bottom
; once the first entry of P is zero.
; ----------------------------------------------------------------------
Begin:  mov     ebp, S                  ; Write pointer
        jmp     .pass

; Remove the entry at EBX.
.drop:  mov     edi, ebx
.shift: mov     eax, [edi+4]            ; Pull the following entry
        mov     [edi], eax              ;  into the current slot
        add     edi, 4
        test    eax, eax                ; Stop once the terminating zero has moved
        jnz     .shift
        jmp     .next                   ; EBX now addresses the entry that followed

; Take one character from the string at ESI.
.take:  movzx   eax, byte [esi]         ; Fetch the next character
        cmp     al, 10                  ; Line feed means this string is finished
        je      .drop
        inc     esi                     ; Step the read pointer
        mov     [ebx], esi              ;  and store it back in the array
        add     ebx, 4                  ; Move on to the next entry
        mov     [ebp], al               ; Append to the output
        inc     ebp
.next:  mov     esi, [ebx]              ; Load the entry at EBX
        test    esi, esi                ; Zero marks the end of the array
        jnz     .take
.pass:  mov     ebx, P                  ; Start a new pass at the first entry
        mov     esi, [ebx]
        test    esi, esi                ; First entry zero: no strings left
        jnz     .take

解决方案 1	解决方案 2	解决方案 3	备注
0.4 微秒	0.5 微秒	1.1 微秒	5 个短字符串
2.0 微秒	2.1 微秒	1.5 微秒	其中 1 个为长字符串

预期产出：

HBACTeyparleprelleoeADayKeepsTheDoctorAway

在汇编 nasm 中插入来自 5 个字符串的字符

问题描述投票：0回答：1

1个回答

解决方案 1

解决方案 2

解决方案 3

最新问题

在汇编 nasm 中插入来自 5 个字符串的字符

问题描述 投票：0回答：1

1个回答

解决方案 1

解决方案 2

解决方案 3

最新问题

问题描述投票：0回答：1