在我的 C 程序中,我试图将 .asm 代码转换为伪机器代码。
使用的汇编文件:
ORG 0x10
start WB 0x05 ; write 0x05 to MBR
WM 0x400 ; write data on MBR to memory at address 0x400
WB 0x03 ; write 0x03 to MBR
WM 0x401 ; write data on MBR to memory at address 0x401
RM 0x400 ; read address 0x400, store data to MBR
WACC ; write MBR to ACC (via BUS)
RM 0x401 ; read data at memory address 0x401, store to MBR
ADD ; add ACC to MBR (through BUS)
RACC ; read ACC, store to MBR
WM 0x402 ; write data on MBR to memory address 0x402
WB 0x00 ; write 0x00 to MBR
RM 0x402 ; read address 0x402, store data to MBR
WACC ; write MBR to ACC (via BUS)
WB 0x05 ; write 0x05 to MBR
BRGT here ; compare ACC with MBR, jump to here if greater than
RACC ; read ACC, store to MBR
WM 0x403 ; write data on MBR to memory address 0x403
here SHL ; shift left ACC
RACC ; read ACC, store to MBR
WM 0x405 ; write data on MBR to memory at address 0x405
EOP
所需输出:
ADDR=0x10;BUS=0x30;MainMemory();
ADDR=0x11;BUS=0x05;MainMemory();
ADDR=0x12;BUS=0x0C;MainMemory();
ADDR=0x13;BUS=0x00;MainMemory();
ADDR=0x14;BUS=0x30;MainMemory();
ADDR=0x15;BUS=0x03;MainMemory();
ADDR=0x16;BUS=0x0C;MainMemory();
ADDR=0x17;BUS=0x01;MainMemory();
ADDR=0x18;BUS=0x14;MainMemory();
ADDR=0x19;BUS=0x00;MainMemory();
ADDR=0x1A;BUS=0x48;MainMemory();
ADDR=0x1B;BUS=0x00;MainMemory();
ADDR=0x1C;BUS=0x14;MainMemory();
ADDR=0x1D;BUS=0x01;MainMemory();
ADDR=0x1E;BUS=0xF0;MainMemory();
ADDR=0x1F;BUS=0x00;MainMemory();
ADDR=0x20;BUS=0x58;MainMemory();
ADDR=0x21;BUS=0x00;MainMemory();
ADDR=0x22;BUS=0x0C;MainMemory();
ADDR=0x23;BUS=0x02;MainMemory();
ADDR=0x24;BUS=0x30;MainMemory();
ADDR=0x25;BUS=0x00;MainMemory();
ADDR=0x26;BUS=0x14;MainMemory();
ADDR=0x27;BUS=0x02;MainMemory();
ADDR=0x28;BUS=0x48;MainMemory();
ADDR=0x29;BUS=0x00;MainMemory();
ADDR=0x2A;BUS=0x30;MainMemory();
ADDR=0x2B;BUS=0x05;MainMemory();
ADDR=0x2C;BUS=0x90;MainMemory();
ADDR=0x2D;BUS=0x32;MainMemory();
ADDR=0x2E;BUS=0x58;MainMemory();
ADDR=0x2F;BUS=0x00;MainMemory();
ADDR=0x30;BUS=0x0C;MainMemory();
ADDR=0x31;BUS=0x03;MainMemory();
ADDR=0x32;BUS=0xB0;MainMemory();
ADDR=0x33;BUS=0x00;MainMemory();
ADDR=0x34;BUS=0x58;MainMemory();
ADDR=0x35;BUS=0x00;MainMemory();
ADDR=0x36;BUS=0x0C;MainMemory();
ADDR=0x37;BUS=0x05;MainMemory();
ADDR=0x38;BUS=0xF8;MainMemory();
ADDR=0x39;BUS=0x00;MainMemory();
我遇到了一个奇怪的问题,如果我删除位于
convertAsmToC()
中的 char** 数组的 malloc 语句,程序就会崩溃,这很奇怪,因为代码中的任何地方都没有使用数组branchKeys。除此之外,它可以在 Mac 上成功编译并运行,但不能在我的 Windows 机器上成功编译和运行。有问题的 malloc 语句是:
char **branchKeys = malloc(numLines * sizeof(char *));
已经尝试使用我的 Mac 调试 lldb 上的代码,并且没有显示任何分段错误。即使删除了 malloc 语句,它仍然可以在我的 Mac 上运行。该程序针对两种操作系统进行编译。唯一的区别是,Mac 实际上设法完成执行
convertAsmToC()
并输出文本文件,而在 Windows 上它只是结束,没有任何输出,也没有生成文本文件。
有关更多上下文,这些是我的 C:
版本以下是我的代码文件:
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "assembler.h"
#define FILE_PATH "./data/assembly.asm"
#define OUTPUT_PATH "./output/assembled.txt"
int getLinesFromFile(FILE *currentFile, char **lines);
int main()
{
int numLines = 0;
int conversionStatus;
char *lines[1024];
FILE *currentFile = fopen(FILE_PATH, "r");
numLines = getLinesFromFile(currentFile, lines);
if (numLines > 0)
{
char **assembledLines = malloc(numLines * sizeof(char *));
if (assembledLines == NULL)
{
// If malloc fails, handle the memory allocation failure
fprintf(stderr, "Error: Memory allocation for assembledLines failed.\n");
return 1; // Return an error code or handle it as needed
}
conversionStatus = assembleToC(assembledLines, lines, numLines);
if (conversionStatus)
{
FILE *file = fopen(OUTPUT_PATH, "w");
if (file == NULL)
{
perror("Failed to open file");
return 1;
}
// Write each string to the file
for (int i = 0; i < conversionStatus; i++)
{
fprintf(file, "%s\n", assembledLines[i]); // Adds a newline after each string
}
// Close the file
fclose(file);
printf("[SUCCESS] Done assembling %s!\n", FILE_PATH);
}
else
{
printf("[ERROR] Error assembling %s.\n", FILE_PATH);
}
}
else
{
printf("[ERROR] Unable to open file %s or file is empty.\n", FILE_PATH);
}
}
int getLinesFromFile(FILE *currentFile, char **lines)
{
if (currentFile != NULL)
{
int numLines = 0;
char line[1024];
while (fgets(line, sizeof(line), currentFile) != NULL)
{
line[strcspn(line, "\n")] = '\0'; // Replace \n in files with \0 null terminator.
lines[numLines++] = strdup(line); // Copy string to lines array
}
fclose(currentFile);
return numLines;
}
else
{
return 0;
}
}
assembler.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "assembler.h"
typedef struct
{
unsigned char opcode;
int needs_adding;
} INSTRUCTION;
const char *INSTRUCTION_TO_HEX_KEYS[] = {
"ADD", "SUB", "MUL", "AND", "OR", "NOT", "XOR", "SHL", "SHR",
"WM", "RM", "RIO", "WIO", "WB", "WIB", "WACC", "RACC",
"SWAP", "BR", "BRE", "BRNE", "BRGT", "BRLT", "EOP"};
const int NUM_INSTRUCTIONS = sizeof(INSTRUCTION_TO_HEX_KEYS) / sizeof(INSTRUCTION_TO_HEX_KEYS[0]);
const INSTRUCTION INSTRUCTION_TO_HEX_VALUES[] = {
{0xF0, 0}, {0xE8, 0}, {0xD8, 0}, {0xD0, 0}, {0xC8, 0}, {0xC0, 0}, {0xB8, 0}, {0xB0, 0}, {0xA8, 0}, {0x08, 1}, {0x10, 1}, {0x20, 1}, {0x28, 1}, {0x30, 0}, {0x38, 0}, {0x48, 0}, {0x58, 0}, {0x70, 0}, {0x18, 1}, {0xA0, 1}, {0x98, 1}, {0x90, 1}, {0x88, 1}, {0xF8, 0}};
int assembleToC(char **assembledLines, char **extractedLines, int numLines)
{
int eopExists = checkIfEOPExists(extractedLines, numLines);
unsigned int startingAddress = getStartingAddress(extractedLines, numLines);
if (eopExists && startingAddress < 0xFFFF)
{
char **branchKeys = malloc(numLines * sizeof(char *)); // Unused but breaks program on Windows if removed.
int numAssembledLines = convertAsmToC(assembledLines, extractedLines, numLines, startingAddress);
return numAssembledLines;
}
else
{
printf("ERROR!");
return 0;
}
}
int checkIfEOPExists(char **extractedLines, int numLines)
{
int i;
for (i = 0; i < numLines; i++)
{
char *eopExists = strstr(extractedLines[i], "EOP");
if (eopExists)
{
return 1;
}
}
return 0;
}
unsigned int getStartingAddress(char **extractedLines, int numLines)
{
int i;
unsigned int startingAddress = 0x00;
char extraChar; // Variable to check for extra characters after the hex number
for (i = 0; i < numLines; i++)
{
char *orgExists = strstr(extractedLines[i], "ORG");
if (orgExists)
{
int parsed = sscanf(extractedLines[i], "ORG %x%c", &startingAddress, &extraChar);
if (parsed == 1)
{
return startingAddress;
}
else if (parsed == 2)
{
printf("[ERROR] Invalid memory address due to unexpected characters.\n");
return 0xFFFF;
}
else
{
printf("[ERROR] Invalid memory address format.\n");
return 0xFFFF;
}
}
}
return startingAddress;
}
int convertAsmToC(char **assembledLines, char **extractedLines, int numLines, unsigned int startingAddress)
{
int i, j, numAssembledLines = 0;
unsigned int *currentAddress = &startingAddress;
int validAddress = 1;
char *token;
char tempLine[256];
for (i = 1; i < numLines; i++)
{
unsigned int instruction, operand;
strcpy(tempLine, extractedLines[i]);
token = strtok(tempLine, " \t");
if (!validAddress)
{
return 0;
}
while (token != NULL)
{
for (j = 0; j < NUM_INSTRUCTIONS; j++)
{
if (strcmp(token, INSTRUCTION_TO_HEX_KEYS[j]) == 0)
{
instruction = INSTRUCTION_TO_HEX_VALUES[j].opcode;
char *stringOperand = strtok(NULL, " \t");
if (stringOperand != NULL)
{
int parsed = sscanf(stringOperand, "%x", &operand);
validAddress = checkValidAddress(token, operand);
if (validAddress)
{
if (INSTRUCTION_TO_HEX_VALUES[j].needs_adding && parsed == 1)
{
unsigned int current_instruction = (instruction << 8) + operand;
instruction = (current_instruction >> 8) & 0xFF;
operand = current_instruction & 0xFF;
}
else if (parsed != 1)
{
operand = findBranch(stringOperand, extractedLines, currentAddress, numLines, i);
}
token = NULL;
assembledLines[numAssembledLines++] = getInstructionString(instruction, currentAddress);
assembledLines[numAssembledLines++] = getInstructionString(operand, currentAddress);
break;
}
else
{
free(assembledLines);
assembledLines = NULL;
return 0;
}
}
else
{
operand = 0x00;
token = NULL;
assembledLines[numAssembledLines++] = getInstructionString(instruction, currentAddress);
assembledLines[numAssembledLines++] = getInstructionString(operand, currentAddress);
break;
}
}
}
if (token != NULL)
{
token = strtok(NULL, " \t");
}
}
}
return numAssembledLines;
}
int checkValidAddress(char *instruction, unsigned int operand)
{
if (strcmp(instruction, "WIO") == 0 || strcmp(instruction, "RIO") == 0)
{
return operand <= 0x01F;
}
else if (strcmp(instruction, "WM") == 0 || strcmp(instruction, "RM") == 0)
{
return operand >= 0x400 && operand <= 0x7FF;
}
else
{
return 1;
}
}
unsigned int findBranch(char *stringOperand, char **extractedLines, unsigned int *currentAddress, int numLines, int currentLine)
{
unsigned int operand = 0x00;
if (stringOperand)
{
if (strcmp(stringOperand, ";") != 0)
{
for (int k = currentLine + 1; k < numLines; k++)
{
char *branchFound = strstr(extractedLines[k], stringOperand);
if (branchFound)
{
operand = (*currentAddress) + (2 * (k - currentLine));
break;
}
}
}
}
return operand;
}
char *getInstructionString(unsigned int instruction, unsigned int *currentAddress)
{
char buffer[50];
int max_len = sizeof(buffer);
int len = snprintf(buffer, max_len, "ADDR=0x%02X;BUS=0x%02X;MainMemory();", *currentAddress, instruction);
(*currentAddress)++;
if (len >= 0 && len < max_len)
{
char *result = malloc(len + 1);
if (result)
{
strcpy(result, buffer);
return result;
}
}
return NULL;
}
char *getOperandString(unsigned int operand, unsigned int *currentAddress)
{
char buffer[50];
int max_len = sizeof(buffer);
int len = snprintf(buffer, max_len, "ADDR=0x%02X;BUS=0x%02X;MainMemory();", *currentAddress, operand);
(*currentAddress)++;
if (len >= 0 && len < max_len)
{
char *result = malloc(len + 1);
if (result)
{
strcpy(result, buffer);
return result;
}
}
return NULL;
}
assembler.h
#ifndef ASSEMBLER_H_
#define ASSEMBLER_H_
int assembleToC(char **assembledLines, char **extractedLines, int numLines);
int convertAsmToC(char **assembledLines, char **extractedLines, int numLines, unsigned int startingAddress);
int checkIfEOPExists(char **extractedLines, int numLines);
unsigned int getStartingAddress(char **extractedLines, int numLines);
int checkValidAddress(char *instruction, unsigned int operand);
unsigned int findBranch(char *stringOperand, char **extractedLines, unsigned int *currentAddress, int numLines, int currentLine);
char *getInstructionString(unsigned int instruction, unsigned int *currentAddress);
char *getOperandString(unsigned int operand, unsigned int *currentAddress);
#endif
您的代码存在许多与内存相关的问题(泄漏和损坏)。我建议你首先解决这些问题。
您错误地写入了堆内存(由于我之前提到的内存问题),并且通过那里的分配,您的代码运行没有问题幸运的是。从代码中删除分配后,现在您将无法将如此大量的数据写入该堆块并覆盖堆元数据。我会将
valgrind
的投诉(无论是否有分配)放入您的代码中,以便您可以修复它们。
与分配(工作版本):
==78194== Invalid write of size 8
==78194== at 0x109B58: convertAsmToC (assembler.c:128)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194== Address 0x4a9ed50 is 0 bytes after a block of size 176 alloc'd
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109425: main (main.c:20)
==78194==
==78194== Invalid write of size 8
==78194== at 0x109B96: convertAsmToC (assembler.c:129)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194== Address 0x4a9ed58 is 8 bytes after a block of size 176 alloc'd
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109425: main (main.c:20)
==78194==
==78194== Invalid read of size 8
==78194== at 0x1094FF: main (main.c:41)
==78194== Address 0x4a9ed50 is 0 bytes after a block of size 176 alloc'd
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109425: main (main.c:20)
==78194==
[SUCCESS] Done assembling test.asm!
==78194==
==78194== HEAP SUMMARY:
==78194== in use at exit: 2,739 bytes in 66 blocks
==78194== total heap usage: 71 allocs, 5 frees, 12,899 bytes allocated
==78194==
==78194== 33 bytes in 1 blocks are indirectly lost in loss record 1 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109C11: convertAsmToC (assembler.c:143)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 33 bytes in 1 blocks are indirectly lost in loss record 2 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109C4F: convertAsmToC (assembler.c:144)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 132 bytes in 4 blocks are definitely lost in loss record 3 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109B57: convertAsmToC (assembler.c:128)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 132 bytes in 4 blocks are definitely lost in loss record 4 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109B95: convertAsmToC (assembler.c:129)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 528 bytes in 16 blocks are indirectly lost in loss record 5 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109B57: convertAsmToC (assembler.c:128)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 528 bytes in 16 blocks are indirectly lost in loss record 6 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109EB4: getInstructionString (assembler.c:204)
==78194== by 0x109B95: convertAsmToC (assembler.c:129)
==78194== by 0x109729: assembleToC (assembler.c:29)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 572 (176 direct, 396 indirect) bytes in 1 blocks are definitely lost in loss record 7 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x10970F: assembleToC (assembler.c:28)
==78194== by 0x109482: main (main.c:27)
==78194==
==78194== 902 (176 direct, 726 indirect) bytes in 1 blocks are definitely lost in loss record 8 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x109425: main (main.c:20)
==78194==
==78194== 1,001 bytes in 22 blocks are definitely lost in loss record 9 of 9
==78194== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78194== by 0x491958E: strdup (strdup.c:42)
==78194== by 0x109657: getLinesFromFile (main.c:68)
==78194== by 0x1093FE: main (main.c:17)
==78194==
==78194== LEAK SUMMARY:
==78194== definitely lost: 1,617 bytes in 32 blocks
==78194== indirectly lost: 1,122 bytes in 34 blocks
==78194== possibly lost: 0 bytes in 0 blocks
==78194== still reachable: 0 bytes in 0 blocks
==78194== suppressed: 0 bytes in 0 blocks
==78194==
==78194== For lists of detected and suppressed errors, rerun with: -s
==78194== ERROR SUMMARY: 21 errors from 8 contexts (suppressed: 0 from 0)
没有分配(崩溃版本):
==78991== Memcheck, a memory error detector
==78991== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==78991== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==78991== Command: ./osman
==78991==
==78991== Invalid write of size 8
==78991== at 0x109B43: convertAsmToC (assembler.c:128)
==78991== by 0x109714: assembleToC (assembler.c:29)
==78991== by 0x109482: main (main.c:27)
==78991== Address 0x4a9ed50 is 0 bytes after a block of size 176 alloc'd
==78991== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78991== by 0x109425: main (main.c:20)
==78991==
==78991== Invalid write of size 8
==78991== at 0x109B81: convertAsmToC (assembler.c:129)
==78991== by 0x109714: assembleToC (assembler.c:29)
==78991== by 0x109482: main (main.c:27)
==78991== Address 0x4a9ed58 is 8 bytes after a block of size 176 alloc'd
==78991== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==78991== by 0x109425: main (main.c:20)
==78991==
valgrind: m_mallocfree.c:303 (get_bszB_as_is): Assertion 'bszB_lo == bszB_hi' failed.
valgrind: Heap block lo/hi size mismatch: lo = 240, hi = 78248064.
This is probably caused by your program erroneously writing past the
end of a heap block and corrupting heap metadata. If you fix any
invalid writes reported by Memcheck, this assertion failure will
probably go away. Please try that before reporting this as a bug.
host stacktrace:
==78991== at 0x5804284A: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x58042977: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x58042B1B: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x5804C8CF: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x5803AE9A: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x580395B7: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x5803DF3D: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x58038868: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x1008E2F1FD: ???
==78991== by 0x1008CA9F2F: ???
==78991== by 0x581FCD83: ??? (in /usr/libexec/valgrind/memcheck-amd64-linux)
==78991== by 0x1008CA9F17: ???
==78991== by 0x1008CA9F2F: ???
sched status:
running_tid=1
Thread 1: status = VgTs_Runnable (lwpid 78991)
==78991== at 0x109BFD: convertAsmToC (assembler.c:143)
==78991== by 0x109714: assembleToC (assembler.c:29)
==78991== by 0x109482: main (main.c:27)
client stack range: [0x1FFEFFC000 0x1FFF000FFF] client SP: 0x1FFEFFD7F0
valgrind stack range: [0x1008BAA000 0x1008CA9FFF] top usage: 18744 of 1048576
Note: see also the FAQ in the source distribution.
It contains workarounds to several common problems.
In particular, if Valgrind aborted or crashed after
identifying problems in your program, there's a good chance
that fixing those problems will prevent Valgrind aborting or
crashing, especially if it happened in m_mallocfree.c.
If that doesn't help, please report this bug to: www.valgrind.org
In the bug report, send all the above text, the valgrind
version, and what OS and version you are using. Thanks.