malloc 相关问题

我正在尝试使用用户的输入作为大小，动态地为结构分配内存，但每次这样做时都会出现错误。我的结构如下： #include <stdio.h> #include <stdlib.h> #include <string.h> // structure that holds the info for a phone record struct PHONE_RECORD { char name[50]; char birthday[12]; char phone[15]; } *phonebook; 动态分配的代码在这里： int num_space(){ int num_records; struct PHONE_RECORD *phonebook; printf("Enter num of records: "); scanf("%d", &num_records); phonebook = (struct PHONE_RECORD*) malloc(sizeof(struct PHONE_RECORD)*num_records); if (phonebook == NULL){ printf("Not enough memory.\n"); return 1; } free(phonebook); return num_records; } 代码允许用户输入一个数字，但随后报错并退出程序。该项目中还有其他部分，但我已经对它们全部进行了测试，它们可以毫无问题地工作，只有 malloc 部分不起作用。供参考，这是我的 main 文件： #include <stdio.h> #include <string.h> #include "mini4Bphone.c" extern void addRecord(); extern void findRecords(); extern void listRecords(); extern void loadCSV(); extern void saveCSV(); extern int num_space(); // dispaly the menu void menu() { int choice; num_space(); //display unitl user quits using while loop and execute whatever command user inputs while (1) { printf("Phonebook Menu: "); printf("(1) Add "); printf("(2) Find "); printf("(3) List "); printf("(4) Quit "); printf("> "); scanf("%d", &choice); switch (choice) { case 1: addRecord(); break; case 2: findRecord(); break; case 3: listRecords(); break; case 4: return; default: printf("Invalid choice.\n"); break; } } } // load tne csv,menu and save the csv after all wanted functions are complete, return 0 int main() { loadCSV(); menu(); saveCSV(); return 0; } 感谢您的宝贵意见！我尝试过在函数内部和外部使用 malloc，但都无济于事。它应该让用户输入一个数字，然后为结构分配空间。但是，每次我尝试运行该程序时，都会出现错误。局部变量 struct PHONE_RECORD *phonebook; 遮蔽了同名的全局变量。num_space() 分配空间后又立即释放它，这是没有意义的。想必你想为全局变量分配空间： int num_space() { int num_records; printf("Enter num of records: "); scanf("%d", &num_records); phonebook = (struct PHONE_RECORD*) malloc(sizeof(struct PHONE_RECORD)*num_records); if (phonebook == NULL){ printf("Not enough memory.\n"); return 1; } return 
num_records; } 就您提供的信息而言，这可以解决您的段错误。使用符号常量（NAME_LEN、BIRTHDAY_LEN、PHONE_LEN）代替魔法值（50、12、15）。使用局部变量，并把函数操作所需的数据作为参数传入，这使您的代码更容易推理。检查 scanf() 的返回值，否则您操作的可能是未初始化的数据。sizeof 优先作用于变量而不是类型，这样类型更改更容易，重复代码也更少。在适当的时候优先使用无符号类型：num_records < 0 是什么意思？0 应该是一个有效的选择吗？malloc(0) 的行为是实现定义的，所以我在下面的代码中不允许它。在 menu() 函数中为您的电话簿分配空间是没有意义的，请将其移至 main()。不要对 malloc 返回的 void * 进行强制类型转换。（未修复）如果您不需要 num_space() 返回的 num_records 值，则将返回类型更改为 void；如果需要，请将返回值赋给一个变量。（未修复）考虑在 struct phonebook 中使用 char * 而不是浪费空间的固定大小字符串。这通常意味着要为每个成员单独分配内存，但使用 strdup() 相当容易。以下是最小化后的代码，以便您了解我们对您的期望： #include <stdio.h> #include <stdlib.h> #define NAME_LEN 50 #define BIRTHDAY_LEN 12 #define PHONE_LEN 15 struct phonebook { char name[NAME_LEN]; char birthday[BIRTHDAY_LEN]; char phone[PHONE_LEN]; }; size_t num_space(struct phonebook **phonebook) { size_t num_records; printf("Enter num of records: "); if(scanf("%zu", &num_records) != 1 || !num_records) { printf("scanf failed\n"); return 0; } *phonebook = malloc(sizeof **phonebook * num_records); if (!*phonebook) { printf("malloc failed\n"); return 0; } return num_records; } int main() { struct phonebook *phonebook = NULL; num_space(&phonebook); free(phonebook); }

c struct malloc variable-assignment

回答 1 投票 0

Cython 导致 Python 解释器在创建 Python 对象时崩溃

我们为 C++ 绑定开发了 Cython 包装器，Python 客户端正在使用这些 Cython 包装器。Python 解释器有时会崩溃（段错误），或者在我们创建 Python 对象时无法正常工作...

python-3.x memory-management malloc cython cythonize

回答 0 投票 0

如何释放堆中的内存而不会在链表中发生内存泄漏

我写了一个函数来处理链表中的内存释放，该函数虽然能够释放它们，但我仍然遇到了内存泄漏。请问有人能告诉我为什么这个函数能够...

c linked-list malloc singly-linked-list

回答 2 投票 0

如何从结构中获得正确的输出？从二进制文件加载信息后

我正在尝试将数据从二进制文件读取到 inode 结构中，用来表示一个文件系统。但是我遇到了分段错误，并且不知道如何修复它。我已经将地址都打印在...

c pointers memory struct malloc

回答 1 投票 0

当 NUM_INODES 不够大时为什么会出现分段错误？

我有一个像这样的 master_file_table： xxd master_file_table 00000000：0000 0000 0200 0000 2f00 0100 0000 0000 ....../...... 00000010: 0200 0000 0100 0000 0000 0000 0200 0000 ..................

c memory binary malloc

回答 0 投票 0

为什么在使用 `munmap()` 后页面回收仍然存在？

为了一个研究项目，我必须使用 mmap() 和 munmap() 重新实现 malloc() 和 free()。我在最新版的 Ubuntu 上运行。在测试中，我使用命令 time -v（来自 /usr/bin/time）w...

c malloc dynamic-memory-allocation free mmap

回答 1 投票 0

CUDA 内核在 cudaMallocManaged 内存上运行时速度慢 10 倍，即使预取也是如此

#include <cuda_runtime.h> #include <string> #include <chrono> #include <random> using namespace std; class MyTimer { std::chrono::time_point<std::chrono::system_clock> start; public: void startCounter() { start = std::chrono::system_clock::now(); } int64_t getCounterNs() { return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count(); } int64_t getCounterMs() { return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count(); } double getCounterMsPrecise() { return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count() / 1000000.0; } }; __global__ void HelloWorld() { printf("Hello world\n"); } volatile double dummy = 0; __global__ void multiply(int N, float* __restrict__ output, const float* __restrict__ x, const float* __restrict__ y) { int start = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { output[i] = x[i] * y[i]; } } int main() { MyTimer timer; srand(time(NULL)); HelloWorld<<<1,1>>>(); timer.startCounter(); int N = 2000 * 2000; float* h_a = new float[N]; float* h_b = new float[N]; float* h_c = new float[N]; float* h_res = new float[N]; for (int i = 0; i < N; i++) { h_a[i] = float(rand() % 1000000) / (rand() % 1000 + 1); h_b[i] = float(rand() % 1000000) / (rand() % 1000 + 1); h_c[i] = h_a[i] * h_b[i]; } dummy = timer.getCounterMsPrecise(); timer.startCounter(); float *d_a, *d_b, *d_c; cudaMalloc(&d_a, N * sizeof(float)); cudaMalloc(&d_b, N * sizeof(float)); cudaMalloc(&d_c, N * sizeof(float)); dummy = timer.getCounterMsPrecise(); cout << "cudaMalloc cost = " << dummy << "\n"; timer.startCounter(); cudaMemcpy(d_a, h_a, N * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(d_b, h_b, N * sizeof(float), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); 
cout << "H2D copy cost = " << dummy << "\n"; timer.startCounter(); constexpr int GRID_DIM = 256; constexpr int BLOCK_DIM = 256; multiply<<<GRID_DIM, BLOCK_DIM>>>(N, d_c, d_a, d_b); cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); cout << "kernel cost = " << dummy << "\n"; timer.startCounter(); cudaMemcpy(h_res, d_c, N * sizeof(float), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); cout << "D2H copy cost = " << timer.getCounterMsPrecise() << "\n"; for (int i = 0; i < N; i++) if (h_res[i] != h_c[i]) { cout << "error\n"; exit(1); } return 0; } 如果我用普通的cudaMalloc，结果是 Hello world cudaMalloc cost = 0.599463 H2D copy cost = 5.16785 kernel cost = 0.109068 D2H copy cost = 7.18768 但是如果我使用cudaMallocManaged，它就变成了 Hello world cudaMalloc cost = 0.116722 H2D copy cost = 8.26673 kernel cost = 1.70356 D2H copy cost = 6.8841 为什么会有这么大的性能下降？代码已经手动将内存复制到设备端，所以它不应该与常规cudaMalloc-ed设备内存完全相同吗？使用托管内存时，“预取”并不意味着使用cudaMemcpy。我不建议将 cudaMemcpy 与托管内存一起使用。你不会找到任何建议的培训材料，而且它不一定会按照你的想法去做。要在按需分页托管内存（也称为统一内存，或 UM）机制中预取数据，您实际上应该使用 cudaMemPrefetchAsync。当我这样做时，我观察到这两种情况在性能上没有显着差异。为了进行明智的比较，我不得不对您的代码进行一些重构： $ cat t2230.cu #include <cuda_runtime.h> #include <string> #include <chrono> #include <random> #include <iostream> using namespace std; class MyTimer { std::chrono::time_point<std::chrono::system_clock> start; public: void startCounter() { start = std::chrono::system_clock::now(); } int64_t getCounterNs() { return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count(); } int64_t getCounterMs() { return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count(); } double getCounterMsPrecise() { return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count() / 1000000.0; } }; __global__ void HelloWorld() { printf("Hello world\n"); } volatile double dummy = 0; __global__ void multiply(int N, float* __restrict__ 
output, const float* __restrict__ x, const float* __restrict__ y) { int start = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { output[i] = x[i] * y[i]; } } int main() { MyTimer timer; srand(time(NULL)); HelloWorld<<<1,1>>>(); int N = 2000 * 2000; timer.startCounter(); float *d_a, *d_b, *d_c; #ifdef USE_MANAGED cudaMallocManaged(&d_a, N * sizeof(float)); cudaMallocManaged(&d_b, N * sizeof(float)); cudaMallocManaged(&d_c, N * sizeof(float)); for (int i = 0; i < N; i++) { d_a[i] = float(rand() % 1000000) / (rand() % 1000 + 1); d_b[i] = float(rand() % 1000000) / (rand() % 1000 + 1); d_c[i] = 0.f; } cudaMemPrefetchAsync(d_a, N*sizeof(float), 0); cudaMemPrefetchAsync(d_b, N*sizeof(float), 0); cudaMemPrefetchAsync(d_c, N*sizeof(float), 0); #else float* h_a = new float[N]; float* h_b = new float[N]; float* h_res = new float[N]; for (int i = 0; i < N; i++) { h_a[i] = float(rand() % 1000000) / (rand() % 1000 + 1); h_b[i] = float(rand() % 1000000) / (rand() % 1000 + 1); } cudaMalloc(&d_a, N * sizeof(float)); cudaMalloc(&d_b, N * sizeof(float)); cudaMalloc(&d_c, N * sizeof(float)); cudaMemcpy(d_a, h_a, N * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(d_b, h_b, N * sizeof(float), cudaMemcpyHostToDevice); #endif cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); cout << "alloc/H2D cost = " << dummy << "\n"; constexpr int GRID_DIM = 80; constexpr int BLOCK_DIM = 1024; timer.startCounter(); multiply<<<GRID_DIM, BLOCK_DIM>>>(N, d_c, d_a, d_b); cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); cout << "kernel cost = " << dummy << "\n"; float *res = d_c; float *a = d_a; float *b = d_b; #ifndef USE_MANAGED timer.startCounter(); cudaMemcpy(h_res, d_c, N * sizeof(float), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); dummy = timer.getCounterMsPrecise(); cout << "D2H copy cost = " << timer.getCounterMsPrecise() << "\n"; res = h_res; a = h_a; b = h_b; #endif for (int i = 0; i < N; i++) 
if (res[i] != (a[i]*b[i])) { cout << "error\n"; exit(1); } return 0; } $ nvcc -o t2230 t2230.cu $ CUDA_VISIBLE_DEVICES="0" ./t2230 Hello world alloc/H2D cost = 453.012 kernel cost = 0.109507 D2H copy cost = 8.04054 $ nvcc -o t2230 t2230.cu -DUSE_MANAGED $ CUDA_VISIBLE_DEVICES="0" ./t2230 Hello world alloc/H2D cost = 411.502 kernel cost = 0.101654 $ （V100，CUDA 11.4）请注意，这假设您处于按需分页的 UM 机制中。如果您不在按需分页机制中（例如，目前在 Maxwell 或 Kepler 设备上，或在 Windows 上，或在 Jetson 上），那么您将不会使用 cudaMemPrefetchAsync，并且数据迁移与内核启动密不可分。还要注意 CUDA_VISIBLE_DEVICES 的使用。在多 GPU 系统中，UM 可以具有多种不同的行为，具体取决于系统拓扑以及系统中的 GPU。这会使同类比较变得困难。最后，我没有将数据预取回主机；如果你想比较那部分操作，以上内容已经给了你一些指引。使用托管内存时，CPU 和 GPU 之间有一个底层的交换机制，尤其是在第一次运行内核时。如果多次运行内核，执行时间将恢复正常。

c++ optimization cuda malloc gpu

回答 2 投票 0

为什么 calloc 函数不分配数组？

我正在尝试读取一个文件，并用文件中的所有字符填充一个数组。问题是程序在 while 循环中停止执行，并出现分段错误。这是 int...

c malloc allocation calloc

回答 1 投票 0

为什么 calloc 函数只分配 1 作为内存大小？

我正在尝试读取一个文件，并用文件中的所有字符填充一个数组。问题是程序在 while 循环中停止执行，并出现分段错误。这是 int...

c malloc allocation calloc

回答 0 投票 0

malloc 相关问题

最新问题