在 C++ 中,在 for 循环中复制 std::string 速度要慢得多

问题描述 投票:0回答:1

提供基准测试:https://quick-bench.com/q/oJSjBcuR6A4twOChX_PdoX5vhyU

为什么在 for 循环中(使用 operator[])逐字符复制 std::string,比使用复制赋值运算符慢得多,而且也比对 std::u8string 或 char* 做同样的操作慢?

这与严格别名(strict aliasing)规则有关吗?我该如何提示编译器以避免这种性能损失?

这两个超级慢:

// Benchmark: copies b into a one character at a time through
// std::string::operator[] on both sides.
// The question reports this as one of the two very slow variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void str_loop(benchmark::State& state) {
  std::string   a(N,'a'), b(N,'b'); 
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Element-wise copy of N characters; both strings are indexed on every iteration.
    for(size_t i=0;i<N;i++) a[i]=b[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
  }
}
BENCHMARK(str_loop);

// Benchmark: copies b into a one character at a time, calling data() on
// both std::string objects inside the inner loop and indexing the result.
// The question reports this as one of the two very slow variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void str_loop_data(benchmark::State& state) {
  std::string   a(N,'a'), b(N,'b'); 
  // Code before the loop is not measured 
  for (auto _ : state) {
    // data() is written inside the loop body, i.e. it is not hoisted in the source.
    for(size_t i=0;i<N;i++) a.data()[i]=b.data()[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
  }
}
BENCHMARK(str_loop_data);

其余的都快得多,而且彼此速度几乎相同:


// Benchmark: copies b into a with std::string copy assignment (a = b).
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void str_assign(benchmark::State& state) {
  std::string   a(N,'a'), b(N,'b'); 
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Whole-string copy done by the library's assignment operator.
    a=b;
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
  }
}
BENCHMARK(str_assign);

// Benchmark: copies d into c with std::u8string copy assignment (c = d).
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void u8str_assign(benchmark::State& state) {
  std::u8string c(N,'c'), d(N,'d');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Whole-string copy done by the library's assignment operator.
    c=d;
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
  }
}
BENCHMARK(u8str_assign);

 
// Benchmark: copies b into a one character at a time through raw char
// pointers that are read from data() once per benchmark iteration, before
// the inner loop starts.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void str_loop_ptr(benchmark::State& state) {
  std::string   a(N,'a'), b(N,'b'); 
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Buffer pointers are stored in locals so the inner loop only indexes them.
    char *dst = a.data();
    char *src = b.data();
    for(size_t i=0;i<N;i++) dst[i]=src[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
  }
}
BENCHMARK(str_loop_ptr);

 
// Benchmark: copies d into c one element at a time through pointers that
// are read from std::u8string::data() once per benchmark iteration, before
// the inner loop starts.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void u8str_loop_ptr(benchmark::State& state) {
  std::u8string c(N,'c'), d(N,'d');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Buffer pointers are stored in locals so the inner loop only indexes them.
    auto *dst = c.data();
    auto *src = d.data();
    for(size_t i=0;i<N;i++) dst[i]=src[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
  }
}
BENCHMARK(u8str_loop_ptr);

 
// Benchmark: copies f into e one element at a time through
// std::vector<char>::operator[] on both sides.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void vec_loop(benchmark::State& state) {
  std::vector<char> e(N,'e'),f(N,'f');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Element-wise copy of N chars; both vectors are indexed on every iteration.
    for(size_t i=0;i<N;i++) e[i]=f[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(e);benchmark::DoNotOptimize(f);
  }
}
BENCHMARK(vec_loop);



// Benchmark: copies d into c one element at a time through
// std::u8string::operator[] on both sides. It has the same loop shape as
// str_loop, but with std::u8string instead of std::string.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void u8str_loop(benchmark::State& state) {
  std::u8string c(N,'c'), d(N,'d');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Element-wise copy of N elements; both strings are indexed on every iteration.
    for(size_t i=0;i<N;i++) c[i]=d[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
  }
}
BENCHMARK(u8str_loop);

// Benchmark: copies f into e one element at a time, calling data() on both
// std::vector<char> objects inside the inner loop and indexing the result.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void vec_loop_data(benchmark::State& state) {
  std::vector<char> e(N,'e'),f(N,'f');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // data() is written inside the loop body, i.e. it is not hoisted in the source.
    for(size_t i=0;i<N;i++) e.data()[i]=f.data()[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(e);benchmark::DoNotOptimize(f);
  }
}
BENCHMARK(vec_loop_data);



// Benchmark: copies d into c one element at a time, calling data() on both
// std::u8string objects inside the inner loop and indexing the result. It
// has the same loop shape as str_loop_data, but with std::u8string.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void u8str_loop_data(benchmark::State& state) {
  std::u8string c(N,'c'), d(N,'d');
  // Code before the loop is not measured 
  for (auto _ : state) {
    // data() is written inside the loop body, i.e. it is not hoisted in the source.
    for(size_t i=0;i<N;i++) c.data()[i]=d.data()[i];
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
  }
}
BENCHMARK(u8str_loop_data);

 
// Benchmark: copies the contents of b into a with a single memcpy call on
// the strings' data() buffers.
// The question lists this among the much faster variants.
// N is not defined in this snippet; it has to be provided elsewhere.
static void str_memcpy(benchmark::State& state) {
  std::string   a(N,'a'), b(N,'b'); 
  // Code before the loop is not measured 
  for (auto _ : state) {
    // Byte count is N elements times the size of one element of a.
    memcpy(a.data(),b.data(),N*sizeof(a[0]));
    // Keep the compiler from discarding the copy as dead code.
    benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
  }
}
BENCHMARK(str_memcpy);

c++ arrays string performance vectorization
1个回答
0
投票

当您给 std::string 中的单个字符赋值时,请想一想它需要做的所有事情:

  • 检查索引是否超出当前字符串长度的末尾。
  • 检查索引是否为负数
  • 检查缓冲区是否需要增长。如果缓冲区需要增长,则必须将整个字符串复制到新分配的缓冲区。
  • 它必须维护空终止符,因此每次写入最后一个字符都意味着它还必须将空终止符写入新位置。
  • 可能不是内联的,所以必须进行函数调用,将值压入堆栈等。

批量复制函数省去了其中 90% 的工作。只要不需要重新分配缓冲区,两者的复杂度相同(都是 O(n)),但逐字符复制执行的指令数量可能是批量复制的 10 倍。如果确实必须增大缓冲区,复杂度就会变成 O(n log n)。

© www.soinside.com 2019 - 2024. All rights reserved.