提供基准测试:https://quick-bench.com/q/oJSjBcuR6A4twOChX_PdoX5vhyU
为什么在 for 循环中用 operator[] 逐字符复制 std::string,比使用拷贝赋值运算符慢得多,而且也比对 std::u8string 或 char* 做同样的操作慢?
这与严格别名(strict aliasing)规则有关吗?我该如何提示编译器,以避免这种性能损失?
下面这两个非常慢:
// Benchmark: copies b into a one character at a time using
// std::string::operator[] in a hand-written index loop.
// N is the length of both strings; it is defined outside this excerpt.
// NOTE(review): presumably slow because a store through char may alias the
// string objects' own members, forcing the data pointer to be reloaded on
// every iteration — confirm against the generated assembly.
static void str_loop(benchmark::State& state) {
std::string a(N,'a'), b(N,'b');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through operator[] on both strings.
for(size_t i=0;i<N;i++) a[i]=b[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
}
}
// Register the function with the benchmark framework.
BENCHMARK(str_loop);
// Benchmark: copies b into a one character at a time, calling data() on both
// std::string objects inside the loop body and indexing the returned pointers.
// N is the length of both strings; it is defined outside this excerpt.
// NOTE(review): data() is re-evaluated on every iteration here; presumably the
// compiler cannot hoist it because char stores may alias the string's
// internals — confirm against the generated assembly.
static void str_loop_data(benchmark::State& state) {
std::string a(N,'a'), b(N,'b');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through data()[i] on both strings.
for(size_t i=0;i<N;i++) a.data()[i]=b.data()[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
}
}
// Register the function with the benchmark framework.
BENCHMARK(str_loop_data);
其余的都快得多,而且彼此的速度几乎相同:
// Benchmark: copies b into a with std::string's copy-assignment operator.
// Both strings are built with N characters (N is defined outside this
// excerpt).
// NOTE(review): a and b have equal length, so the assignment presumably
// reuses a's buffer without reallocating — confirm for the library in use.
static void str_assign(benchmark::State& state) {
std::string a(N,'a'), b(N,'b');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: whole-string copy assignment.
a=b;
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
}
}
// Register the function with the benchmark framework.
BENCHMARK(str_assign);
// Benchmark: copies d into c with std::u8string's copy-assignment operator.
// Both strings are built with N characters (N is defined outside this
// excerpt).
static void u8str_assign(benchmark::State& state) {
std::u8string c(N,'c'), d(N,'d');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: whole-string copy assignment.
c=d;
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
}
}
// Register the function with the benchmark framework.
BENCHMARK(u8str_assign);
// Benchmark: copies b into a one character at a time through raw char
// pointers that are fetched from data() once per benchmark iteration, before
// the copy loop starts.
// N is the length of both strings; it is defined outside this excerpt.
static void str_loop_ptr(benchmark::State& state) {
std::string a(N,'a'), b(N,'b');
// Code before the loop is not measured
for (auto _ : state) {
// The pointers live in locals, so the inner loop never touches the
// std::string objects themselves.
char *dst = a.data();
char *src = b.data();
// Timed region: element-wise copy through the local pointers.
for(size_t i=0;i<N;i++) dst[i]=src[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
}
}
// Register the function with the benchmark framework.
BENCHMARK(str_loop_ptr);
// Benchmark: copies d into c one element at a time through raw pointers that
// are fetched from std::u8string::data() once per benchmark iteration, before
// the copy loop starts.
// N is the length of both strings; it is defined outside this excerpt.
static void u8str_loop_ptr(benchmark::State& state) {
std::u8string c(N,'c'), d(N,'d');
// Code before the loop is not measured
for (auto _ : state) {
// The pointers live in locals, so the inner loop never touches the
// std::u8string objects themselves.
auto *dst = c.data();
auto *src = d.data();
// Timed region: element-wise copy through the local pointers.
for(size_t i=0;i<N;i++) dst[i]=src[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
}
}
// Register the function with the benchmark framework.
BENCHMARK(u8str_loop_ptr);
// Benchmark: copies f into e one element at a time using
// std::vector<char>::operator[] in a hand-written index loop.
// Both vectors hold N elements (N is defined outside this excerpt).
static void vec_loop(benchmark::State& state) {
std::vector<char> e(N,'e'),f(N,'f');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through operator[] on both vectors.
for(size_t i=0;i<N;i++) e[i]=f[i];
// Keep both vectors observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(e);benchmark::DoNotOptimize(f);
}
}
// Register the function with the benchmark framework.
BENCHMARK(vec_loop);
// Benchmark: copies d into c one element at a time using
// std::u8string::operator[] in a hand-written index loop.
// N is the length of both strings; it is defined outside this excerpt.
// NOTE(review): the element type is char8_t rather than char; presumably that
// lets the compiler rule out aliasing with the string's own members — confirm
// against the generated assembly.
static void u8str_loop(benchmark::State& state) {
std::u8string c(N,'c'), d(N,'d');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through operator[] on both strings.
for(size_t i=0;i<N;i++) c[i]=d[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
}
}
// Register the function with the benchmark framework.
BENCHMARK(u8str_loop);
// Benchmark: copies f into e one element at a time, calling data() on both
// std::vector<char> objects inside the loop body and indexing the returned
// pointers.
// Both vectors hold N elements (N is defined outside this excerpt).
static void vec_loop_data(benchmark::State& state) {
std::vector<char> e(N,'e'),f(N,'f');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through data()[i] on both vectors.
for(size_t i=0;i<N;i++) e.data()[i]=f.data()[i];
// Keep both vectors observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(e);benchmark::DoNotOptimize(f);
}
}
// Register the function with the benchmark framework.
BENCHMARK(vec_loop_data);
// Benchmark: copies d into c one element at a time, calling data() on both
// std::u8string objects inside the loop body and indexing the returned
// pointers.
// N is the length of both strings; it is defined outside this excerpt.
static void u8str_loop_data(benchmark::State& state) {
std::u8string c(N,'c'), d(N,'d');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: element-wise copy through data()[i] on both strings.
for(size_t i=0;i<N;i++) c.data()[i]=d.data()[i];
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(c);benchmark::DoNotOptimize(d);
}
}
// Register the function with the benchmark framework.
BENCHMARK(u8str_loop_data);
// Benchmark: copies the contents of b into a with a single memcpy call on the
// buffers returned by std::string::data().
// N is the length of both strings; it is defined outside this excerpt.
static void str_memcpy(benchmark::State& state) {
std::string a(N,'a'), b(N,'b');
// Code before the loop is not measured
for (auto _ : state) {
// Timed region: bulk copy of N elements; sizeof(a[0]) is the size of one
// character of the string.
memcpy(a.data(),b.data(),N*sizeof(a[0]));
// Keep both strings observable so the copy is not eliminated as dead code.
benchmark::DoNotOptimize(a);benchmark::DoNotOptimize(b);
}
}
// Register the function with the benchmark framework.
BENCHMARK(str_memcpy);
当您对 std::string 中的单个字符赋值时,不妨想一想每次赋值都要做哪些事情:
批量复制函数省去了其中约 90% 的工作。只要不需要重新分配缓冲区,两者的复杂度相同,都是 O(n),但逐字符复制的指令数可能是批量复制的 10 倍。如果确实需要扩大缓冲区,复杂度就会变成 O(n * log n)。