// Sample input: a line in "key:value" form whose key is "mykeyxyz".
uint8_t data[] = "mykeyxyz:1234\naaaaaaaaaaa";
我的字符串行格式为 key:value
,并且保证 len(key) <= 16
。我想将键 mykeyxyz
加载到 __m128i
中,并将其余的高位字节填充为 0。
最简单的方法是使用一个由 255 和 0 组成的掩码数组,但这需要一次额外的内存加载。有没有更快的办法?
#include <iostream>
#include <immintrin.h>
#include <string>
#include <cstring>
using namespace std;
// Sliding-window mask table: sixteen 0xFF bytes followed by sixteen 0x00 bytes.
// An unaligned 16-byte load starting at offset (16 - len) produces a vector
// whose first `len` bytes are 0xFF and whose remaining bytes are 0x00.
alignas(4096) const uint8_t strcmp_mask[32] = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
// Demo: load the key of a "key:value" line into a 16-byte vector with every
// byte from the ':' onwards cleared to zero. Prints the key length, then the
// 16 resulting bytes as decimal integers.
int main()
{
    uint8_t data[] = "mykeyxyz:1234\naaaaaaaaaaa";

    // Unaligned load of the first 16 bytes of the line.
    __m128i chars = _mm_loadu_si128((__m128i*)data);

    // Bit i of separator_mask is set when byte i of the load equals ':'.
    __m128i separators = _mm_set1_epi8(':');
    __m128i compared = _mm_cmpeq_epi8(chars, separators);
    uint32_t separator_mask = _mm_movemask_epi8(compared);

    // A 16-character key has its ':' at index 16, outside the loaded bytes,
    // which leaves separator_mask == 0, and __builtin_ctz(0) is undefined.
    // Bit 16 acts as a sentinel so that case yields len == 16 (keep all bytes).
    uint32_t len = __builtin_ctz(separator_mask | (1u << 16));
    cout << "len = " << len << "\n";

    // Slide a 16-byte window over the 0xFF/0x00 table so that exactly the
    // first `len` bytes of the mask are 0xFF.
    __m128i mask = _mm_loadu_si128((__m128i*)(strcmp_mask + 16 - len));
    __m128i key_chars = _mm_and_si128(chars, mask);

    // Copy the vector out byte-wise for printing.
    uint8_t res[16];
    memcpy(res, &key_chars, sizeof res);
    for (int i = 0; i < 16; i++) {
        cout << int(res[i]) << " ";
    }
    cout << "\n";
}
// len = 8
// 109 121 107 101 121 120 121 122 0 0 0 0 0 0 0 0
使用AVX-512,你可以生成这样的掩码:
__m128i make_mask(int n, unsigned char c) {
__m128i src{};
__mmask16 mask = (1 << n) - 1;
__m128i a = _mm_set1_epi8(c);
return _mm_mask_broadcastb_epi8(src, mask, a);
}
# Compiler output for make_mask. NOTE(review): the comments below assume
# edi holds n and esi holds c — confirm against the target calling convention.
make_mask(int, unsigned char):
mov eax, 1                      # eax = 1
shlx eax, eax, edi              # eax = 1 << edi
sub eax, 1                      # eax = (1 << edi) - 1
kmovw k1, eax                   # copy the low 16 bits of eax into mask register k1
vpbroadcastb xmm0{k1}{z}, esi   # broadcast the low byte of esi into the lanes selected by k1; {z} zeroes the others
ret
使用它,您可以像这样屏蔽字符串:
// Demo: mask off everything from the ':' onwards and print the key.
// NOTE(review): find() is not defined in this snippet; it is presumably a
// helper returning the index of the given character within the vector.
int main() {
    unsigned char data[] = "mykeyxyz:1234\naaaaaaaaaaa";
    __m128i chars = _mm_loadu_si128((__m128i*)data);
    int pos = find(chars, ':'); // find index of character; you have already figured this out
    __m128i mask = make_mask(pos, 0xff);
    __m128i result = _mm_and_si128(chars, mask);

    // A key that fills all 16 bytes leaves no zero byte inside the vector, so
    // handing the vector itself to puts() would read past its end. Copy it
    // into a buffer that always ends in a terminator instead.
    char text[sizeof(result) + 1] = {0};
    _mm_storeu_si128((__m128i*)text, result);
    puts(text);
}
此代码打印
mykeyxyz
。
请参阅 Compiler Explorer 上的在线示例代码。