我有两个应用程序(.dll | .exe)。
[进程 1] 这个进程就是 dll。它在被注入时会在两个进程之间建立通信。
// Create the named, pagefile-backed section (1024 bytes, default security)
// that both processes use to exchange the request header and the payload.
auto shared_file_obj = CreateFileMappingA(INVALID_HANDLE_VALUE,
    nullptr, // pSec
    PAGE_READWRITE,
    0,
    1024,
    ("serviceio"));
if (!shared_file_obj)
{
    std::cerr << "[-] failed to create shared file object : " << std::hex << GetLastError() << '\n';
    return;
}
//std::cout << "[+] created shared file obj" << '\n';
// Map the whole section into this process.
auto shared_memory = MapViewOfFile(shared_file_obj, FILE_MAP_ALL_ACCESS, 0, 0, 1024);
if (!shared_memory)
{
    std::cerr << "[-] failed to map shared memory" << '\n';
    // Release the section handle; nothing else has been acquired yet.
    CloseHandle(shared_file_obj);
    return;
}
//std::cout << "[+] mapped shared memory" << '\n';
// Named manual-reset events, created non-signalled.
done_read = CreateEventA(nullptr, TRUE, FALSE, "i_finished_read");
need_read = CreateEventA(nullptr, TRUE, FALSE, "i_need_read");
if (!need_read || !done_read)
{
    std::cerr << "[-] failed to create either of the events" << '\n';
    // Undo everything acquired so far so a failed setup leaks nothing.
    if (done_read)
    {
        CloseHandle(done_read);
        done_read = nullptr;
    }
    if (need_read)
    {
        CloseHandle(need_read);
        need_read = nullptr;
    }
    UnmapViewOfFile(shared_memory);
    CloseHandle(shared_file_obj);
    return;
}
通信建立之后,它会等待来自进程 2 的输入。一旦通过事件信号收到请求,它就会执行下面这段代码。
for (;;) {
// Await signal from external application
WaitForSingleObject(need_read, INFINITE);
// Signal received, parse bytes into struct
io::InMessage* curr_message = reinterpret_cast<io::InMessage*>(packet_space);
// Check if it's in external's control.
if (curr_message->current_control == 0) {
// Control is in the external application, check mode & if address is valid
if (curr_message->mode == 0 && curr_message->address) {
// Read location into bytes
byte* to_ret = read_bytes(curr_message->address, curr_message->size);
// Check validity of what we're writing
if (to_ret) {
// Grab location of where to write the information
PVOID loc_to_write = packet_space + sizeof(io::InMessage);
// Copy from local buffer to shared memory space
memcpy(loc_to_write, to_ret, curr_message->size);
}
}
// Set control back to us
curr_message->current_control = 1;
// Signal event that we're done reading & memory is safe to access from external application
SetEvent(done_read);
}
};
read_bytes 中的代码只是一次简单的 memcpy,并返回存放结果的缓冲区。我对这个函数做了基准测试,每次调用平均约 200 纳秒(基准测试:read_bytes)。
/// Copies `len` bytes starting at `address` into a scratch buffer and returns
/// a pointer to that buffer.
///
/// The returned pointer refers to a per-thread buffer owned by this function:
/// it stays valid until the next call to read_bytes on the same thread and
/// must not be freed by the caller. (Previously the buffer was a local
/// std::vector, so the returned pointer dangled as soon as the function
/// returned.)
///
/// @param address Address in this process to read from; it is dereferenced
///                as-is, no validation is performed.
/// @param len     Number of bytes to copy.
/// @return Pointer to the copied bytes; may be null when `len` is 0 and no
///         buffer has been allocated yet.
byte* io::read_bytes(std::uintptr_t address, std::uint32_t len)
{
    // Reused across calls: keeps the storage alive after return and avoids a
    // heap allocation per read once the buffer has grown to the needed size.
    thread_local std::vector<byte> scratch;
    scratch.resize(len);
    // Skip the copy for empty reads; there is nothing to transfer.
    if (len != 0)
    {
        memcpy(scratch.data(), reinterpret_cast<const void*>(address), len);
    }
    return scratch.data();
}
[进程 2] 进程 2 只需以完全访问权限打开共享内存,并打开这两个事件。
// Open the named section that the other process created.
HANDLE hMapFile = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, "serviceio");
if (hMapFile == NULL) {
    throw std::runtime_error(XorStr("couldn't get access to I/O."));
}
// Map the shared memory into the process address space
LPVOID pBuf = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, BUF_SIZE);
if (pBuf == NULL) {
    CloseHandle(hMapFile);
    throw std::runtime_error(XorStr("couldn't map I/O."));
}
this->buffer = (BYTE*)pBuf;
// Open the event the other side signals when a response is ready
this->done_read = OpenEventA(EVENT_ALL_ACCESS, 0, "i_finished_read");
if (!this->done_read)
{
    // Release the view and the section before bailing out, mirroring the
    // cleanup done on the MapViewOfFile failure path.
    this->buffer = NULL;
    UnmapViewOfFile(pBuf);
    CloseHandle(hMapFile);
    throw std::runtime_error(XorStr("couldn't get access to finished read event"));
}
// Open the event used to announce a new request
this->need_read = OpenEventA(EVENT_ALL_ACCESS, 0, "i_need_read");
if (!this->need_read)
{
    // Same cleanup as above, plus the event that was already opened.
    CloseHandle(this->done_read);
    this->done_read = NULL;
    this->buffer = NULL;
    UnmapViewOfFile(pBuf);
    CloseHandle(hMapFile);
    throw std::runtime_error(XorStr("couldn't get access to read event"));
}
// Start from a known state: both events non-signalled
ResetEvent(this->done_read);
ResetEvent(this->need_read);
下面是我们发出读取请求信号的方式:
/// <summary>
/// Reads a value `T` at address `address` from memory.
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="address"></param>
/// <returns></returns>
template<typename T>
inline T read(std::uintptr_t address) {
if (!address) return T();
// Set the memory operation to `0`, which is a Read.
InMessage* in_msg = reinterpret_cast<InMessage*>(this->buffer);
// set all the arguments
in_msg->mode = 0;
in_msg->size = sizeof(T);
in_msg->address = address;
// tell the server that the data is ready to be read.
in_msg->current_control = 0;
// set read event
SetEvent(need_read);
// wait for finished read event
WaitForSingleObject(done_read, INFINITE);
T value = *reinterpret_cast<T*>(this->buffer + sizeof(InMessage));
ResetEvent(need_read);
ResetEvent(done_read);
// Read the response from the shared memory segment
return value;
}
我对这个函数进行了基准测试,平均需要 7000-8000 纳秒,比之前的 200 纳秒大幅增加。
我的问题是,共享内存真的需要这么长时间还是我的实现存在问题?