#ifndef MEMORY_H_INCLUDED
#define MEMORY_H_INCLUDED
#include <algorithm>
#include <cstdint>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>
#include <cstring>
#include "types.h"
#if defined(_WIN64)
#if _WIN32_WINNT < 0x0601
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0601
#endif
#if !defined(NOMINMAX)
#define NOMINMAX
#endif
#include <windows.h>
#ifdef small
#undef small
#endif
#include <psapi.h>
extern "C" {
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
using AdjustTokenPrivileges_t =
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif
namespace Stockfish {
void* std_aligned_alloc(size_t alignment, size_t size);
void std_aligned_free(void* ptr);
void* aligned_large_pages_alloc(size_t size);
void aligned_large_pages_free(void* mem);
bool has_large_pages();
template<typename T, typename FREE_FUNC>
void memory_deleter(T* ptr, FREE_FUNC free_func) {
if (!ptr)
return;
if constexpr (!std::is_trivially_destructible_v<T>)
ptr->~T();
free_func(ptr);
}
template<typename T, typename FREE_FUNC>
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
if (!ptr)
return;
const size_t array_offset = std::max(sizeof(size_t), alignof(T));
char* raw_memory = reinterpret_cast<char*>(ptr) - array_offset;
if constexpr (!std::is_trivially_destructible_v<T>)
{
const size_t size = *reinterpret_cast<size_t*>(raw_memory);
for (size_t i = size; i-- > 0;)
ptr[i].~T();
}
free_func(raw_memory);
}
template<typename T, typename ALLOC_FUNC, typename... Args>
inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
Args&&... args) {
void* raw_memory = alloc_func(sizeof(T));
ASSERT_ALIGNED(raw_memory, alignof(T));
return new (raw_memory) T(std::forward<Args>(args)...);
}
template<typename T, typename ALLOC_FUNC>
inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
using ElementType = std::remove_extent_t<T>;
const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
char* raw_memory =
reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
ASSERT_ALIGNED(raw_memory, alignof(T));
new (raw_memory) size_t(num);
for (size_t i = 0; i < num; ++i)
new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
return reinterpret_cast<ElementType*>(raw_memory + array_offset);
}
template<typename T>
struct LargePageDeleter {
void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
};
template<typename T>
struct LargePageArrayDeleter {
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
};
template<typename T>
using LargePagePtr =
std::conditional_t<std::is_array_v<T>,
std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
std::unique_ptr<T, LargePageDeleter<T>>>;
template<typename T, typename... Args>
std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
static_assert(alignof(T) <= 4096,
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
return LargePagePtr<T>(obj);
}
template<typename T>
std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
using ElementType = std::remove_extent_t<T>;
static_assert(alignof(ElementType) <= 4096,
"aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
return LargePagePtr<T>(memory);
}
template<typename T>
struct AlignedDeleter {
void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
};
template<typename T>
struct AlignedArrayDeleter {
void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
};
template<typename T>
using AlignedPtr =
std::conditional_t<std::is_array_v<T>,
std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
std::unique_ptr<T, AlignedDeleter<T>>>;
template<typename T, typename... Args>
std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
T* obj = memory_allocator<T>(func, std::forward<Args>(args)...);
return AlignedPtr<T>(obj);
}
template<typename T>
std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
using ElementType = std::remove_extent_t<T>;
const auto func = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
ElementType* memory = memory_allocator<T>(func, num);
return AlignedPtr<T>(memory);
}
template<uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr) {
static_assert(alignof(T) < Alignment);
const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
return reinterpret_cast<T*>(
reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
}
#if defined(_WIN32)
template<typename FuncYesT, typename FuncNoT>
auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) {
#if !defined(_WIN64)
return fno();
#else
HANDLE hProcessToken{};
LUID luid{};
const size_t largePageSize = GetLargePageMinimum();
if (!largePageSize)
return fno();
HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
if (!hAdvapi32)
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
auto OpenProcessToken_f =
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
if (!OpenProcessToken_f)
return fno();
auto LookupPrivilegeValueA_f =
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
if (!LookupPrivilegeValueA_f)
return fno();
auto AdjustTokenPrivileges_f =
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
if (!AdjustTokenPrivileges_f)
return fno();
if (!OpenProcessToken_f(
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
return fno();
if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
return fno();
TOKEN_PRIVILEGES tp{};
TOKEN_PRIVILEGES prevTp{};
DWORD prevTpLen = 0;
tp.PrivilegeCount = 1;
tp.Privileges[0].Luid = luid;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (!AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
&prevTpLen)
|| GetLastError() != ERROR_SUCCESS)
return fno();
auto&& ret = fyes(largePageSize);
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
CloseHandle(hProcessToken);
return std::forward<decltype(ret)>(ret);
#endif
}
#endif
template<typename T, typename ByteT>
T load_as(const ByteT* buffer) {
static_assert(std::is_trivially_copyable<T>::value, "Type must be trivially copyable");
static_assert(sizeof(ByteT) == 1);
T value;
std::memcpy(&value, buffer, sizeof(T));
return value;
}
}
#endif