Skip to content

Commit

Permalink
Fast Range over modulo for TT indexing (#420)
Browse files Browse the repository at this point in the history

Bench: 4510458

Supports arbitrary hash sizes (no longer limited to powers of two) and raises the maximum to 128 GB.

STC
ELO | 0.51 +- 2.08 (95%)
SPRT | 10.0+0.10s Threads=1 Hash=8MB
LLR | 2.95 (-2.94, 2.94) [-3.00, 0.00]
GAMES | N: 48976 W: 11242 L: 11170 D: 26564
  • Loading branch information
jhonnold committed Nov 30, 2022
1 parent 3374a9d commit 8023261
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 26 deletions.
2 changes: 0 additions & 2 deletions src/bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ void Bench() {
Board board;
SearchParams params = {.depth = 13, .multiPV = 1, .hitrate = 1000, .max = INT_MAX};

CreatePool(1);

Move bestMoves[NUM_BENCH_POSITIONS];
int scores[NUM_BENCH_POSITIONS];
int nodes[NUM_BENCH_POSITIONS];
Expand Down
4 changes: 3 additions & 1 deletion src/berserk.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "nn.h"
#include "random.h"
#include "search.h"
#include "thread.h"
#include "transposition.h"
#include "types.h"
#include "uci.h"
Expand All @@ -38,7 +39,8 @@ int main(int argc, char** argv) {
InitAttacks();

LoadDefaultNN();
TTInit(32);
CreatePool(1);
TTInit(16);

// Compliance for OpenBench
if (argc > 1 && !strncmp(argv[1], "bench", 5)) {
Expand Down
2 changes: 1 addition & 1 deletion src/thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
ThreadData* threads = NULL;
pthread_t* pthreads = NULL;

void* AlignedMalloc(int size) {
void* AlignedMalloc(uint64_t size) {
void* mem = malloc(size + ALIGN_ON + sizeof(void*));
void** ptr = (void**) ((uintptr_t) (mem + ALIGN_ON + sizeof(void*)) & ~(ALIGN_ON - 1));
ptr[-1] = mem;
Expand Down
2 changes: 2 additions & 0 deletions src/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
extern ThreadData* threads;
extern pthread_t* pthreads;

void* AlignedMalloc(uint64_t size);
void AlignedFree(void* ptr);
void CreatePool(int count);
void InitPool(Board* board, SearchParams* params);
void ResetThreadPool();
Expand Down
55 changes: 42 additions & 13 deletions src/transposition.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand All @@ -27,37 +28,61 @@

#include "bits.h"
#include "search.h"
#include "thread.h"
#include "transposition.h"
#include "types.h"

// Global TT
TTTable TT = {0};

// (Re)creates the global transposition table for a budget of `mb` megabytes.
// Frees any table that is already allocated, allocates new storage, records the
// bucket count, clears the table through TTClear() and returns a size in bytes.
// NOTE(review): this view seems to show superseded lines (TT.mask / keySize based
// sizing) next to their replacements (TT.mem / TT.count based sizing) - confirm
// which lines are live before relying on either description.
size_t TTInit(int mb) {
if (TT.mask) TTFree();
if (TT.mem) TTFree();

// Both log2 results are truncated to integers, so a bucket count derived from
// keySize is rounded down to a power of two.
uint64_t keySize = (uint64_t) log2(mb) + (uint64_t) log2(MEGABYTE / sizeof(TTBucket));
// Cast before multiplying so the byte count is computed in 64 bits.
uint64_t size = (uint64_t) mb * MEGABYTE;

#if defined(__linux__) && !defined(__ANDROID__)
#if defined(__linux__)
// On Linux systems we align on 2MB boundaries and request Huge Pages
TT.buckets = aligned_alloc(2 * MEGABYTE, (1ULL << keySize) * sizeof(TTBucket));
madvise(TT.buckets, (1ULL << keySize) * sizeof(TTBucket), MADV_HUGEPAGE);
// TT.mem keeps the pointer returned by the allocator; TT.buckets is the same
// address typed as TTBucket*.
// NOTE(review): the allocation result is not checked for NULL before it is used.
TT.mem = aligned_alloc(2 * MEGABYTE, size);
TT.buckets = (TTBucket*) TT.mem;
madvise(TT.buckets, size, MADV_HUGEPAGE);
#else
TT.buckets = calloc((1ULL << keySize), sizeof(TTBucket));
// Non-Linux path: storage comes from AlignedMalloc(), so TTFree() must release
// it with AlignedFree().
TT.mem = AlignedMalloc(size);
TT.buckets = (TTBucket*) TT.mem;
#endif

TT.mask = (1ULL << keySize) - 1ULL;
// Number of whole TTBucket entries that fit in `size` bytes.
TT.count = size / sizeof(TTBucket);

// TTClear() dereferences the thread pool, so the pool must exist by this point.
TTClear();
return (TT.mask + 1ULL) * sizeof(TTBucket);
return size;
}

// Releases the transposition table storage using the deallocator that matches
// the allocator TTInit() used: free() for the aligned_alloc() path on Linux,
// AlignedFree() for the AlignedMalloc() path elsewhere.
// NOTE(review): TT.mem / TT.buckets are not reset to NULL here; TTInit()
// overwrites them right after calling this.
void TTFree() {
// NOTE(review): this free(TT.buckets) looks like the superseded line shown next
// to its replacement below; TT.buckets aliases TT.mem after TTInit(), so both
// being live would release the same allocation twice - confirm.
free(TT.buckets);
#if defined(__linux__)
free(TT.mem);
#else
AlignedFree(TT.mem);
#endif
}

void* TTClearPart(void* arg) {
// Logic for dividing the work taken from Weiss (which got from CFish)
ThreadData* thread = (ThreadData*) arg;
int idx = thread->idx;
int count = thread->count;

uint64_t size = TT.count * sizeof(TTBucket);
uint64_t slice = (size + count - 1) / count;
uint64_t blocks = (slice + 2 * MEGABYTE - 1) / (2 * MEGABYTE);
uint64_t begin = min(size, idx * blocks * 2 * MEGABYTE);
uint64_t end = min(size, begin + blocks * 2 * MEGABYTE);

memset(TT.buckets + begin / sizeof(TTBucket), 0, end - begin);
return NULL;
}

// Zeroes the whole transposition table.
// Starts one TTClearPart worker per thread-pool slot (threads->count), handing
// each its own ThreadData entry, and joins them all before returning.
// Dereferences `threads`, so the pool must already have been created.
inline void TTClear() {
// NOTE(review): this single-call memset looks like the superseded
// implementation shown next to the threaded one below - confirm.
memset(TT.buckets, 0, (TT.mask + 1ULL) * sizeof(TTBucket));
// NOTE(review): pthread_create / pthread_join return values are ignored.
for (int i = 0; i < threads->count; i++) pthread_create(&pthreads[i], NULL, TTClearPart, &threads[i]);
for (int i = 0; i < threads->count; i++) pthread_join(pthreads[i], NULL);
}

inline void TTUpdate() {
Expand All @@ -70,12 +95,16 @@ inline int TTScore(TTEntry* e, int ply) {
return e->score > MATE_BOUND ? e->score - ply : e->score < -MATE_BOUND ? e->score + ply : e->score;
}

// Maps a hash to a bucket index without a modulo: the low 32 bits of `hash`
// are multiplied by the bucket count in 64-bit arithmetic and the upper 32 bits
// of the product are returned, which scales the value into [0, TT.count).
// The result is narrowed to uint32_t on return.
inline uint32_t TTIdx(uint64_t hash) {
  uint64_t low = (uint32_t) hash;
  uint64_t scaled = low * (uint64_t) TT.count;
  return scaled >> 32;
}

// Issues a cache prefetch for the bucket that `hash` maps to, so a later probe
// of the same hash is more likely to find it in cache.
inline void TTPrefetch(uint64_t hash) {
// NOTE(review): the mask-indexed prefetch looks like the superseded line shown
// next to its TTIdx()-based replacement - confirm which one is live.
__builtin_prefetch(&TT.buckets[TT.mask & hash]);
__builtin_prefetch(&TT.buckets[TTIdx(hash)]);
}

inline TTEntry* TTProbe(uint64_t hash) {
TTEntry* bucket = TT.buckets[TT.mask & hash].entries;
TTEntry* bucket = TT.buckets[TTIdx(hash)].entries;
uint32_t shortHash = hash >> 32;

for (int i = 0; i < BUCKET_SIZE; i++)
Expand All @@ -88,7 +117,7 @@ inline TTEntry* TTProbe(uint64_t hash) {
}

inline void TTPut(uint64_t hash, int8_t depth, int16_t score, uint8_t flag, Move move, int ply, int16_t eval, int pv) {
TTBucket* bucket = &TT.buckets[TT.mask & hash];
TTBucket* bucket = &TT.buckets[TTIdx(hash)];
uint32_t shortHash = hash >> 32;
TTEntry* toReplace = bucket->entries;

Expand Down
6 changes: 3 additions & 3 deletions src/transposition.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "types.h"

#define NO_ENTRY 0ULL
#define MEGABYTE 0x100000ULL
#define MEGABYTE (1024ull * 1024ull)
#define BUCKET_SIZE 2

typedef struct {
Expand All @@ -35,9 +35,9 @@ typedef struct {
} TTBucket;

// State of the transposition table (see the global `TT`).
typedef struct {
// Pointer handed back by the allocator; this is what TTFree() releases.
void* mem;
// Bucket array; TTInit() points it at the same address as `mem`.
TTBucket* buckets;
// Bucket count minus one, used by the `TT.mask & hash` style of indexing.
uint64_t mask;
// NOTE(review): no use of this field is visible here - confirm whether it is still needed.
uint64_t size;
// Number of buckets; TTIdx() scales hashes into [0, count).
uint64_t count;
// NOTE(review): presumably a generation counter for entry replacement - not shown here, confirm.
uint8_t age;
} TTTable;

Expand Down
11 changes: 5 additions & 6 deletions src/uci.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ void ParsePosition(char* in, Board* board) {
void PrintUCIOptions() {
printf("id name Berserk " VERSION "\n");
printf("id author Jay Honnold\n");
printf("option name Hash type spin default 32 min 4 max 65536\n");
printf("option name Hash type spin default 16 min 2 max 131072\n");
printf("option name Threads type spin default 1 min 1 max 256\n");
printf("option name SyzygyPath type string default <empty>\n");
printf("option name MultiPV type spin default 1 min 1 max 256\n");
Expand All @@ -247,7 +247,6 @@ void UCILoop() {
Board board;
ParseFen(START_FEN, &board);

CreatePool(1);
SearchParams searchParameters = {.quit = 0};

setbuf(stdin, NULL);
Expand Down Expand Up @@ -347,10 +346,10 @@ void UCILoop() {
} else
printf("info string Invalid move!\n");
} else if (!strncmp(in, "setoption name Hash value ", 26)) {
int mb = GetOptionIntValue(in);
mb = max(4, min(65536, mb));
int64_t bytesAllocated = TTInit(mb);
printf("info string set Hash to value %d (%" PRId64 " bytes)\n", mb, bytesAllocated);
int mb = GetOptionIntValue(in);
mb = max(2, min(131072, mb));
uint64_t bytesAllocated = TTInit(mb);
printf("info string set Hash to value %d (%" PRIu64 " bytes)\n", mb, bytesAllocated);
} else if (!strncmp(in, "setoption name Threads value ", 29)) {
int n = GetOptionIntValue(in);
CreatePool(max(1, min(256, n)));
Expand Down

0 comments on commit 8023261

Please sign in to comment.