A little bit ugly, as it uses a 1M-entry array (so a huge waste of memory).
However, the implementation seems to be 5% faster than the original (and more speedup is possible when the incremental update of the non-bitboard-based code is omitted).
Maybe not scalable to larger patterns.
Here is some code.
Code: Select all
// Accumulate the evaluation in value1: one Weight lookup per pattern, 16 in total.
// For pattern k the weight index is 4 + k * 6561 + pIndex[key]; 6561 = 3^8 is the
// number of base-3 indices that index_init() stores in pIndex.
// key packs two masked bitboards the same way index_init() builds its table key:
// the shifted black-man bits in place, OR-ed with the shifted empty bits moved left
// by 2. Note that `<<` binds tighter than `|`, so only the empty term is shifted.
// NOTE(review): the leading 4 looks like an offset past other entries at the start
// of Weight -- confirm against the weight file layout.
// NOTE(review): Mask_Index presumably selects the 8 squares of one pattern and each
// SFLDxx is the shift that aligns that pattern with the mask -- confirm against
// their definitions.
value1 += Weight[4 + 0 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD1)) | (Mask_Index & (empty >> SFLD1)) << 2]];
value1 += Weight[4 + 1 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD2)) | (Mask_Index & (empty >> SFLD2)) << 2]];
value1 += Weight[4 + 2 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD3)) | (Mask_Index & (empty >> SFLD3)) << 2]];
value1 += Weight[4 + 3 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD4)) | (Mask_Index & (empty >> SFLD4)) << 2]];
value1 += Weight[4 + 4 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD11)) | (Mask_Index & (empty >> SFLD11)) << 2]];
value1 += Weight[4 + 5 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD12)) | (Mask_Index & (empty >> SFLD12)) << 2]];
value1 += Weight[4 + 6 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD13)) | (Mask_Index & (empty >> SFLD13)) << 2]];
value1 += Weight[4 + 7 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD14)) | (Mask_Index & (empty >> SFLD14)) << 2]];
value1 += Weight[4 + 8 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD21)) | (Mask_Index & (empty >> SFLD21)) << 2]];
value1 += Weight[4 + 9 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD22)) | (Mask_Index & (empty >> SFLD22)) << 2]];
value1 += Weight[4 + 10 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD23)) | (Mask_Index & (empty >> SFLD23)) << 2]];
value1 += Weight[4 + 11 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD24)) | (Mask_Index & (empty >> SFLD24)) << 2]];
value1 += Weight[4 + 12 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD31)) | (Mask_Index & (empty >> SFLD31)) << 2]];
value1 += Weight[4 + 13 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD32)) | (Mask_Index & (empty >> SFLD32)) << 2]];
value1 += Weight[4 + 14 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD33)) | (Mask_Index & (empty >> SFLD33)) << 2]];
value1 += Weight[4 + 15 * 6561 + pIndex[(Mask_Index & (blackman >> SFLD34)) | (Mask_Index & (empty >> SFLD34)) << 2]];
Code: Select all
void index_init(void) {
int i, j, iarray[8], ivalue, imodulo;
bit_t black, empty;
for (i = 0; i < 6561; i++) {
ivalue = i;
for (j = 7; j >= 0; --j) { // Calculate number 3-base
imodulo = ivalue % 3;
iarray[j] = imodulo;
ivalue -= imodulo;
ivalue /= 3;
}
black = 0; // Set Black Index
if (iarray[0] == 2) black |= 0x1;
if (iarray[1] == 2) black |= 0x2;
if (iarray[2] == 2) black |= 0x20;
if (iarray[3] == 2) black |= 0x40;
if (iarray[4] == 2) black |= 0x800;
if (iarray[5] == 2) black |= 0x1000;
if (iarray[6] == 2) black |= 0x10000;
if (iarray[7] == 2) black |= 0x20000;
empty = 0; // Set empty Index
if (iarray[0] == 1) empty |= 0x1;
if (iarray[1] == 1) empty |= 0x2;
if (iarray[2] == 1) empty |= 0x20;
if (iarray[3] == 1) empty |= 0x40;
if (iarray[4] == 1) empty |= 0x800;
if (iarray[5] == 1) empty |= 0x1000;
if (iarray[6] == 1) empty |= 0x10000;
if (iarray[7] == 1) empty |= 0x20000;
pIndex[black | (empty << 2)] = i;
}
}
Code: Select all
/*
 * Initialise the evaluation: load the weights from "eval", allocate the
 * pattern index table pIndex, mark every entry as unused (-1) and then let
 * index_init() fill in the valid entries.
 *
 * Aborts the program if the table cannot be allocated.
 */
void eval_init() {
    Weight.load("eval");

    // 2^20 entries: the largest key index_init() produces has bit 19 set
    // (0x20000 << 2), so keys always stay below 1048576.
    const size_t table_entries = 1048576;
    const size_t table_bytes = table_entries * sizeof(int);

    // No need for zeroed memory: the whole table is overwritten just below.
    pIndex = static_cast<int*>(malloc(table_bytes)); // Alloc memory for Evaluation Index Table
    if (pIndex == NULL) {
        abort(); // nothing sensible can be evaluated without the index table
    }

    // memset takes a size in BYTES; passing the entry count would only cover a
    // quarter of the table. Setting every byte to 0xFF makes each int equal -1.
    memset(pIndex, -1, table_bytes); // Clear Evaluation Index Table

    index_init();
}
Last but not least: the main bitboard difference between Damage and Moby Dam on the one hand and Scan on the other is the trailing zeros in the bitboards (the first 6 bit positions), and all three use ghost squares. It should therefore be very simple (I guess just a shift operation on the bitboards) to inject the Scan eval into both programs (when the non-bitboard code is omitted), so that may be interesting to test.
I used my old computer to do these tests; I can try to run the modified Scan on my faster system and measure the speed.
Bert