diff --git a/DroidFishApp/src/main/assets/nn-3475407dc199.nnue b/DroidFishApp/src/main/assets/nn-3475407dc199.nnue new file mode 100644 index 0000000..d1fdcf2 Binary files /dev/null and b/DroidFishApp/src/main/assets/nn-3475407dc199.nnue differ diff --git a/DroidFishApp/src/main/assets/nn-62ef826d1a6d.nnue b/DroidFishApp/src/main/assets/nn-62ef826d1a6d.nnue deleted file mode 100644 index 0a40ce2..0000000 Binary files a/DroidFishApp/src/main/assets/nn-62ef826d1a6d.nnue and /dev/null differ diff --git a/DroidFishApp/src/main/cpp/stockfish/Android.mk b/DroidFishApp/src/main/cpp/stockfish/Android.mk index a239b2e..35969fa 100644 --- a/DroidFishApp/src/main/cpp/stockfish/Android.mk +++ b/DroidFishApp/src/main/cpp/stockfish/Android.mk @@ -5,7 +5,7 @@ SF_SRC_FILES := \ bitbase.cpp endgame.cpp material.cpp movepick.cpp position.cpp timeman.cpp \ tune.cpp ucioption.cpp \ bitboard.cpp evaluate.cpp misc.cpp search.cpp tt.cpp syzygy/tbprobe.cpp \ - nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp MY_ARCH_DEF := ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) diff --git a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp index 7cb0438..7945a45 100644 --- a/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/benchmark.cpp @@ -92,6 +92,8 @@ const vector Defaults = { } // namespace +namespace Stockfish { + /// setup_bench() builds a list of UCI commands to be run by bench. There /// are five parameters: TT size in MB, number of search threads that /// should be used, the limit value spent for each position, a file name @@ -168,3 +170,5 @@ vector setup_bench(const Position& current, istream& is) { return list; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp index b640eab..27bf409 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitbase.cpp @@ -23,6 +23,8 @@ #include "bitboard.h" #include "types.h" +namespace Stockfish { + namespace { // There are 24 possible pawn squares: files A to D and ranks from 2 to 7. @@ -66,7 +68,6 @@ namespace { } // namespace - bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) { assert(file_of(wpsq) <= FILE_D); @@ -96,7 +97,6 @@ void Bitbases::init() { KPKBitbase.set(idx); } - namespace { KPKPosition::KPKPosition(unsigned idx) { @@ -150,8 +150,8 @@ namespace { Bitboard b = attacks_bb(ksq[stm]); while (b) - r |= stm == WHITE ? db[index(BLACK, ksq[BLACK] , pop_lsb(&b), psq)] - : db[index(WHITE, pop_lsb(&b), ksq[WHITE], psq)]; + r |= stm == WHITE ? db[index(BLACK, ksq[BLACK], pop_lsb(b), psq)] + : db[index(WHITE, pop_lsb(b), ksq[WHITE], psq)]; if (stm == WHITE) { @@ -168,3 +168,5 @@ namespace { } } // namespace + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp index 841aa0b..6b84b51 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.cpp @@ -22,11 +22,14 @@ #include "bitboard.h" #include "misc.h" +namespace Stockfish { + uint8_t PopCnt16[1 << 16]; uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; Bitboard SquareBB[SQUARE_NB]; Bitboard LineBB[SQUARE_NB][SQUARE_NB]; +Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; @@ -42,7 +45,6 @@ namespace { } - /// safe_destination() returns the bitboard of target square for the given step /// from the given square. If the step is off the board, returns empty bitboard. @@ -55,7 +57,7 @@ inline Bitboard safe_destination(Square s, int step) { /// Bitboards::pretty() returns an ASCII representation of a bitboard suitable /// to be printed to standard output. Useful for debugging. -const std::string Bitboards::pretty(Bitboard b) { +std::string Bitboards::pretty(Bitboard b) { std::string s = "+---+---+---+---+---+---+---+---+\n"; @@ -106,12 +108,17 @@ void Bitboards::init() { for (PieceType pt : { BISHOP, ROOK }) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + { if (PseudoAttacks[pt][s1] & s2) - LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; + { + LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; + BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); + } + BetweenBB[s1][s2] |= s2; + } } } - namespace { Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { @@ -123,7 +130,7 @@ namespace { for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) { Square s = sq; - while(safe_destination(s, d) && !(occupied & s)) + while (safe_destination(s, d) && !(occupied & s)) attacks |= (s += d); } @@ -211,3 +218,5 @@ namespace { } } } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/bitboard.h b/DroidFishApp/src/main/cpp/stockfish/bitboard.h index 95591fc..b29f3e2 100644 --- a/DroidFishApp/src/main/cpp/stockfish/bitboard.h +++ b/DroidFishApp/src/main/cpp/stockfish/bitboard.h @@ -23,19 +23,21 @@ #include "types.h" +namespace Stockfish { + namespace Bitbases { void init(); bool probe(Square wksq, Square wpsq, Square bksq, Color us); -} +} // namespace Stockfish::Bitbases namespace Bitboards { void init(); -const std::string pretty(Bitboard b); +std::string pretty(Bitboard b); -} +} // namespace Stockfish::Bitboards constexpr Bitboard AllSquares = ~Bitboard(0); constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL; @@ -73,6 +75,7 @@ extern uint8_t PopCnt16[1 << 16]; extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; extern Bitboard SquareBB[SQUARE_NB]; +extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; @@ -209,23 +212,29 @@ constexpr Bitboard adjacent_files_bb(Square s) { inline Bitboard line_bb(Square s1, Square s2) { assert(is_ok(s1) && is_ok(s2)); + return LineBB[s1][s2]; } -/// between_bb() returns a bitboard representing squares that are linearly -/// between the two given squares (excluding the given squares). If the given -/// squares are not on a same file/rank/diagonal, we return 0. For instance, -/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6. +/// between_bb(s1, s2) returns a bitboard representing the squares in the semi-open +/// segment between the squares s1 and s2 (excluding s1 but including s2). If the +/// given squares are not on a same file/rank/diagonal, it returns s2. For instance, +/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but +/// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick +/// allows to generate non-king evasion moves faster: the defending piece must either +/// interpose itself to cover the check or capture the checking piece. inline Bitboard between_bb(Square s1, Square s2) { - Bitboard b = line_bb(s1, s2) & ((AllSquares << s1) ^ (AllSquares << s2)); - return b & (b - 1); //exclude lsb + + assert(is_ok(s1) && is_ok(s2)); + + return BetweenBB[s1][s2]; } -/// forward_ranks_bb() returns a bitboard representing the squares on the ranks -/// in front of the given one, from the point of view of the given color. For instance, +/// forward_ranks_bb() returns a bitboard representing the squares on the ranks in +/// front of the given one, from the point of view of the given color. For instance, /// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2. constexpr Bitboard forward_ranks_bb(Color c, Square s) { @@ -412,13 +421,20 @@ inline Square msb(Bitboard b) { #endif +/// least_significant_square_bb() returns the bitboard of the least significant +/// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). + +inline Bitboard least_significant_square_bb(Bitboard b) { + assert(b); + return b & -b; +} /// pop_lsb() finds and clears the least significant bit in a non-zero bitboard -inline Square pop_lsb(Bitboard* b) { - assert(*b); - const Square s = lsb(*b); - *b &= *b - 1; +inline Square pop_lsb(Bitboard& b) { + assert(b); + const Square s = lsb(b); + b &= b - 1; return s; } @@ -430,4 +446,6 @@ inline Square frontmost_sq(Color c, Bitboard b) { return c == WHITE ? msb(b) : lsb(b); } +} // namespace Stockfish + #endif // #ifndef BITBOARD_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp index 1489a36..a44d3a1 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.cpp @@ -22,6 +22,8 @@ #include "endgame.h" #include "movegen.h" +namespace Stockfish { + namespace { // Used to drive the king towards the edge of the board @@ -741,3 +743,5 @@ ScaleFactor Endgame::operator()(const Position& pos) const { // it's probably at least a draw even with the pawn. return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/endgame.h b/DroidFishApp/src/main/cpp/stockfish/endgame.h index 860cc86..146111b 100644 --- a/DroidFishApp/src/main/cpp/stockfish/endgame.h +++ b/DroidFishApp/src/main/cpp/stockfish/endgame.h @@ -28,6 +28,7 @@ #include "position.h" #include "types.h" +namespace Stockfish { /// EndgameCode lists all supported endgame functions by corresponding codes @@ -120,4 +121,6 @@ namespace Endgames { } } +} // namespace Stockfish + #endif // #ifndef ENDGAME_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp index 6f23bf1..f975479 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.cpp @@ -33,6 +33,7 @@ #include "misc.h" #include "pawns.h" #include "thread.h" +#include "timeman.h" #include "uci.h" #include "incbin/incbin.h" @@ -54,7 +55,8 @@ using namespace std; -using namespace Eval::NNUE; + +namespace Stockfish { namespace Eval { @@ -110,8 +112,6 @@ namespace Eval { eval_file_loaded = eval_file; } } - if (eval_file_loaded != eval_file) - eval_file_loaded = ""; } /// NNUE::verify() verifies that the last net used was loaded successfully @@ -180,7 +180,7 @@ namespace Trace { else os << scores[t][WHITE] << " | " << scores[t][BLACK]; - os << " | " << scores[t][WHITE] - scores[t][BLACK] << "\n"; + os << " | " << scores[t][WHITE] - scores[t][BLACK] << " |\n"; return os; } } @@ -190,11 +190,9 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(1565); - constexpr Value LazyThreshold2 = Value(1102); - constexpr Value SpaceThreshold = Value(11551); - constexpr Value NNUEThreshold1 = Value(682); - constexpr Value NNUEThreshold2 = Value(176); + constexpr Value LazyThreshold1 = Value(1565); + constexpr Value LazyThreshold2 = Value(1102); + constexpr Value SpaceThreshold = Value(11551); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -257,11 +255,12 @@ namespace { S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; + constexpr Value CorneredBishop = Value(50); + // Assorted bonuses and penalties constexpr Score UncontestedOutpost = S( 1, 10); constexpr Score BishopOnKingRing = S( 24, 0); constexpr Score BishopXRayPawns = S( 4, 5); - constexpr Score CorneredBishop = S( 50, 50); constexpr Score FlankAttacks = S( 8, 0); constexpr Score Hanging = S( 69, 36); constexpr Score KnightOnQueen = S( 16, 11); @@ -396,8 +395,9 @@ namespace { attackedBy[Us][Pt] = 0; - while (b1) { - Square s = pop_lsb(&b1); + while (b1) + { + Square s = pop_lsb(b1); // Find attacked squares, including x-ray attacks for bishops and rooks b = Pt == BISHOP ? attacks_bb(s, pos.pieces() ^ pos.pieces(QUEEN)) @@ -477,9 +477,8 @@ namespace { { Direction d = pawn_push(Us) + (file_of(s) == FILE_A ? EAST : WEST); if (pos.piece_on(s + d) == make_piece(Us, PAWN)) - score -= !pos.empty(s + d + pawn_push(Us)) ? CorneredBishop * 4 - : pos.piece_on(s + d + d) == make_piece(Us, PAWN) ? CorneredBishop * 2 - : CorneredBishop; + score -= !pos.empty(s + d + pawn_push(Us)) ? 4 * make_score(CorneredBishop, CorneredBishop) + : 3 * make_score(CorneredBishop, CorneredBishop); } } } @@ -658,11 +657,11 @@ namespace { { b = (defended | weak) & (attackedBy[Us][KNIGHT] | attackedBy[Us][BISHOP]); while (b) - score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(&b)))]; + score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(b)))]; b = weak & attackedBy[Us][ROOK]; while (b) - score += ThreatByRook[type_of(pos.piece_on(pop_lsb(&b)))]; + score += ThreatByRook[type_of(pos.piece_on(pop_lsb(b)))]; if (weak & attackedBy[Us][KING]) score += ThreatByKing; @@ -760,7 +759,7 @@ namespace { while (b) { - Square s = pop_lsb(&b); + Square s = pop_lsb(b); assert(!(pos.pieces(Them, PAWN) & forward_file_bb(Us, s + Up))); @@ -906,7 +905,7 @@ namespace { Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK; int sf = me->scale_factor(pos, strongSide); - // If scale factor is not already specific, scale down via general heuristics + // If scale factor is not already specific, scale up/down via general heuristics if (sf == SCALE_FACTOR_NORMAL) { if (pos.opposite_bishops()) @@ -979,7 +978,7 @@ namespace { // Initialize score by reading the incrementally updated scores included in // the position object (material + piece square tables) and the material // imbalance. Score is computed internally from the white point of view. - Score score = pos.psq_score() + me->imbalance() + pos.this_thread()->contempt; + Score score = pos.psq_score() + me->imbalance() + pos.this_thread()->trend; // Probe the pawn hash table pe = Pawns::probe(pos); @@ -1033,12 +1032,48 @@ make_v: v = (v / 16) * 16; // Side to move point of view - v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; + v = (pos.side_to_move() == WHITE ? v : -v); return v; } -} // namespace + + /// Fisher Random Chess: correction for cornered bishops, to fix chess960 play with NNUE + + Value fix_FRC(const Position& pos) { + + constexpr Bitboard Corners = 1ULL << SQ_A1 | 1ULL << SQ_H1 | 1ULL << SQ_A8 | 1ULL << SQ_H8; + + if (!(pos.pieces(BISHOP) & Corners)) + return VALUE_ZERO; + + int correction = 0; + + if ( pos.piece_on(SQ_A1) == W_BISHOP + && pos.piece_on(SQ_B2) == W_PAWN) + correction += !pos.empty(SQ_B3) ? -CorneredBishop * 4 + : -CorneredBishop * 3; + + if ( pos.piece_on(SQ_H1) == W_BISHOP + && pos.piece_on(SQ_G2) == W_PAWN) + correction += !pos.empty(SQ_G3) ? -CorneredBishop * 4 + : -CorneredBishop * 3; + + if ( pos.piece_on(SQ_A8) == B_BISHOP + && pos.piece_on(SQ_B7) == B_PAWN) + correction += !pos.empty(SQ_B6) ? CorneredBishop * 4 + : CorneredBishop * 3; + + if ( pos.piece_on(SQ_H8) == B_BISHOP + && pos.piece_on(SQ_G7) == B_PAWN) + correction += !pos.empty(SQ_G6) ? CorneredBishop * 4 + : CorneredBishop * 3; + + return pos.side_to_move() == WHITE ? Value(correction) + : -Value(correction); + } + +} // namespace Eval /// evaluate() is the evaluator for the outer world. It returns a static @@ -1053,32 +1088,28 @@ Value Eval::evaluate(const Position& pos) { else { // Scale and shift NNUE for compatibility with search and classical evaluation - auto adjusted_NNUE = [&](){ - int mat = pos.non_pawn_material() + 2 * PawnValueMg * pos.count(); - return NNUE::evaluate(pos) * (641 + mat / 32 - 4 * pos.rule50_count()) / 1024 + Tempo; + auto adjusted_NNUE = [&]() + { + int scale = 903 + + 32 * pos.count() + + 32 * pos.non_pawn_material() / 1024; + + Value nnue = NNUE::evaluate(pos, true) * scale / 1024; + + if (pos.is_chess960()) + nnue += fix_FRC(pos); + + return nnue; }; - // If there is PSQ imbalance use classical eval, with small probability if it is small + // If there is PSQ imbalance we use the classical eval, but we switch to + // NNUE eval faster when shuffling or if the material on the board is high. + int r50 = pos.rule50_count(); Value psq = Value(abs(eg_value(pos.psq_score()))); - int r50 = 16 + pos.rule50_count(); - bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; - bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); + bool classical = psq * 5 > (750 + pos.non_pawn_material() / 64) * (5 + r50); - // Use classical evaluation for really low piece endgames. - // The most critical case is a bishop + A/H file pawn vs naked king draw. - bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count() < 2; - - v = classical || strongClassical ? Evaluation(pos).value() : adjusted_NNUE(); - - // If the classical eval is small and imbalance large, use NNUE nevertheless. - // For the case of opposite colored bishops, switch to NNUE eval with - // small probability if the classical eval is less than the threshold. - if ( largePsq && !strongClassical - && ( abs(v) * 16 < NNUEThreshold2 * r50 - || ( pos.opposite_bishops() - && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50 - && !(pos.this_thread()->nodes & 0xB)))) - v = adjusted_NNUE(); + v = classical ? Evaluation(pos).value() // classical + : adjusted_NNUE(); // NNUE } // Damp down the evaluation linearly when shuffling @@ -1095,7 +1126,7 @@ Value Eval::evaluate(const Position& pos) { /// descriptions and values of each evaluation term. Useful for debugging. /// Trace scores are from white's point of view -std::string Eval::trace(const Position& pos) { +std::string Eval::trace(Position& pos) { if (pos.checkers()) return "Final evaluation: none (in check)"; @@ -1107,44 +1138,55 @@ std::string Eval::trace(const Position& pos) { std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt + pos.this_thread()->trend = SCORE_ZERO; // Reset any dynamic contempt v = Evaluation(pos).value(); ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Term | White | Black | Total \n" - << " | MG EG | MG EG | MG EG \n" - << " ------------+-------------+-------------+------------\n" - << " Material | " << Term(MATERIAL) - << " Imbalance | " << Term(IMBALANCE) - << " Pawns | " << Term(PAWN) - << " Knights | " << Term(KNIGHT) - << " Bishops | " << Term(BISHOP) - << " Rooks | " << Term(ROOK) - << " Queens | " << Term(QUEEN) - << " Mobility | " << Term(MOBILITY) - << " King safety | " << Term(KING) - << " Threats | " << Term(THREAT) - << " Passed | " << Term(PASSED) - << " Space | " << Term(SPACE) - << " Winnable | " << Term(WINNABLE) - << " ------------+-------------+-------------+------------\n" - << " Total | " << Term(TOTAL); - - v = pos.side_to_move() == WHITE ? v : -v; - - ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n"; + << " Contributing terms for the classical eval:\n" + << "+------------+-------------+-------------+-------------+\n" + << "| Term | White | Black | Total |\n" + << "| | MG EG | MG EG | MG EG |\n" + << "+------------+-------------+-------------+-------------+\n" + << "| Material | " << Term(MATERIAL) + << "| Imbalance | " << Term(IMBALANCE) + << "| Pawns | " << Term(PAWN) + << "| Knights | " << Term(KNIGHT) + << "| Bishops | " << Term(BISHOP) + << "| Rooks | " << Term(ROOK) + << "| Queens | " << Term(QUEEN) + << "| Mobility | " << Term(MOBILITY) + << "|King safety | " << Term(KING) + << "| Threats | " << Term(THREAT) + << "| Passed | " << Term(PASSED) + << "| Space | " << Term(SPACE) + << "| Winnable | " << Term(WINNABLE) + << "+------------+-------------+-------------+-------------+\n" + << "| Total | " << Term(TOTAL) + << "+------------+-------------+-------------+-------------+\n"; + if (Eval::useNNUE) + ss << '\n' << NNUE::trace(pos) << '\n'; + + ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); + + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nClassical evaluation " << to_cp(v) << " (white side)\n"; if (Eval::useNNUE) { - v = NNUE::evaluate(pos); + v = NNUE::evaluate(pos, false); v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n"; + ss << "NNUE evaluation " << to_cp(v) << " (white side)\n"; } v = evaluate(pos); v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; + ss << "Final evaluation " << to_cp(v) << " (white side)"; + if (Eval::useNNUE) + ss << " [with scaled NNUE, hybrid, ...]"; + ss << "\n"; return ss.str(); } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/evaluate.h b/DroidFishApp/src/main/cpp/stockfish/evaluate.h index 8beca2d..91da01d 100644 --- a/DroidFishApp/src/main/cpp/stockfish/evaluate.h +++ b/DroidFishApp/src/main/cpp/stockfish/evaluate.h @@ -20,14 +20,17 @@ #define EVALUATE_H_INCLUDED #include +#include #include "types.h" +namespace Stockfish { + class Position; namespace Eval { - std::string trace(const Position& pos); + std::string trace(Position& pos); Value evaluate(const Position& pos); extern bool useNNUE; @@ -36,17 +39,24 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-62ef826d1a6d.nnue" + #define EvalFileDefaultName "nn-3475407dc199.nnue" namespace NNUE { - Value evaluate(const Position& pos); - bool load_eval(std::string name, std::istream& stream); + std::string trace(Position& pos); + Value evaluate(const Position& pos, bool adjusted = false); + void init(); void verify(); + bool load_eval(std::string name, std::istream& stream); + bool save_eval(std::ostream& stream); + bool save_eval(const std::optional& filename); + } // namespace NNUE } // namespace Eval +} // namespace Stockfish + #endif // #ifndef EVALUATE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/main.cpp b/DroidFishApp/src/main/cpp/stockfish/main.cpp index ef66246..62e0ed5 100644 --- a/DroidFishApp/src/main/cpp/stockfish/main.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/main.cpp @@ -28,6 +28,8 @@ #include "tt.h" #include "uci.h" +using namespace Stockfish; + int main(int argc, char* argv[]) { std::cout << engine_info() << std::endl; diff --git a/DroidFishApp/src/main/cpp/stockfish/material.cpp b/DroidFishApp/src/main/cpp/stockfish/material.cpp index e76641d..9d17af2 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/material.cpp @@ -24,6 +24,8 @@ using namespace std; +namespace Stockfish { + namespace { #define S(mg, eg) make_score(mg, eg) @@ -72,7 +74,7 @@ namespace { bool is_KBPsK(const Position& pos, Color us) { return pos.non_pawn_material(us) == BishopValueMg - && pos.count(us) >= 1; + && pos.count(us) >= 1; } bool is_KQKRPs(const Position& pos, Color us) { @@ -223,3 +225,5 @@ Entry* probe(const Position& pos) { } } // namespace Material + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/material.h b/DroidFishApp/src/main/cpp/stockfish/material.h index be26425..26535a5 100644 --- a/DroidFishApp/src/main/cpp/stockfish/material.h +++ b/DroidFishApp/src/main/cpp/stockfish/material.h @@ -24,7 +24,7 @@ #include "position.h" #include "types.h" -namespace Material { +namespace Stockfish::Material { /// Material::Entry contains various information about a material configuration. /// It contains a material imbalance evaluation, a function pointer to a special @@ -66,6 +66,6 @@ typedef HashTable Table; Entry* probe(const Position& pos); -} // namespace Material +} // namespace Stockfish::Material #endif // #ifndef MATERIAL_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.cpp b/DroidFishApp/src/main/cpp/stockfish/misc.cpp index ebc7c02..78227ee 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/misc.cpp @@ -51,7 +51,7 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #include #endif -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__) #define POSIXALIGNEDALLOC #include #endif @@ -61,11 +61,13 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); using namespace std; +namespace Stockfish { + namespace { /// Version number. If Version is left empty, then compile date in the format /// DD-MM-YY and show in engine_info. -const string Version = "13"; +const string Version = "14"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We @@ -138,7 +140,7 @@ public: /// the program was compiled) or "Stockfish ", depending on whether /// Version is empty. -const string engine_info(bool to_uci) { +string engine_info(bool to_uci) { const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); string month, day, year; @@ -161,7 +163,7 @@ const string engine_info(bool to_uci) { /// compiler_info() returns a string trying to describe the compiler we use -const std::string compiler_info() { +std::string compiler_info() { #define stringify2(x) #x #define stringify(x) stringify2(x) @@ -190,6 +192,18 @@ const std::string compiler_info() { compiler += "(version "; compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); compiler += ")"; + #elif defined(__e2k__) && defined(__LCC__) + #define dot_ver2(n) \ + compiler += (char)'.'; \ + compiler += (char)('0' + (n) / 10); \ + compiler += (char)('0' + (n) % 10); + + compiler += "MCST LCC "; + compiler += "(version "; + compiler += std::to_string(__LCC__ / 100); + dot_ver2(__LCC__ % 100) + dot_ver2(__LCC_MINOR__) + compiler += ")"; #elif __GNUC__ compiler += "g++ (GNUC) "; compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); @@ -361,7 +375,11 @@ void std_aligned_free(void* ptr) { #if defined(_WIN32) -static void* aligned_large_pages_alloc_win(size_t allocSize) { +static void* aligned_large_pages_alloc_windows(size_t allocSize) { + + #if !defined(_WIN64) + return nullptr; + #else HANDLE hProcessToken { }; LUID luid { }; @@ -404,12 +422,14 @@ static void* aligned_large_pages_alloc_win(size_t allocSize) { CloseHandle(hProcessToken); return mem; + + #endif } void* aligned_large_pages_alloc(size_t allocSize) { // Try to allocate large pages - void* mem = aligned_large_pages_alloc_win(allocSize); + void* mem = aligned_large_pages_alloc_windows(allocSize); // Fall back to regular, page aligned, allocation if necessary if (!mem) @@ -449,8 +469,9 @@ void aligned_large_pages_free(void* mem) { if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { DWORD err = GetLastError(); - std::cerr << "Failed to free transposition table. Error code: 0x" << - std::hex << err << std::dec << std::endl; + std::cerr << "Failed to free large page memory. Error code: 0x" + << std::hex << err + << std::dec << std::endl; exit(EXIT_FAILURE); } } @@ -626,3 +647,5 @@ void init(int argc, char* argv[]) { } // namespace CommandLine + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/misc.h b/DroidFishApp/src/main/cpp/stockfish/misc.h index 7b551ad..dae37cd 100644 --- a/DroidFishApp/src/main/cpp/stockfish/misc.h +++ b/DroidFishApp/src/main/cpp/stockfish/misc.h @@ -28,8 +28,10 @@ #include "types.h" -const std::string engine_info(bool to_uci = false); -const std::string compiler_info(); +namespace Stockfish { + +std::string engine_info(bool to_uci = false); +std::string compiler_info(); void prefetch(void* addr); void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); @@ -64,9 +66,10 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK -// `ptr` must point to an array of size at least -// `sizeof(T) * N + alignment` bytes, where `N` is the -// number of elements in the array. + +// align_ptr_up() : get the first aligned element of an array. +// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, +// where N is the number of elements in the array. template T* align_ptr_up(T* ptr) { @@ -76,6 +79,55 @@ T* align_ptr_up(T* ptr) return reinterpret_cast(reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); } + +// IsLittleEndian : true if and only if the binary is compiled on a little endian machine +static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; +static inline const bool IsLittleEndian = (Le.c[0] == 4); + + +template +class ValueListInserter { +public: + ValueListInserter(T* v, std::size_t& s) : + values(v), + size(&s) + { + } + + void push_back(const T& value) { values[(*size)++] = value; } +private: + T* values; + std::size_t* size; +}; + +template +class ValueList { + +public: + std::size_t size() const { return size_; } + void resize(std::size_t newSize) { size_ = newSize; } + void push_back(const T& value) { values_[size_++] = value; } + T& operator[](std::size_t index) { return values_[index]; } + T* begin() { return values_; } + T* end() { return values_ + size_; } + const T& operator[](std::size_t index) const { return values_[index]; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + operator ValueListInserter() { return ValueListInserter(values_, size_); } + + void swap(ValueList& other) { + const std::size_t maxSize = std::max(size_, other.size_); + for (std::size_t i = 0; i < maxSize; ++i) { + std::swap(values_[i], other.values_[i]); + } + std::swap(size_, other.size_); + } + +private: + T values_[MaxSize]; + std::size_t size_ = 0; +}; + /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated /// to the public domain by Sebastiano Vigna (2014). @@ -143,4 +195,6 @@ namespace CommandLine { extern std::string workingDirectory; // path of the working directory } +} // namespace Stockfish + #endif // #ifndef MISC_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp index c9d6a90..5f3ba90 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.cpp @@ -21,24 +21,21 @@ #include "movegen.h" #include "position.h" +namespace Stockfish { + namespace { template - ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) { + ExtMove* make_promotions(ExtMove* moveList, Square to) { if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) - { *moveList++ = make(to - D, to, QUEEN); - if (attacks_bb(to) & ksq) - *moveList++ = make(to - D, to, KNIGHT); - } if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); - if (!(attacks_bb(to) & ksq)) - *moveList++ = make(to - D, to, KNIGHT); + *moveList++ = make(to - D, to, KNIGHT); } return moveList; @@ -55,20 +52,16 @@ namespace { constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); - const Square ksq = pos.square(Them); - Bitboard emptySquares; + const Bitboard emptySquares = Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces(); + const Bitboard enemies = Type == EVASIONS ? pos.checkers() + : Type == CAPTURES ? target : pos.pieces(Them); Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; - Bitboard enemies = (Type == EVASIONS ? pos.pieces(Them) & target: - Type == CAPTURES ? target : pos.pieces(Them)); - // Single and double pawn pushes, no promotions if (Type != CAPTURES) { - emptySquares = (Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces()); - Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; @@ -80,33 +73,24 @@ namespace { if (Type == QUIET_CHECKS) { - b1 &= pawn_attacks_bb(Them, ksq); - b2 &= pawn_attacks_bb(Them, ksq); - - // Add pawn pushes which give discovered check. This is possible only - // if the pawn is not on the same file as the enemy king, because we - // don't generate captures. Note that a possible discovered check - // promotion has been already generated amongst the captures. - Bitboard dcCandidateQuiets = pos.blockers_for_king(Them) & pawnsNotOn7; - if (dcCandidateQuiets) - { - Bitboard dc1 = shift(dcCandidateQuiets) & emptySquares & ~file_bb(ksq); - Bitboard dc2 = shift(dc1 & TRank3BB) & emptySquares; - - b1 |= dc1; - b2 |= dc2; - } + // To make a quiet check, you either make a direct check by pushing a pawn + // or push a blocker pawn that is not on the same file as the enemy king. + // Discovered check promotion has been already generated amongst the captures. + Square ksq = pos.square(Them); + Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq); + b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns); + b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); } while (b1) { - Square to = pop_lsb(&b1); + Square to = pop_lsb(b1); *moveList++ = make_move(to - Up, to); } while (b2) { - Square to = pop_lsb(&b2); + Square to = pop_lsb(b2); *moveList++ = make_move(to - Up - Up, to); } } @@ -114,24 +98,21 @@ namespace { // Promotions and underpromotions if (pawnsOn7) { - if (Type == CAPTURES) - emptySquares = ~pos.pieces(); - - if (Type == EVASIONS) - emptySquares &= target; - Bitboard b1 = shift(pawnsOn7) & enemies; Bitboard b2 = shift(pawnsOn7) & enemies; Bitboard b3 = shift(pawnsOn7) & emptySquares; + if (Type == EVASIONS) + b3 &= target; + while (b1) - moveList = make_promotions(moveList, pop_lsb(&b1), ksq); + moveList = make_promotions(moveList, pop_lsb(b1)); while (b2) - moveList = make_promotions(moveList, pop_lsb(&b2), ksq); + moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) - moveList = make_promotions(moveList, pop_lsb(&b3), ksq); + moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures @@ -142,13 +123,13 @@ namespace { while (b1) { - Square to = pop_lsb(&b1); + Square to = pop_lsb(b1); *moveList++ = make_move(to - UpRight, to); } while (b2) { - Square to = pop_lsb(&b2); + Square to = pop_lsb(b2); *moveList++ = make_move(to - UpLeft, to); } @@ -156,7 +137,7 @@ namespace { { assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6)); - // An en passant capture cannot resolve a discovered check. + // An en passant capture cannot resolve a discovered check if (Type == EVASIONS && (target & (pos.ep_square() + Up))) return moveList; @@ -165,7 +146,7 @@ namespace { assert(b1); while (b1) - *moveList++ = make(pop_lsb(&b1), pos.ep_square()); + *moveList++ = make(pop_lsb(b1), pos.ep_square()); } } @@ -173,27 +154,24 @@ namespace { } - template - ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard piecesToMove, Bitboard target) { + template + ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); - Bitboard bb = piecesToMove & pos.pieces(Pt); - - if (!bb) - return moveList; - - [[maybe_unused]] const Bitboard checkSquares = pos.check_squares(Pt); - - while (bb) { - Square from = pop_lsb(&bb); + Bitboard bb = pos.pieces(Us, Pt); + while (bb) + { + Square from = pop_lsb(bb); Bitboard b = attacks_bb(from, pos.pieces()) & target; - if constexpr (Checks) - b &= checkSquares; + + // To check, you either move freely a blocker or make a direct check. + if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from))) + b &= pos.check_squares(Pt); while (b) - *moveList++ = make_move(from, pop_lsb(&b)); + *moveList++ = make_move(from, pop_lsb(b)); } return moveList; @@ -206,45 +184,34 @@ namespace { static_assert(Type != LEGAL, "Unsupported type in generate_all()"); constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations - Bitboard target, piecesToMove = pos.pieces(Us); + const Square ksq = pos.square(Us); + Bitboard target; - if(Type == QUIET_CHECKS) - piecesToMove &= ~pos.blockers_for_king(~Us); - - switch (Type) + // Skip generating non-king moves when in double check + if (Type != EVASIONS || !more_than_one(pos.checkers())) { - case CAPTURES: - target = pos.pieces(~Us); - break; - case QUIETS: - case QUIET_CHECKS: - target = ~pos.pieces(); - break; - case EVASIONS: - { - Square checksq = lsb(pos.checkers()); - target = between_bb(pos.square(Us), checksq) | checksq; - break; - } - case NON_EVASIONS: - target = ~pos.pieces(Us); - break; + target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) + : Type == NON_EVASIONS ? ~pos.pieces( Us) + : Type == CAPTURES ? pos.pieces(~Us) + : ~pos.pieces( ); // QUIETS || QUIET_CHECKS + + moveList = generate_pawn_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); } - moveList = generate_pawn_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, piecesToMove, target); - moveList = generate_moves(pos, moveList, piecesToMove, target); - moveList = generate_moves< ROOK, Checks>(pos, moveList, piecesToMove, target); - moveList = generate_moves< QUEEN, Checks>(pos, moveList, piecesToMove, target); - - if (Type != QUIET_CHECKS && Type != EVASIONS) + if (!Checks || pos.blockers_for_king(~Us) & ksq) { - Square ksq = pos.square(Us); - Bitboard b = attacks_bb(ksq) & target; - while (b) - *moveList++ = make_move(ksq, pop_lsb(&b)); + Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); + if (Checks) + b &= ~attacks_bb(pos.square(~Us)); - if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING)) + while (b) + *moveList++ = make_move(ksq, pop_lsb(b)); + + if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = make(ksq, pos.castling_rook_square(cr)); @@ -256,8 +223,10 @@ namespace { } // namespace -/// Generates all pseudo-legal captures plus queen and checking knight promotions -/// Generates all pseudo-legal non-captures and underpromotions (except checking knight) +/// Generates all pseudo-legal captures plus queen promotions +/// Generates all pseudo-legal non-captures and underpromotions +/// Generates all pseudo-legal check evasions when the side to move is in check +/// Generates all pseudo-legal non-captures giving check, except castling and promotions /// Generates all pseudo-legal captures and non-captures /// /// Returns a pointer to the end of the move list. @@ -265,8 +234,8 @@ namespace { template ExtMove* generate(const Position& pos, ExtMove* moveList) { - static_assert(Type == CAPTURES || Type == QUIETS || Type == NON_EVASIONS, "Unsupported type in generate()"); - assert(!pos.checkers()); + static_assert(Type != LEGAL, "Unsupported type in generate()"); + assert((Type == EVASIONS) == (bool)pos.checkers()); Color us = pos.side_to_move(); @@ -277,70 +246,11 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) { // Explicit template instantiations template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); -/// generate generates all pseudo-legal non-captures giving check, -/// except castling. Returns a pointer to the end of the move list. -template<> -ExtMove* generate(const Position& pos, ExtMove* moveList) { - - assert(!pos.checkers()); - - Color us = pos.side_to_move(); - Bitboard dc = pos.blockers_for_king(~us) & pos.pieces(us) & ~pos.pieces(PAWN); - - while (dc) - { - Square from = pop_lsb(&dc); - PieceType pt = type_of(pos.piece_on(from)); - - Bitboard b = attacks_bb(pt, from, pos.pieces()) & ~pos.pieces(); - - if (pt == KING) - b &= ~attacks_bb(pos.square(~us)); - - while (b) - *moveList++ = make_move(from, pop_lsb(&b)); - } - - return us == WHITE ? generate_all(pos, moveList) - : generate_all(pos, moveList); -} - - -/// generate generates all pseudo-legal check evasions when the side -/// to move is in check. Returns a pointer to the end of the move list. -template<> -ExtMove* generate(const Position& pos, ExtMove* moveList) { - - assert(pos.checkers()); - - Color us = pos.side_to_move(); - Square ksq = pos.square(us); - Bitboard sliderAttacks = 0; - Bitboard sliders = pos.checkers() & ~pos.pieces(KNIGHT, PAWN); - - // Find all the squares attacked by slider checkers. We will remove them from - // the king evasions in order to skip known illegal moves, which avoids any - // useless legality checks later on. - while (sliders) - sliderAttacks |= line_bb(ksq, pop_lsb(&sliders)) & ~pos.checkers(); - - // Generate evasions for king, capture and non capture moves - Bitboard b = attacks_bb(ksq) & ~pos.pieces(us) & ~sliderAttacks; - while (b) - *moveList++ = make_move(ksq, pop_lsb(&b)); - - if (more_than_one(pos.checkers())) - return moveList; // Double check, only a king move can save the day - - // Generate blocking evasions or captures of the checking piece - return us == WHITE ? generate_all(pos, moveList) - : generate_all(pos, moveList); -} - - /// generate generates all the legal moves in the given position template<> @@ -362,3 +272,5 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) { return moveList; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/movegen.h b/DroidFishApp/src/main/cpp/stockfish/movegen.h index 85887a6..3f895f0 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movegen.h +++ b/DroidFishApp/src/main/cpp/stockfish/movegen.h @@ -23,6 +23,8 @@ #include "types.h" +namespace Stockfish { + class Position; enum GenType { @@ -70,4 +72,6 @@ private: ExtMove moveList[MAX_MOVES], *last; }; +} // namespace Stockfish + #endif // #ifndef MOVEGEN_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp index 0ceeb8e..4ff4cff 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.cpp @@ -20,6 +20,8 @@ #include "movepick.h" +namespace Stockfish { + namespace { enum Stages { @@ -263,3 +265,5 @@ top: assert(false); return MOVE_NONE; // Silence warning } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/movepick.h b/DroidFishApp/src/main/cpp/stockfish/movepick.h index ea599cd..c76d495 100644 --- a/DroidFishApp/src/main/cpp/stockfish/movepick.h +++ b/DroidFishApp/src/main/cpp/stockfish/movepick.h @@ -27,6 +27,8 @@ #include "position.h" #include "types.h" +namespace Stockfish { + /// StatsEntry stores the stat table value. It is usually a number but could /// be a move or even a nested history. We use a class instead of naked value /// to directly call history update operator<<() on the entry so to use stats @@ -156,4 +158,6 @@ private: ExtMove moves[MAX_MOVES]; }; +} // namespace Stockfish + #endif // #ifndef MOVEPICK_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/architectures/halfkp_256x2-32-32.h b/DroidFishApp/src/main/cpp/stockfish/nnue/architectures/halfkp_256x2-32-32.h deleted file mode 100644 index a0fe2e0..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/architectures/halfkp_256x2-32-32.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Definition of input features and network structure used in NNUE evaluation function - -#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED -#define NNUE_HALFKP_256X2_32_32_H_INCLUDED - -#include "../features/feature_set.h" -#include "../features/half_kp.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Eval::NNUE { - -// Input features used in evaluation function -using RawFeatures = Features::FeatureSet< - Features::HalfKP>; - -// Number of input feature dimensions after conversion -constexpr IndexType kTransformedFeatureDimensions = 256; - -namespace Layers { - -// Define network structure -using InputLayer = InputSlice; -using HiddenLayer1 = ClippedReLU>; -using HiddenLayer2 = ClippedReLU>; -using OutputLayer = AffineTransform; - -} // namespace Layers - -using Network = Layers::OutputLayer; - -} // namespace Eval::NNUE - -#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp index fb4a502..8828ae5 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.cpp @@ -20,6 +20,9 @@ #include #include +#include +#include +#include #include "../evaluate.h" #include "../position.h" @@ -29,29 +32,30 @@ #include "evaluate_nnue.h" -namespace Eval::NNUE { +namespace Stockfish::Eval::NNUE { // Input feature converter - LargePagePtr feature_transformer; + LargePagePtr featureTransformer; // Evaluation function - AlignedPtr network; + AlignedPtr network[LayerStacks]; // Evaluation function file name std::string fileName; + std::string netDescription; namespace Detail { // Initialize the evaluation function parameters template - void Initialize(AlignedPtr& pointer) { + void initialize(AlignedPtr& pointer) { pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); std::memset(pointer.get(), 0, sizeof(T)); } template - void Initialize(LargePagePtr& pointer) { + void initialize(LargePagePtr& pointer) { static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); @@ -60,85 +64,380 @@ namespace Eval::NNUE { // Read evaluation function parameters template - bool ReadParameters(std::istream& stream, T& reference) { + bool read_parameters(std::istream& stream, T& reference) { std::uint32_t header; header = read_little_endian(stream); - if (!stream || header != T::GetHashValue()) return false; - return reference.ReadParameters(stream); + if (!stream || header != T::get_hash_value()) return false; + return reference.read_parameters(stream); + } + + // Write evaluation function parameters + template + bool write_parameters(std::ostream& stream, const T& reference) { + + write_little_endian(stream, T::get_hash_value()); + return reference.write_parameters(stream); } } // namespace Detail // Initialize the evaluation function parameters - void Initialize() { + void initialize() { - Detail::Initialize(feature_transformer); - Detail::Initialize(network); + Detail::initialize(featureTransformer); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(network[i]); } // Read network header - bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture) + bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) { std::uint32_t version, size; version = read_little_endian(stream); - *hash_value = read_little_endian(stream); + *hashValue = read_little_endian(stream); size = read_little_endian(stream); - if (!stream || version != kVersion) return false; - architecture->resize(size); - stream.read(&(*architecture)[0], size); + if (!stream || version != Version) return false; + desc->resize(size); + stream.read(&(*desc)[0], size); + return !stream.fail(); + } + + // Write network header + bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) + { + write_little_endian(stream, Version); + write_little_endian(stream, hashValue); + write_little_endian(stream, desc.size()); + stream.write(&desc[0], desc.size()); return !stream.fail(); } // Read network parameters - bool ReadParameters(std::istream& stream) { + bool read_parameters(std::istream& stream) { - std::uint32_t hash_value; - std::string architecture; - if (!ReadHeader(stream, &hash_value, &architecture)) return false; - if (hash_value != kHashValue) return false; - if (!Detail::ReadParameters(stream, *feature_transformer)) return false; - if (!Detail::ReadParameters(stream, *network)) return false; + std::uint32_t hashValue; + if (!read_header(stream, &hashValue, &netDescription)) return false; + if (hashValue != HashValue) return false; + if (!Detail::read_parameters(stream, *featureTransformer)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::read_parameters(stream, *(network[i]))) return false; return stream && stream.peek() == std::ios::traits_type::eof(); } + // Write network parameters + bool write_parameters(std::ostream& stream) { + + if (!write_header(stream, HashValue, netDescription)) return false; + if (!Detail::write_parameters(stream, *featureTransformer)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::write_parameters(stream, *(network[i]))) return false; + return (bool)stream; + } + // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { + Value evaluate(const Position& pos, bool adjusted) { // We manually align the arrays on the stack because with gcc < 9.3 // overaligning stack variables with alignas() doesn't work correctly. - constexpr uint64_t alignment = kCacheLineSize; + constexpr uint64_t alignment = CacheLineSize; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformed_features_unaligned[ - FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)]; - char buffer_unaligned[Network::kBufferSize + alignment]; + TransformedFeatureType transformedFeaturesUnaligned[ + FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + char bufferUnaligned[Network::BufferSize + alignment]; - auto* transformed_features = align_ptr_up(&transformed_features_unaligned[0]); - auto* buffer = align_ptr_up(&buffer_unaligned[0]); + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); + auto* buffer = align_ptr_up(&bufferUnaligned[0]); #else alignas(alignment) - TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - alignas(alignment) char buffer[Network::kBufferSize]; + TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + alignas(alignment) char buffer[Network::BufferSize]; #endif - ASSERT_ALIGNED(transformed_features, alignment); + ASSERT_ALIGNED(transformedFeatures, alignment); ASSERT_ALIGNED(buffer, alignment); - feature_transformer->Transform(pos, transformed_features); - const auto output = network->Propagate(transformed_features, buffer); + const std::size_t bucket = (pos.count() - 1) / 4; + const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto output = network[bucket]->propagate(transformedFeatures, buffer); - return static_cast(output[0] / FV_SCALE); + int materialist = psqt; + int positional = output[0]; + + int delta_npm = abs(pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK)); + int entertainment = (adjusted && delta_npm <= BishopValueMg - KnightValueMg ? 7 : 0); + + int A = 128 - entertainment; + int B = 128 + entertainment; + + int sum = (A * materialist + B * positional) / 128; + + return static_cast( sum / OutputScale ); } + struct NnueEvalTrace { + static_assert(LayerStacks == PSQTBuckets); + + Value psqt[LayerStacks]; + Value positional[LayerStacks]; + std::size_t correctBucket; + }; + + static NnueEvalTrace trace_evaluate(const Position& pos) { + + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + + constexpr uint64_t alignment = CacheLineSize; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType transformedFeaturesUnaligned[ + FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + char bufferUnaligned[Network::BufferSize + alignment]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); + auto* buffer = align_ptr_up(&bufferUnaligned[0]); +#else + alignas(alignment) + TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + alignas(alignment) char buffer[Network::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + ASSERT_ALIGNED(buffer, alignment); + + NnueEvalTrace t{}; + t.correctBucket = (pos.count() - 1) / 4; + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { + const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto output = network[bucket]->propagate(transformedFeatures, buffer); + + int materialist = psqt; + int positional = output[0]; + + t.psqt[bucket] = static_cast( materialist / OutputScale ); + t.positional[bucket] = static_cast( positional / OutputScale ); + } + + return t; + } + + static const std::string PieceToChar(" PNBRQK pnbrqk"); + + // Requires the buffer to have capacity for at least 5 values + static void format_cp_compact(Value v, char* buffer) { + + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); + + int cp = std::abs(100 * v / PawnValueEg); + + if (cp >= 10000) + { + buffer[1] = '0' + cp / 10000; cp %= 10000; + buffer[2] = '0' + cp / 1000; cp %= 1000; + buffer[3] = '0' + cp / 100; cp %= 100; + buffer[4] = ' '; + } + else if (cp >= 1000) + { + buffer[1] = '0' + cp / 1000; cp %= 1000; + buffer[2] = '0' + cp / 100; cp %= 100; + buffer[3] = '.'; + buffer[4] = '0' + cp / 10; + } + else + { + buffer[1] = '0' + cp / 100; cp %= 100; + buffer[2] = '.'; + buffer[3] = '0' + cp / 10; cp %= 10; + buffer[4] = '0' + cp / 1; + } + } + + // Requires the buffer to have capacity for at least 7 values + static void format_cp_aligned_dot(Value v, char* buffer) { + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); + + int cp = std::abs(100 * v / PawnValueEg); + + if (cp >= 10000) + { + buffer[1] = '0' + cp / 10000; cp %= 10000; + buffer[2] = '0' + cp / 1000; cp %= 1000; + buffer[3] = '0' + cp / 100; cp %= 100; + buffer[4] = '.'; + buffer[5] = '0' + cp / 10; cp %= 10; + buffer[6] = '0' + cp; + } + else if (cp >= 1000) + { + buffer[1] = ' '; + buffer[2] = '0' + cp / 1000; cp %= 1000; + buffer[3] = '0' + cp / 100; cp %= 100; + buffer[4] = '.'; + buffer[5] = '0' + cp / 10; cp %= 10; + buffer[6] = '0' + cp; + } + else + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = '0' + cp / 100; cp %= 100; + buffer[4] = '.'; + buffer[5] = '0' + cp / 10; cp %= 10; + buffer[6] = '0' + cp / 1; + } + } + + + // trace() returns a string with the value of each piece on a board, + // and a table for (PSQT, Layers) values bucket by bucket. + + std::string trace(Position& pos) { + + std::stringstream ss; + + char board[3*8+1][8*8+2]; + std::memset(board, ' ', sizeof(board)); + for (int row = 0; row < 3*8+1; ++row) + board[row][8*8+1] = '\0'; + + // A lambda to output one box of the board + auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) { + + const int x = ((int)file) * 8; + const int y = (7 - (int)rank) * 3; + for (int i = 1; i < 8; ++i) + board[y][x+i] = board[y+3][x+i] = '-'; + for (int i = 1; i < 3; ++i) + board[y+i][x] = board[y+i][x+8] = '|'; + board[y][x] = board[y][x+8] = board[y+3][x+8] = board[y+3][x] = '+'; + if (pc != NO_PIECE) + board[y+1][x+4] = PieceToChar[pc]; + if (value != VALUE_NONE) + format_cp_compact(value, &board[y+2][x+2]); + }; + + // We estimate the value of each piece by doing a differential evaluation from + // the current base eval, simulating the removal of the piece from its square. + Value base = evaluate(pos); + base = pos.side_to_move() == WHITE ? base : -base; + + for (File f = FILE_A; f <= FILE_H; ++f) + for (Rank r = RANK_1; r <= RANK_8; ++r) + { + Square sq = make_square(f, r); + Piece pc = pos.piece_on(sq); + Value v = VALUE_NONE; + + if (pc != NO_PIECE && type_of(pc) != KING) + { + auto st = pos.state(); + + pos.remove_piece(sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + + Value eval = evaluate(pos); + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; + + pos.put_piece(pc, sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + } + + writeSquare(f, r, pc, v); + } + + ss << " NNUE derived piece values:\n"; + for (int row = 0; row < 3*8+1; ++row) + ss << board[row] << '\n'; + ss << '\n'; + + auto t = trace_evaluate(pos); + + ss << " NNUE network contributions " + << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl + << "+------------+------------+------------+------------+\n" + << "| Bucket | Material | Positional | Total |\n" + << "| | (PSQT) | (Layers) | |\n" + << "+------------+------------+------------+------------+\n"; + + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) + { + char buffer[3][8]; + std::memset(buffer, '\0', sizeof(buffer)); + + format_cp_aligned_dot(t.psqt[bucket], buffer[0]); + format_cp_aligned_dot(t.positional[bucket], buffer[1]); + format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]); + + ss << "| " << bucket << " " + << " | " << buffer[0] << " " + << " | " << buffer[1] << " " + << " | " << buffer[2] << " " + << " |"; + if (bucket == t.correctBucket) + ss << " <-- this bucket is used"; + ss << '\n'; + } + + ss << "+------------+------------+------------+------------+\n"; + + return ss.str(); + } + + // Load eval, from a file stream or a memory stream bool load_eval(std::string name, std::istream& stream) { - Initialize(); + initialize(); fileName = name; - return ReadParameters(stream); + return read_parameters(stream); } -} // namespace Eval::NNUE + // Save eval, to a file stream or a memory stream + bool save_eval(std::ostream& stream) { + + if (fileName.empty()) + return false; + + return write_parameters(stream); + } + + /// Save eval, to a file given by its name + bool save_eval(const std::optional& filename) { + + std::string actualFilename; + std::string msg; + + if (filename.has_value()) + actualFilename = filename.value(); + else + { + if (eval_file_loaded != EvalFileDefaultName) + { + msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; + + sync_cout << msg << sync_endl; + return false; + } + actualFilename = EvalFileDefaultName; + } + + std::ofstream stream(actualFilename, std::ios_base::binary); + bool saved = save_eval(stream); + + msg = saved ? "Network saved successfully to " + actualFilename + : "Failed to export a net"; + + sync_cout << msg << sync_endl; + return saved; + } + + +} // namespace Stockfish::Eval::NNUE diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h index c30d7c0..c7fa4a9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/evaluate_nnue.h @@ -25,11 +25,11 @@ #include -namespace Eval::NNUE { +namespace Stockfish::Eval::NNUE { // Hash value of evaluation function structure - constexpr std::uint32_t kHashValue = - FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); + constexpr std::uint32_t HashValue = + FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); // Deleter for automating release of memory area template @@ -54,6 +54,6 @@ namespace Eval::NNUE { template using LargePagePtr = std::unique_ptr>; -} // namespace Eval::NNUE +} // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/feature_set.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/feature_set.h deleted file mode 100644 index 77d2220..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/feature_set.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// A class template that represents the input feature set of the NNUE evaluation function - -#ifndef NNUE_FEATURE_SET_H_INCLUDED -#define NNUE_FEATURE_SET_H_INCLUDED - -#include "features_common.h" -#include - -namespace Eval::NNUE::Features { - - // Class template that represents a list of values - template - struct CompileTimeList; - - template - struct CompileTimeList { - static constexpr bool Contains(T value) { - return value == First || CompileTimeList::Contains(value); - } - static constexpr std::array - kValues = {{First, Remaining...}}; - }; - - // Base class of feature set - template - class FeatureSetBase { - - }; - - // Class template that represents the feature set - template - class FeatureSet : public FeatureSetBase> { - - public: - // Hash value embedded in the evaluation file - static constexpr std::uint32_t kHashValue = FeatureType::kHashValue; - // Number of feature dimensions - static constexpr IndexType kDimensions = FeatureType::kDimensions; - // Maximum number of simultaneously active features - static constexpr IndexType kMaxActiveDimensions = - FeatureType::kMaxActiveDimensions; - // Trigger for full calculation instead of difference calculation - using SortedTriggerSet = - CompileTimeList; - static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; - - }; - -} // namespace Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/features_common.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/features_common.h deleted file mode 100644 index b0073b8..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/features_common.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Common header of input features of NNUE evaluation function - -#ifndef NNUE_FEATURES_COMMON_H_INCLUDED -#define NNUE_FEATURES_COMMON_H_INCLUDED - -#include "../../evaluate.h" -#include "../nnue_common.h" - -namespace Eval::NNUE::Features { - - class IndexList; - - template - class FeatureSet; - - // Trigger to perform full calculations instead of difference only - enum class TriggerEvent { - kFriendKingMoved // calculate full evaluation when own king moves - }; - - enum class Side { - kFriend // side to move - }; - -} // namespace Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp new file mode 100644 index 0000000..57f43e5 --- /dev/null +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.cpp @@ -0,0 +1,85 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +//Definition of input features HalfKAv2 of NNUE evaluation function + +#include "half_ka_v2.h" + +#include "../../position.h" + +namespace Stockfish::Eval::NNUE::Features { + + // Orient a square according to perspective (rotates by 180 for black) + inline Square HalfKAv2::orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 56)); + } + + // Index of a feature for a given king position and another piece on some square + inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) { + return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq); + } + + // Get a list of indices for active features + void HalfKAv2::append_active_indices( + const Position& pos, + Color perspective, + ValueListInserter active + ) { + Square ksq = orient(perspective, pos.square(perspective)); + Bitboard bb = pos.pieces(); + while (bb) + { + Square s = pop_lsb(bb); + active.push_back(make_index(perspective, s, pos.piece_on(s), ksq)); + } + } + + + // append_changed_indices() : get a list of indices for recently changed features + + void HalfKAv2::append_changed_indices( + Square ksq, + StateInfo* st, + Color perspective, + ValueListInserter removed, + ValueListInserter added + ) { + const auto& dp = st->dirtyPiece; + Square oriented_ksq = orient(perspective, ksq); + for (int i = 0; i < dp.dirty_num; ++i) { + Piece pc = dp.piece[i]; + if (dp.from[i] != SQ_NONE) + removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq)); + if (dp.to[i] != SQ_NONE) + added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq)); + } + } + + int HalfKAv2::update_cost(StateInfo* st) { + return st->dirtyPiece.dirty_num; + } + + int HalfKAv2::refresh_cost(const Position& pos) { + return pos.count(); + } + + bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) { + return st->dirtyPiece.piece[0] == make_piece(perspective, KING); + } + +} // namespace Stockfish::Eval::NNUE::Features diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h new file mode 100644 index 0000000..e4b2edd --- /dev/null +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_ka_v2.h @@ -0,0 +1,111 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +//Definition of input features HalfKP of NNUE evaluation function + +#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED +#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED + +#include "../nnue_common.h" + +#include "../../evaluate.h" +#include "../../misc.h" + +namespace Stockfish { + struct StateInfo; +} + +namespace Stockfish::Eval::NNUE::Features { + + // Feature HalfKAv2: Combination of the position of own king + // and the position of pieces + class HalfKAv2 { + + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 0, + PS_B_PAWN = 1 * SQUARE_NB, + PS_W_KNIGHT = 2 * SQUARE_NB, + PS_B_KNIGHT = 3 * SQUARE_NB, + PS_W_BISHOP = 4 * SQUARE_NB, + PS_B_BISHOP = 5 * SQUARE_NB, + PS_W_ROOK = 6 * SQUARE_NB, + PS_B_ROOK = 7 * SQUARE_NB, + PS_W_QUEEN = 8 * SQUARE_NB, + PS_B_QUEEN = 9 * SQUARE_NB, + PS_KING = 10 * SQUARE_NB, + PS_NB = 11 * SQUARE_NB + }; + + static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE }, + { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE } + }; + + // Orient a square according to perspective (rotates by 180 for black) + static Square orient(Color perspective, Square s); + + // Index of a feature for a given king position and another piece on some square + static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); + + public: + // Feature name + static constexpr const char* Name = "HalfKAv2(Friend)"; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t HashValue = 0x5f234cb8u; + + // Number of feature dimensions + static constexpr IndexType Dimensions = + static_cast(SQUARE_NB) * static_cast(PS_NB); + + // Maximum number of simultaneously active features. + static constexpr IndexType MaxActiveDimensions = 32; + + // Get a list of indices for active features + static void append_active_indices( + const Position& pos, + Color perspective, + ValueListInserter active); + + // Get a list of indices for recently changed features + static void append_changed_indices( + Square ksq, + StateInfo* st, + Color perspective, + ValueListInserter removed, + ValueListInserter added); + + // Returns the cost of updating one perspective, the most costly one. + // Assumes no refresh needed. + static int update_cost(StateInfo* st); + static int refresh_cost(const Position& pos); + + // Returns whether the change stored in this StateInfo means that + // a full accumulator refresh is required. + static bool requires_refresh(StateInfo* st, Color perspective); + }; + +} // namespace Stockfish::Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.cpp b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.cpp deleted file mode 100644 index b52a45f..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Definition of input features HalfKP of NNUE evaluation function - -#include "half_kp.h" -#include "index_list.h" - -namespace Eval::NNUE::Features { - - // Orient a square according to perspective (rotates by 180 for black) - inline Square orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); - } - - // Index of a feature for a given king position and another piece on some square - inline IndexType make_index(Color perspective, Square s, Piece pc, Square ksq) { - return IndexType(orient(perspective, s) + kpp_board_index[perspective][pc] + PS_END * ksq); - } - - // Get a list of indices for active features - template - void HalfKP::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - - Square ksq = orient(perspective, pos.square(perspective)); - Bitboard bb = pos.pieces() & ~pos.pieces(KING); - while (bb) { - Square s = pop_lsb(&bb); - active->push_back(make_index(perspective, s, pos.piece_on(s), ksq)); - } - } - - // Get a list of indices for recently changed features - template - void HalfKP::AppendChangedIndices( - const Position& pos, const DirtyPiece& dp, Color perspective, - IndexList* removed, IndexList* added) { - - Square ksq = orient(perspective, pos.square(perspective)); - for (int i = 0; i < dp.dirty_num; ++i) { - Piece pc = dp.piece[i]; - if (type_of(pc) == KING) continue; - if (dp.from[i] != SQ_NONE) - removed->push_back(make_index(perspective, dp.from[i], pc, ksq)); - if (dp.to[i] != SQ_NONE) - added->push_back(make_index(perspective, dp.to[i], pc, ksq)); - } - } - - template class HalfKP; - -} // namespace Eval::NNUE::Features diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.h deleted file mode 100644 index d203dc2..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/half_kp.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Definition of input features HalfKP of NNUE evaluation function - -#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED -#define NNUE_FEATURES_HALF_KP_H_INCLUDED - -#include "../../evaluate.h" -#include "features_common.h" - -namespace Eval::NNUE::Features { - - // Feature HalfKP: Combination of the position of own king - // and the position of pieces other than kings - template - class HalfKP { - - public: - // Feature name - static constexpr const char* kName = "HalfKP(Friend)"; - // Hash value embedded in the evaluation file - static constexpr std::uint32_t kHashValue = - 0x5D69D5B9u ^ (AssociatedKing == Side::kFriend); - // Number of feature dimensions - static constexpr IndexType kDimensions = - static_cast(SQUARE_NB) * static_cast(PS_END); - // Maximum number of simultaneously active features - static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count - // Trigger for full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; - - // Get a list of indices for active features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices for recently changed features - static void AppendChangedIndices(const Position& pos, const DirtyPiece& dp, Color perspective, - IndexList* removed, IndexList* added); - }; - -} // namespace Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/features/index_list.h b/DroidFishApp/src/main/cpp/stockfish/nnue/features/index_list.h deleted file mode 100644 index ca3ebee..0000000 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/features/index_list.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -// Definition of index list of input features - -#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED -#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED - -#include "../../position.h" -#include "../nnue_architecture.h" - -namespace Eval::NNUE::Features { - - // Class template used for feature index list - template - class ValueList { - - public: - std::size_t size() const { return size_; } - void resize(std::size_t size) { size_ = size; } - void push_back(const T& value) { values_[size_++] = value; } - T& operator[](std::size_t index) { return values_[index]; } - T* begin() { return values_; } - T* end() { return values_ + size_; } - const T& operator[](std::size_t index) const { return values_[index]; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } - - void swap(ValueList& other) { - const std::size_t max_size = std::max(size_, other.size_); - for (std::size_t i = 0; i < max_size; ++i) { - std::swap(values_[i], other.values_[i]); - } - std::swap(size_, other.size_); - } - - private: - T values_[MaxSize]; - std::size_t size_ = 0; - }; - - //Type of feature index list - class IndexList - : public ValueList { - }; - -} // namespace Eval::NNUE::Features - -#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h index adf152e..9a3b778 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/affine_transform.h @@ -24,10 +24,10 @@ #include #include "../nnue_common.h" -namespace Eval::NNUE::Layers { +namespace Stockfish::Eval::NNUE::Layers { // Affine transformation layer - template + template class AffineTransform { public: // Input/output type @@ -36,104 +36,88 @@ namespace Eval::NNUE::Layers { static_assert(std::is_same::value, ""); // Number of input/output dimensions - static constexpr IndexType kInputDimensions = - PreviousLayer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = OutputDimensions; - static constexpr IndexType kPaddedInputDimensions = - CeilToMultiple(kInputDimensions, kMaxSimdWidth); + static constexpr IndexType InputDimensions = + PreviousLayer::OutputDimensions; + static constexpr IndexType OutputDimensions = OutDims; + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); #if defined (USE_AVX512) - static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 2; + static constexpr const IndexType OutputSimdWidth = SimdWidth / 2; #elif defined (USE_SSSE3) - static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 4; + static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; #endif // Size of forward propagation buffer used in this layer - static constexpr std::size_t kSelfBufferSize = - CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + static constexpr std::size_t SelfBufferSize = + ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize); // Size of the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = - PreviousLayer::kBufferSize + kSelfBufferSize; + static constexpr std::size_t BufferSize = + PreviousLayer::BufferSize + SelfBufferSize; // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0xCC03DAE4u; - hash_value += kOutputDimensions; - hash_value ^= PreviousLayer::GetHashValue() >> 1; - hash_value ^= PreviousLayer::GetHashValue() << 31; - return hash_value; + static constexpr std::uint32_t get_hash_value() { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= PreviousLayer::get_hash_value() >> 1; + hashValue ^= PreviousLayer::get_hash_value() << 31; + return hashValue; } - // Read network parameters - bool ReadParameters(std::istream& stream) { - if (!previous_layer_.ReadParameters(stream)) return false; - for (std::size_t i = 0; i < kOutputDimensions; ++i) - biases_[i] = read_little_endian(stream); - for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i) + // Read network parameters + bool read_parameters(std::istream& stream) { + if (!previousLayer.read_parameters(stream)) return false; + for (std::size_t i = 0; i < OutputDimensions; ++i) + biases[i] = read_little_endian(stream); + for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) #if !defined (USE_SSSE3) - weights_[i] = read_little_endian(stream); + weights[i] = read_little_endian(stream); #else - weights_[ - (i / 4) % (kPaddedInputDimensions / 4) * kOutputDimensions * 4 + - i / kPaddedInputDimensions * 4 + + weights[ + (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + i % 4 ] = read_little_endian(stream); - - // Determine if eights of weight and input products can be summed using 16bits - // without saturation. We assume worst case combinations of 0 and 127 for all inputs. - if (kOutputDimensions > 1 && !stream.fail()) - { - canSaturate16.count = 0; -#if !defined(USE_VNNI) - for (IndexType i = 0; i < kPaddedInputDimensions; i += 16) - for (IndexType j = 0; j < kOutputDimensions; ++j) - for (int x = 0; x < 2; ++x) - { - WeightType* w = &weights_[i * kOutputDimensions + j * 4 + x * 2]; - int sum[2] = {0, 0}; - for (int k = 0; k < 8; ++k) - { - IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2; - sum[w[idx] < 0] += w[idx]; - } - for (int sign : {-1, 1}) - while (sign * sum[sign == -1] > 258) - { - int maxK = 0, maxW = 0; - for (int k = 0; k < 8; ++k) - { - IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2; - if (maxW < sign * w[idx]) - maxK = k, maxW = sign * w[idx]; - } - - IndexType idx = maxK / 2 * kOutputDimensions * 4 + maxK % 2; - sum[sign == -1] -= w[idx]; - canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx]); - w[idx] = 0; - } - } - - // Non functional optimization for faster more linear access - std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count, - [](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2) - { return e1.in == e2.in ? e1.out < e2.out : e1.in < e2.in; }); #endif + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + if (!previousLayer.write_parameters(stream)) return false; + for (std::size_t i = 0; i < OutputDimensions; ++i) + write_little_endian(stream, biases[i]); +#if !defined (USE_SSSE3) + for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[i]); +#else + std::unique_ptr unscrambledWeights = std::make_unique(OutputDimensions * PaddedInputDimensions); + for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) { + unscrambledWeights[i] = + weights[ + (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + + i % 4 + ]; } + + for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, unscrambledWeights[i]); #endif return !stream.fail(); } // Forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - const auto input = previous_layer_.Propagate( - transformed_features, buffer + kSelfBufferSize); + const OutputType* propagate( + const TransformedFeatureType* transformedFeatures, char* buffer) const { + const auto input = previousLayer.propagate( + transformedFeatures, buffer + SelfBufferSize); #if defined (USE_AVX512) - [[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1); + [[maybe_unused]] const __m512i Ones512 = _mm512_set1_epi16(1); [[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int { return _mm512_reduce_add_epi32(sum) + bias; @@ -144,7 +128,7 @@ namespace Eval::NNUE::Layers { acc = _mm512_dpbusd_epi32(acc, a, b); #else __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, kOnes512); + product0 = _mm512_madd_epi16(product0, Ones512); acc = _mm512_add_epi32(acc, product0); #endif }; @@ -161,18 +145,18 @@ namespace Eval::NNUE::Layers { __m512i product1 = _mm512_maddubs_epi16(a1, b1); __m512i product2 = _mm512_maddubs_epi16(a2, b2); __m512i product3 = _mm512_maddubs_epi16(a3, b3); - product0 = _mm512_add_epi16(product0, product1); - product2 = _mm512_add_epi16(product2, product3); - product0 = _mm512_add_epi16(product0, product2); - product0 = _mm512_madd_epi16(product0, kOnes512); - acc = _mm512_add_epi32(acc, product0); + product0 = _mm512_adds_epi16(product0, product1); + product0 = _mm512_madd_epi16(product0, Ones512); + product2 = _mm512_adds_epi16(product2, product3); + product2 = _mm512_madd_epi16(product2, Ones512); + acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2)); #endif }; #endif #if defined (USE_AVX2) - [[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1); + [[maybe_unused]] const __m256i Ones256 = _mm256_set1_epi16(1); [[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int { __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); @@ -186,7 +170,7 @@ namespace Eval::NNUE::Layers { acc = _mm256_dpbusd_epi32(acc, a, b); #else __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, kOnes256); + product0 = _mm256_madd_epi16(product0, Ones256); acc = _mm256_add_epi32(acc, product0); #endif }; @@ -203,18 +187,18 @@ namespace Eval::NNUE::Layers { __m256i product1 = _mm256_maddubs_epi16(a1, b1); __m256i product2 = _mm256_maddubs_epi16(a2, b2); __m256i product3 = _mm256_maddubs_epi16(a3, b3); - product0 = _mm256_add_epi16(product0, product1); - product2 = _mm256_add_epi16(product2, product3); - product0 = _mm256_add_epi16(product0, product2); - product0 = _mm256_madd_epi16(product0, kOnes256); - acc = _mm256_add_epi32(acc, product0); + product0 = _mm256_adds_epi16(product0, product1); + product0 = _mm256_madd_epi16(product0, Ones256); + product2 = _mm256_adds_epi16(product2, product3); + product2 = _mm256_madd_epi16(product2, Ones256); + acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2)); #endif }; #endif #if defined (USE_SSSE3) - [[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1); + [[maybe_unused]] const __m128i Ones128 = _mm_set1_epi16(1); [[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int { sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC @@ -224,7 +208,7 @@ namespace Eval::NNUE::Layers { [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) { __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, kOnes128); + product0 = _mm_madd_epi16(product0, Ones128); acc = _mm_add_epi32(acc, product0); }; @@ -235,10 +219,10 @@ namespace Eval::NNUE::Layers { __m128i product2 = _mm_maddubs_epi16(a2, b2); __m128i product3 = _mm_maddubs_epi16(a3, b3); product0 = _mm_adds_epi16(product0, product1); + product0 = _mm_madd_epi16(product0, Ones128); product2 = _mm_adds_epi16(product2, product3); - product0 = _mm_adds_epi16(product0, product2); - product0 = _mm_madd_epi16(product0, kOnes128); - acc = _mm_add_epi32(acc, product0); + product2 = _mm_madd_epi16(product2, Ones128); + acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2)); }; #endif @@ -267,73 +251,73 @@ namespace Eval::NNUE::Layers { #endif #if defined (USE_SSSE3) + // Different layout, we process 4 inputs at a time, always. + static_assert(InputDimensions % 4 == 0); const auto output = reinterpret_cast(buffer); - const auto input_vector = reinterpret_cast(input); + const auto inputVector = reinterpret_cast(input); - static_assert(kOutputDimensions % kOutputSimdWidth == 0 || kOutputDimensions == 1); + static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); - // kOutputDimensions is either 1 or a multiple of kSimdWidth + // OutputDimensions is either 1 or a multiple of SimdWidth // because then it is also an input dimension. - if constexpr (kOutputDimensions % kOutputSimdWidth == 0) + if constexpr (OutputDimensions % OutputSimdWidth == 0) { - constexpr IndexType kNumChunks = kPaddedInputDimensions / 4; + constexpr IndexType NumChunks = InputDimensions / 4; const auto input32 = reinterpret_cast(input); vec_t* outptr = reinterpret_cast(output); - std::memcpy(output, biases_, kOutputDimensions * sizeof(OutputType)); + std::memcpy(output, biases, OutputDimensions * sizeof(OutputType)); - for (int i = 0; i < (int)kNumChunks - 3; i += 4) + for (int i = 0; i < (int)NumChunks - 3; i += 4) { const vec_t in0 = vec_set_32(input32[i + 0]); const vec_t in1 = vec_set_32(input32[i + 1]); const vec_t in2 = vec_set_32(input32[i + 2]); const vec_t in3 = vec_set_32(input32[i + 3]); - const auto col0 = reinterpret_cast(&weights_[(i + 0) * kOutputDimensions * 4]); - const auto col1 = reinterpret_cast(&weights_[(i + 1) * kOutputDimensions * 4]); - const auto col2 = reinterpret_cast(&weights_[(i + 2) * kOutputDimensions * 4]); - const auto col3 = reinterpret_cast(&weights_[(i + 3) * kOutputDimensions * 4]); - for (int j = 0; j * kOutputSimdWidth < kOutputDimensions; ++j) + const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); + const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); + const auto col2 = reinterpret_cast(&weights[(i + 2) * OutputDimensions * 4]); + const auto col3 = reinterpret_cast(&weights[(i + 3) * OutputDimensions * 4]); + for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j) vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]); } - for (int i = 0; i < canSaturate16.count; ++i) - output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w; } - else if constexpr (kOutputDimensions == 1) + else if constexpr (OutputDimensions == 1) { #if defined (USE_AVX512) - if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) != 0) + if constexpr (PaddedInputDimensions % (SimdWidth * 2) != 0) { - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const auto input_vector256 = reinterpret_cast(input); + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + const auto inputVector256 = reinterpret_cast(input); __m256i sum0 = _mm256_setzero_si256(); - const auto row0 = reinterpret_cast(&weights_[0]); + const auto row0 = reinterpret_cast(&weights[0]); - for (int j = 0; j < (int)kNumChunks; ++j) + for (int j = 0; j < (int)NumChunks; ++j) { - const __m256i in = input_vector256[j]; + const __m256i in = inputVector256[j]; m256_add_dpbusd_epi32(sum0, in, row0[j]); } - output[0] = m256_hadd(sum0, biases_[0]); + output[0] = m256_hadd(sum0, biases[0]); } else #endif { #if defined (USE_AVX512) - constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); + constexpr IndexType NumChunks = PaddedInputDimensions / (SimdWidth * 2); #else - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; #endif vec_t sum0 = vec_setzero(); - const auto row0 = reinterpret_cast(&weights_[0]); + const auto row0 = reinterpret_cast(&weights[0]); - for (int j = 0; j < (int)kNumChunks; ++j) + for (int j = 0; j < (int)NumChunks; ++j) { - const vec_t in = input_vector[j]; + const vec_t in = inputVector[j]; vec_add_dpbusd_32(sum0, in, row0[j]); } - output[0] = vec_hadd(sum0, biases_[0]); + output[0] = vec_hadd(sum0, biases[0]); } } @@ -344,80 +328,84 @@ namespace Eval::NNUE::Layers { auto output = reinterpret_cast(buffer); #if defined(USE_SSE2) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m128i kZeros = _mm_setzero_si128(); - const auto input_vector = reinterpret_cast(input); + // At least a multiple of 16, with SSE2. + static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m128i Zeros = _mm_setzero_si128(); + const auto inputVector = reinterpret_cast(input); #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m64 kZeros = _mm_setzero_si64(); - const auto input_vector = reinterpret_cast(input); + static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m64 Zeros = _mm_setzero_si64(); + const auto inputVector = reinterpret_cast(input); #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const auto input_vector = reinterpret_cast(input); + static_assert(InputDimensions % SimdWidth == 0); + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const auto inputVector = reinterpret_cast(input); #endif - for (IndexType i = 0; i < kOutputDimensions; ++i) { - const IndexType offset = i * kPaddedInputDimensions; + for (IndexType i = 0; i < OutputDimensions; ++i) { + const IndexType offset = i * PaddedInputDimensions; #if defined(USE_SSE2) - __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]); - __m128i sum_hi = kZeros; - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i sumLo = _mm_cvtsi32_si128(biases[i]); + __m128i sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { __m128i row_j = _mm_load_si128(&row[j]); - __m128i input_j = _mm_load_si128(&input_vector[j]); - __m128i extended_row_lo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); - __m128i extended_row_hi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); - __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros); - __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros); - __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo); - __m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi); - sum_lo = _mm_add_epi32(sum_lo, product_lo); - sum_hi = _mm_add_epi32(sum_hi, product_hi); + __m128i input_j = _mm_load_si128(&inputVector[j]); + __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); + __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); + __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); + __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); + __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); + __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_epi32(sumLo, productLo); + sumHi = _mm_add_epi32(sumHi, productHi); } - __m128i sum = _mm_add_epi32(sum_lo, sum_hi); - __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sum_high_64); + __m128i sum = _mm_add_epi32(sumLo, sumHi); + __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sumHigh_64); __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); sum = _mm_add_epi32(sum, sum_second_32); output[i] = _mm_cvtsi128_si32(sum); #elif defined(USE_MMX) - __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]); - __m64 sum_hi = kZeros; - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 sumLo = _mm_cvtsi32_si64(biases[i]); + __m64 sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { __m64 row_j = row[j]; - __m64 input_j = input_vector[j]; - __m64 extended_row_lo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); - __m64 extended_row_hi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); - __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros); - __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros); - __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo); - __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi); - sum_lo = _mm_add_pi32(sum_lo, product_lo); - sum_hi = _mm_add_pi32(sum_hi, product_hi); + __m64 input_j = inputVector[j]; + __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); + __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); + __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros); + __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros); + __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo); + __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_pi32(sumLo, productLo); + sumHi = _mm_add_pi32(sumHi, productHi); } - __m64 sum = _mm_add_pi32(sum_lo, sum_hi); + __m64 sum = _mm_add_pi32(sumLo, sumHi); sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); output[i] = _mm_cvtsi64_si32(sum); #elif defined(USE_NEON) - int32x4_t sum = {biases_[i]}; - const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]); - product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]); + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); + product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); sum = vpadalq_s16(sum, product); } output[i] = sum[0] + sum[1] + sum[2] + sum[3]; #else - OutputType sum = biases_[i]; - for (IndexType j = 0; j < kInputDimensions; ++j) { - sum += weights_[offset + j] * input[j]; + OutputType sum = biases[i]; + for (IndexType j = 0; j < InputDimensions; ++j) { + sum += weights[offset + j] * input[j]; } output[i] = sum; #endif @@ -436,29 +424,12 @@ namespace Eval::NNUE::Layers { using BiasType = OutputType; using WeightType = std::int8_t; - PreviousLayer previous_layer_; + PreviousLayer previousLayer; - alignas(kCacheLineSize) BiasType biases_[kOutputDimensions]; - alignas(kCacheLineSize) WeightType weights_[kOutputDimensions * kPaddedInputDimensions]; -#if defined (USE_SSSE3) - struct CanSaturate { - int count; - struct Entry { - uint16_t out; - uint16_t in; - int8_t w; - } ids[kPaddedInputDimensions * kOutputDimensions * 3 / 4]; - - void add(int i, int j, int8_t w) { - ids[count].out = i; - ids[count].in = j; - ids[count].w = w; - ++count; - } - } canSaturate16; -#endif + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; }; -} // namespace Eval::NNUE::Layers +} // namespace Stockfish::Eval::NNUE::Layers #endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h index 3ed41ee..65455df 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/clipped_relu.h @@ -23,7 +23,7 @@ #include "../nnue_common.h" -namespace Eval::NNUE::Layers { +namespace Stockfish::Eval::NNUE::Layers { // Clipped ReLU template @@ -35,132 +35,157 @@ namespace Eval::NNUE::Layers { static_assert(std::is_same::value, ""); // Number of input/output dimensions - static constexpr IndexType kInputDimensions = - PreviousLayer::kOutputDimensions; - static constexpr IndexType kOutputDimensions = kInputDimensions; + static constexpr IndexType InputDimensions = + PreviousLayer::OutputDimensions; + static constexpr IndexType OutputDimensions = InputDimensions; // Size of forward propagation buffer used in this layer - static constexpr std::size_t kSelfBufferSize = - CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + static constexpr std::size_t SelfBufferSize = + ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize); // Size of the forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = - PreviousLayer::kBufferSize + kSelfBufferSize; + static constexpr std::size_t BufferSize = + PreviousLayer::BufferSize + SelfBufferSize; // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0x538D24C7u; - hash_value += PreviousLayer::GetHashValue(); - return hash_value; + static constexpr std::uint32_t get_hash_value() { + std::uint32_t hashValue = 0x538D24C7u; + hashValue += PreviousLayer::get_hash_value(); + return hashValue; } // Read network parameters - bool ReadParameters(std::istream& stream) { - return previous_layer_.ReadParameters(stream); + bool read_parameters(std::istream& stream) { + return previousLayer.read_parameters(stream); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + return previousLayer.write_parameters(stream); } // Forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, char* buffer) const { - const auto input = previous_layer_.Propagate( - transformed_features, buffer + kSelfBufferSize); + const OutputType* propagate( + const TransformedFeatureType* transformedFeatures, char* buffer) const { + const auto input = previousLayer.propagate( + transformedFeatures, buffer + SelfBufferSize); const auto output = reinterpret_cast(buffer); #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; - const __m256i kZero = _mm256_setzero_si256(); - const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); - for (IndexType i = 0; i < kNumChunks; ++i) { - const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); - const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); - _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( - _mm256_packs_epi16(words0, words1), kZero), kOffsets)); + if constexpr (InputDimensions % SimdWidth == 0) { + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m256i Zero = _mm256_setzero_si256(); + const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); + const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); + _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_packs_epi16(words0, words1), Zero), Offsets)); + } + } else { + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const __m128i Zero = _mm_setzero_si128(); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + } } - constexpr IndexType kStart = kNumChunks * kSimdWidth; + constexpr IndexType Start = + InputDimensions % SimdWidth == 0 + ? InputDimensions / SimdWidth * SimdWidth + : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + constexpr IndexType NumChunks = InputDimensions / SimdWidth; #ifdef USE_SSE41 - const __m128i kZero = _mm_setzero_si128(); + const __m128i Zero = _mm_setzero_si128(); #else const __m128i k0x80s = _mm_set1_epi8(-128); #endif const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < kNumChunks; ++i) { + for (IndexType i = 0; i < NumChunks; ++i) { const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits); + _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits); + _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); const __m128i packedbytes = _mm_packs_epi16(words0, words1); _mm_store_si128(&out[i], #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) + _mm_max_epi8(packedbytes, Zero) #else _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) #endif ); } - constexpr IndexType kStart = kNumChunks * kSimdWidth; + constexpr IndexType Start = NumChunks * SimdWidth; #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + constexpr IndexType NumChunks = InputDimensions / SimdWidth; const __m64 k0x80s = _mm_set1_pi8(-128); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m64*>(output); - for (IndexType i = 0; i < kNumChunks; ++i) { + for (IndexType i = 0; i < NumChunks; ++i) { const __m64 words0 = _mm_srai_pi16( _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), - kWeightScaleBits); + WeightScaleBits); const __m64 words1 = _mm_srai_pi16( _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), - kWeightScaleBits); + WeightScaleBits); const __m64 packedbytes = _mm_packs_pi16(words0, words1); out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); } _mm_empty(); - constexpr IndexType kStart = kNumChunks * kSimdWidth; + constexpr IndexType Start = NumChunks * SimdWidth; #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const int8x8_t Zero = {0}; const auto in = reinterpret_cast(input); const auto out = reinterpret_cast(output); - for (IndexType i = 0; i < kNumChunks; ++i) { + for (IndexType i = 0; i < NumChunks; ++i) { int16x8_t shifted; const auto pack = reinterpret_cast(&shifted); - pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits); - pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits); - out[i] = vmax_s8(vqmovn_s16(shifted), kZero); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), Zero); } - constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2); + constexpr IndexType Start = NumChunks * (SimdWidth / 2); #else - constexpr IndexType kStart = 0; + constexpr IndexType Start = 0; #endif - for (IndexType i = kStart; i < kInputDimensions; ++i) { + for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast( - std::max(0, std::min(127, input[i] >> kWeightScaleBits))); + std::max(0, std::min(127, input[i] >> WeightScaleBits))); } return output; } private: - PreviousLayer previous_layer_; + PreviousLayer previousLayer; }; -} // namespace Eval::NNUE::Layers +} // namespace Stockfish::Eval::NNUE::Layers #endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h index efdf072..b6bf172 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/layers/input_slice.h @@ -23,46 +23,51 @@ #include "../nnue_common.h" -namespace Eval::NNUE::Layers { +namespace Stockfish::Eval::NNUE::Layers { // Input layer -template +template class InputSlice { public: // Need to maintain alignment - static_assert(Offset % kMaxSimdWidth == 0, ""); + static_assert(Offset % MaxSimdWidth == 0, ""); // Output type using OutputType = TransformedFeatureType; // Output dimensionality - static constexpr IndexType kOutputDimensions = OutputDimensions; + static constexpr IndexType OutputDimensions = OutDims; // Size of forward propagation buffer used from the input layer to this layer - static constexpr std::size_t kBufferSize = 0; + static constexpr std::size_t BufferSize = 0; // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { - std::uint32_t hash_value = 0xEC42E90Du; - hash_value ^= kOutputDimensions ^ (Offset << 10); - return hash_value; + static constexpr std::uint32_t get_hash_value() { + std::uint32_t hashValue = 0xEC42E90Du; + hashValue ^= OutputDimensions ^ (Offset << 10); + return hashValue; } // Read network parameters - bool ReadParameters(std::istream& /*stream*/) { + bool read_parameters(std::istream& /*stream*/) { + return true; + } + + // Write network parameters + bool write_parameters(std::ostream& /*stream*/) const { return true; } // Forward propagation - const OutputType* Propagate( - const TransformedFeatureType* transformed_features, + const OutputType* propagate( + const TransformedFeatureType* transformedFeatures, char* /*buffer*/) const { - return transformed_features + Offset; + return transformedFeatures + Offset; } private: }; -} // namespace Layers +} // namespace Stockfish::Eval::NNUE::Layers #endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h index 6b4390f..d41ecf9 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_accumulator.h @@ -23,18 +23,15 @@ #include "nnue_architecture.h" -namespace Eval::NNUE { - - // The accumulator of a StateInfo without parent is set to the INIT state - enum AccumulatorState { EMPTY, COMPUTED, INIT }; +namespace Stockfish::Eval::NNUE { // Class that holds the result of affine transformation of input features - struct alignas(kCacheLineSize) Accumulator { - std::int16_t - accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - AccumulatorState state[2]; + struct alignas(CacheLineSize) Accumulator { + std::int16_t accumulation[2][TransformedFeatureDimensions]; + std::int32_t psqtAccumulation[2][PSQTBuckets]; + bool computed[2]; }; -} // namespace Eval::NNUE +} // namespace Stockfish::Eval::NNUE #endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h index ad5be00..879a39c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_architecture.h @@ -21,18 +21,40 @@ #ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED -// Defines the network structure -#include "architectures/halfkp_256x2-32-32.h" +#include "nnue_common.h" -namespace Eval::NNUE { +#include "features/half_ka_v2.h" - static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); - static_assert(Network::kOutputDimensions == 1, ""); +#include "layers/input_slice.h" +#include "layers/affine_transform.h" +#include "layers/clipped_relu.h" + +namespace Stockfish::Eval::NNUE { + + // Input features used in evaluation function + using FeatureSet = Features::HalfKAv2; + + // Number of input feature dimensions after conversion + constexpr IndexType TransformedFeatureDimensions = 512; + constexpr IndexType PSQTBuckets = 8; + constexpr IndexType LayerStacks = 8; + + namespace Layers { + + // Define network structure + using InputLayer = InputSlice; + using HiddenLayer1 = ClippedReLU>; + using HiddenLayer2 = ClippedReLU>; + using OutputLayer = AffineTransform; + + } // namespace Layers + + using Network = Layers::OutputLayer; + + static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, ""); + static_assert(Network::OutputDimensions == 1, ""); static_assert(std::is_same::value, ""); - // Trigger for full calculation instead of difference calculation - constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; - -} // namespace Eval::NNUE +} // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h index 33e5874..75ac786 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_common.h @@ -24,6 +24,8 @@ #include #include +#include "../misc.h" // for IsLittleEndian + #if defined(USE_AVX2) #include @@ -43,61 +45,33 @@ #include #endif -namespace Eval::NNUE { +namespace Stockfish::Eval::NNUE { // Version of the evaluation file - constexpr std::uint32_t kVersion = 0x7AF32F16u; + constexpr std::uint32_t Version = 0x7AF32F20u; // Constant used in evaluation value calculation - constexpr int FV_SCALE = 16; - constexpr int kWeightScaleBits = 6; + constexpr int OutputScale = 16; + constexpr int WeightScaleBits = 6; // Size of cache line (in bytes) - constexpr std::size_t kCacheLineSize = 64; + constexpr std::size_t CacheLineSize = 64; // SIMD width (in bytes) #if defined(USE_AVX2) - constexpr std::size_t kSimdWidth = 32; + constexpr std::size_t SimdWidth = 32; #elif defined(USE_SSE2) - constexpr std::size_t kSimdWidth = 16; + constexpr std::size_t SimdWidth = 16; #elif defined(USE_MMX) - constexpr std::size_t kSimdWidth = 8; + constexpr std::size_t SimdWidth = 8; #elif defined(USE_NEON) - constexpr std::size_t kSimdWidth = 16; + constexpr std::size_t SimdWidth = 16; #endif - constexpr std::size_t kMaxSimdWidth = 32; - - // unique number for each piece type on each square - enum { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1 - }; - - constexpr uint32_t kpp_board_index[COLOR_NB][PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE }, - { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE } - }; + constexpr std::size_t MaxSimdWidth = 32; // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; @@ -105,7 +79,7 @@ namespace Eval::NNUE { // Round n up to be a multiple of base template - constexpr IntType CeilToMultiple(IntType n, IntType base) { + constexpr IntType ceil_to_multiple(IntType n, IntType base) { return (n + base - 1) / base * base; } @@ -114,19 +88,77 @@ namespace Eval::NNUE { // necessary to return a result with the byte ordering of the compiling machine. template inline IntType read_little_endian(std::istream& stream) { - IntType result; - std::uint8_t u[sizeof(IntType)]; - typename std::make_unsigned::type v = 0; - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; + if (IsLittleEndian) + stream.read(reinterpret_cast(&result), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + } - std::memcpy(&result, &v, sizeof(IntType)); return result; } -} // namespace Eval::NNUE + // write_little_endian() is our utility to write an integer (signed or unsigned, any size) + // to a stream in little-endian order. We swap the byte order before the write if + // necessary to always write in little endian order, independantly of the byte + // ordering of the compiling machine. + template + inline void write_little_endian(std::ostream& stream, IntType value) { + + if (IsLittleEndian) + stream.write(reinterpret_cast(&value), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = value; + + std::size_t i = 0; + // if constexpr to silence the warning about shift by 8 + if constexpr (sizeof(IntType) > 1) + { + for (; i + 1 < sizeof(IntType); ++i) + { + u[i] = v; + v >>= 8; + } + } + u[i] = v; + + stream.write(reinterpret_cast(u), sizeof(IntType)); + } + } + + // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. + // This reads N integers from stream s and put them in array out. + template + inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { + if (IsLittleEndian) + stream.read(reinterpret_cast(out), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + out[i] = read_little_endian(stream); + } + + // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. + // This takes N integers from array values and writes them on stream s. + template + inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { + if (IsLittleEndian) + stream.write(reinterpret_cast(values), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + write_little_endian(stream, values[i]); + } + +} // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h index 2641321..59a965a 100644 --- a/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h +++ b/DroidFishApp/src/main/cpp/stockfish/nnue/nnue_feature_transformer.h @@ -23,72 +23,158 @@ #include "nnue_common.h" #include "nnue_architecture.h" -#include "features/index_list.h" #include // std::memset() -namespace Eval::NNUE { +namespace Stockfish::Eval::NNUE { + + using BiasType = std::int16_t; + using WeightType = std::int16_t; + using PSQTWeightType = std::int32_t; // If vector instructions are enabled, we update and refresh the // accumulator tile by tile such that each tile fits in the CPU's // vector registers. #define VECTOR + static_assert(PSQTBuckets % 8 == 0, + "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); + #ifdef USE_AVX512 typedef __m512i vec_t; + typedef __m256i psqt_vec_t; #define vec_load(a) _mm512_load_si512(a) #define vec_store(a,b) _mm512_store_si512(a,b) #define vec_add_16(a,b) _mm512_add_epi16(a,b) #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) - static constexpr IndexType kNumRegs = 8; // only 8 are needed + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 32 #elif USE_AVX2 typedef __m256i vec_t; + typedef __m256i psqt_vec_t; #define vec_load(a) _mm256_load_si256(a) #define vec_store(a,b) _mm256_store_si256(a,b) #define vec_add_16(a,b) _mm256_add_epi16(a,b) #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) - static constexpr IndexType kNumRegs = 16; + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 #elif USE_SSE2 typedef __m128i vec_t; + typedef __m128i psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_epi16(a,b) #define vec_sub_16(a,b) _mm_sub_epi16(a,b) - static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) + #define vec_zero_psqt() _mm_setzero_si128() + #define NumRegistersSIMD (Is64Bit ? 16 : 8) #elif USE_MMX typedef __m64 vec_t; + typedef __m64 psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) #define vec_sub_16(a,b) _mm_sub_pi16(a,b) - static constexpr IndexType kNumRegs = 8; + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) + #define vec_zero_psqt() _mm_setzero_si64() + #define NumRegistersSIMD 8 #elif USE_NEON typedef int16x8_t vec_t; + typedef int32x4_t psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) vaddq_s16(a,b) #define vec_sub_16(a,b) vsubq_s16(a,b) - static constexpr IndexType kNumRegs = 16; + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) vaddq_s32(a,b) + #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) + #define vec_zero_psqt() psqt_vec_t{0} + #define NumRegistersSIMD 16 #else #undef VECTOR #endif + + #ifdef VECTOR + + // Compute optimal SIMD register count for feature transformer accumulation. + + // We use __m* types as template arguments, which causes GCC to emit warnings + // about losing some attribute information. This is irrelevant to us as we + // only take their size, so the following pragma are harmless. + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-attributes" + + template + static constexpr int BestRegisterCount() + { + #define RegisterSize sizeof(SIMDRegisterType) + #define LaneSize sizeof(LaneType) + + static_assert(RegisterSize >= LaneSize); + static_assert(MaxRegisters <= NumRegistersSIMD); + static_assert(MaxRegisters > 0); + static_assert(NumRegistersSIMD > 0); + static_assert(RegisterSize % LaneSize == 0); + static_assert((NumLanes * LaneSize) % RegisterSize == 0); + + const int ideal = (NumLanes * LaneSize) / RegisterSize; + if (ideal <= MaxRegisters) + return ideal; + + // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters + for (int divisor = MaxRegisters; divisor > 1; --divisor) + if (ideal % divisor == 0) + return divisor; + + return 1; + } + + static constexpr int NumRegs = BestRegisterCount(); + static constexpr int NumPsqtRegs = BestRegisterCount(); + + #pragma GCC diagnostic pop + + #endif + + + // Input feature converter class FeatureTransformer { private: // Number of output dimensions for one side - static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; #ifdef VECTOR - static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; - static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; + static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; + static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); + static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); #endif public: @@ -96,174 +182,219 @@ namespace Eval::NNUE { using OutputType = TransformedFeatureType; // Number of input/output dimensions - static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; - static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; + static constexpr IndexType InputDimensions = FeatureSet::Dimensions; + static constexpr IndexType OutputDimensions = HalfDimensions * 2; // Size of forward propagation buffer - static constexpr std::size_t kBufferSize = - kOutputDimensions * sizeof(OutputType); + static constexpr std::size_t BufferSize = + OutputDimensions * sizeof(OutputType); // Hash value embedded in the evaluation file - static constexpr std::uint32_t GetHashValue() { - - return RawFeatures::kHashValue ^ kOutputDimensions; + static constexpr std::uint32_t get_hash_value() { + return FeatureSet::HashValue ^ OutputDimensions; } // Read network parameters - bool ReadParameters(std::istream& stream) { + bool read_parameters(std::istream& stream) { + + read_little_endian(stream, biases , HalfDimensions ); + read_little_endian(stream, weights , HalfDimensions * InputDimensions); + read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + + write_little_endian(stream, biases , HalfDimensions ); + write_little_endian(stream, weights , HalfDimensions * InputDimensions); + write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); - for (std::size_t i = 0; i < kHalfDimensions; ++i) - biases_[i] = read_little_endian(stream); - for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) - weights_[i] = read_little_endian(stream); return !stream.fail(); } // Convert input features - void Transform(const Position& pos, OutputType* output) const { - - UpdateAccumulator(pos, WHITE); - UpdateAccumulator(pos, BLACK); - - const auto& accumulation = pos.state()->accumulator.accumulation; - - #if defined(USE_AVX512) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2); - static_assert(kHalfDimensions % (kSimdWidth * 2) == 0); - const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7); - const __m512i kZero = _mm512_setzero_si512(); - - #elif defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - constexpr int kControl = 0b11011000; - const __m256i kZero = _mm256_setzero_si256(); - - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - - #ifdef USE_SSE41 - const __m128i kZero = _mm_setzero_si128(); - #else - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - const int8x8_t kZero = {0}; - #endif + std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { + update_accumulator(pos, WHITE); + update_accumulator(pos, BLACK); const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - for (IndexType p = 0; p < 2; ++p) { - const IndexType offset = kHalfDimensions * p; + const auto& accumulation = pos.state()->accumulator.accumulation; + const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + + const auto psqt = ( + psqtAccumulation[perspectives[0]][bucket] + - psqtAccumulation[perspectives[1]][bucket] + ) / 2; + #if defined(USE_AVX512) - auto out = reinterpret_cast<__m512i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m512i sum0 = _mm512_load_si512( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); - __m512i sum1 = _mm512_load_si512( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); - _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl, - _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero))); - } + + constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2); + static_assert(HalfDimensions % (SimdWidth * 2) == 0); + const __m512i Control = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7); + const __m512i Zero = _mm512_setzero_si512(); + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + auto out = reinterpret_cast<__m512i*>(&output[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + __m512i sum0 = _mm512_load_si512(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 0]); + __m512i sum1 = _mm512_load_si512(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 1]); + + _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control, + _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero))); + } + } + return psqt; #elif defined(USE_AVX2) - auto out = reinterpret_cast<__m256i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); - _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( - _mm256_packs_epi16(sum0, sum1), kZero), kControl)); - } + + constexpr IndexType NumChunks = HalfDimensions / SimdWidth; + constexpr int Control = 0b11011000; + const __m256i Zero = _mm256_setzero_si256(); + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + auto out = reinterpret_cast<__m256i*>(&output[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + __m256i sum0 = _mm256_load_si256(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 0]); + __m256i sum1 = _mm256_load_si256(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 1]); + + _mm256_store_si256(&out[j], _mm256_permute4x64_epi64( + _mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero), Control)); + } + } + return psqt; #elif defined(USE_SSE2) - auto out = reinterpret_cast<__m128i*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i sum0 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m128i sum1 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - _mm_store_si128(&out[j], + #ifdef USE_SSE41 + constexpr IndexType NumChunks = HalfDimensions / SimdWidth; + const __m128i Zero = _mm_setzero_si128(); + #else + constexpr IndexType NumChunks = HalfDimensions / SimdWidth; + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif - #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, kZero) - #else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) - #endif + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + auto out = reinterpret_cast<__m128i*>(&output[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + __m128i sum0 = _mm_load_si128(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 0]); + __m128i sum1 = _mm_load_si128(&reinterpret_cast + (accumulation[perspectives[p]])[j * 2 + 1]); + const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); - ); - } + #ifdef USE_SSE41 + _mm_store_si128(&out[j], _mm_max_epi8(packedbytes, Zero)); + #else + _mm_store_si128(&out[j], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)); + #endif + } + } + return psqt; #elif defined(USE_MMX) - auto out = reinterpret_cast<__m64*>(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m64 sum0 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m64 sum1 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); - out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); - } + + constexpr IndexType NumChunks = HalfDimensions / SimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + auto out = reinterpret_cast<__m64*>(&output[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + __m64 sum0 = *(&reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 0]); + __m64 sum1 = *(&reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 1]); + const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); + out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + } + _mm_empty(); + return psqt; #elif defined(USE_NEON) - const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - int16x8_t sum = reinterpret_cast( - accumulation[perspectives[p]][0])[j]; - out[j] = vmax_s8(vqmovn_s16(sum), kZero); - } + + constexpr IndexType NumChunks = HalfDimensions / (SimdWidth / 2); + const int8x8_t Zero = {0}; + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + const auto out = reinterpret_cast(&output[offset]); + for (IndexType j = 0; j < NumChunks; ++j) + { + int16x8_t sum = reinterpret_cast(accumulation[perspectives[p]])[j]; + out[j] = vmax_s8(vqmovn_s16(sum), Zero); + } + } + return psqt; #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; - output[offset + j] = static_cast( - std::max(0, std::min(127, sum))); - } + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = HalfDimensions * p; + for (IndexType j = 0; j < HalfDimensions; ++j) + { + BiasType sum = accumulation[perspectives[p]][j]; + output[offset + j] = static_cast(std::max(0, std::min(127, sum))); + } + } + return psqt; + #endif - } - #if defined(USE_MMX) - _mm_empty(); - #endif - } + } // end of function transform() + + private: - void UpdateAccumulator(const Position& pos, const Color c) const { + void update_accumulator(const Position& pos, const Color perspective) const { + + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + using IndexList = ValueList; #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array // is defined in the VECTOR code below, once in each branch - vec_t acc[kNumRegs]; + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; #endif // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. StateInfo *st = pos.state(), *next = nullptr; - int gain = pos.count() - 2; - while (st->accumulator.state[c] == EMPTY) + int gain = FeatureSet::refresh_cost(pos); + while (st->previous && !st->accumulator.computed[perspective]) { - auto& dp = st->dirtyPiece; - // The first condition tests whether an incremental update is - // possible at all: if this side's king has moved, it is not possible. - static_assert(std::is_same_v>, - "Current code assumes that only kFriendlyKingMoved refresh trigger is being used."); - if ( dp.piece[0] == make_piece(c, KING) - || (gain -= dp.dirty_num + 1) < 0) + // This governs when a full feature refresh is needed and how many + // updates are better than just one full refresh. + if ( FeatureSet::requires_refresh(st, perspective) + || (gain -= FeatureSet::update_cost(st) + 1) < 0) break; next = st; st = st->previous; } - if (st->accumulator.state[c] == COMPUTED) + if (st->accumulator.computed[perspective]) { if (next == nullptr) return; @@ -271,85 +402,129 @@ namespace Eval::NNUE { // Update incrementally in two steps. First, we update the "next" // accumulator. Then, we update the current accumulator (pos.state()). - // Gather all features to be updated. This code assumes HalfKP features - // only and doesn't support refresh triggers. - static_assert(std::is_same_v>, - RawFeatures>); - Features::IndexList removed[2], added[2]; - Features::HalfKP::AppendChangedIndices(pos, - next->dirtyPiece, c, &removed[0], &added[0]); + // Gather all features to be updated. + const Square ksq = pos.square(perspective); + IndexList removed[2], added[2]; + FeatureSet::append_changed_indices( + ksq, next, perspective, removed[0], added[0]); for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) - Features::HalfKP::AppendChangedIndices(pos, - st2->dirtyPiece, c, &removed[1], &added[1]); + FeatureSet::append_changed_indices( + ksq, st2, perspective, removed[1], added[1]); // Mark the accumulators as computed. - next->accumulator.state[c] = COMPUTED; - pos.state()->accumulator.state[c] = COMPUTED; + next->accumulator.computed[perspective] = true; + pos.state()->accumulator.computed[perspective] = true; - // Now update the accumulators listed in info[], where the last element is a sentinel. - StateInfo *info[3] = + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + StateInfo *states_to_update[3] = { next, next == pos.state() ? nullptr : pos.state(), nullptr }; #ifdef VECTOR - for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { // Load accumulator auto accTile = reinterpret_cast( - &st->accumulator.accumulation[c][0][j * kTileHeight]); - for (IndexType k = 0; k < kNumRegs; ++k) + &st->accumulator.accumulation[perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_load(&accTile[k]); - for (IndexType i = 0; info[i]; ++i) + for (IndexType i = 0; states_to_update[i]; ++i) { // Difference calculation for the deactivated features for (const auto index : removed[i]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType k = 0; k < kNumRegs; ++k) + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_sub_16(acc[k], column[k]); } // Difference calculation for the activated features for (const auto index : added[i]) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType k = 0; k < kNumRegs; ++k) + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_add_16(acc[k], column[k]); } // Store accumulator accTile = reinterpret_cast( - &info[i]->accumulator.accumulation[c][0][j * kTileHeight]); - for (IndexType k = 0; k < kNumRegs; ++k) + &states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) vec_store(&accTile[k], acc[k]); } } - #else - for (IndexType i = 0; info[i]; ++i) + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { - std::memcpy(info[i]->accumulator.accumulation[c][0], - st->accumulator.accumulation[c][0], - kHalfDimensions * sizeof(BiasType)); - st = info[i]; + // Load accumulator + auto accTilePsqt = reinterpret_cast( + &st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqt[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + // Store accumulator + accTilePsqt = reinterpret_cast( + &states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + } + + #else + for (IndexType i = 0; states_to_update[i]; ++i) + { + std::memcpy(states_to_update[i]->accumulator.accumulation[perspective], + st->accumulator.accumulation[perspective], + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k]; + + st = states_to_update[i]; // Difference calculation for the deactivated features for (const auto index : removed[i]) { - const IndexType offset = kHalfDimensions * index; + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < kHalfDimensions; ++j) - st->accumulator.accumulation[c][0][j] -= weights_[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[perspective][j] -= weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k]; } // Difference calculation for the activated features for (const auto index : added[i]) { - const IndexType offset = kHalfDimensions * index; + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < kHalfDimensions; ++j) - st->accumulator.accumulation[c][0][j] += weights_[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; } } #endif @@ -358,43 +533,69 @@ namespace Eval::NNUE { { // Refresh the accumulator auto& accumulator = pos.state()->accumulator; - accumulator.state[c] = COMPUTED; - Features::IndexList active; - Features::HalfKP::AppendActiveIndices(pos, c, &active); + accumulator.computed[perspective] = true; + IndexList active; + FeatureSet::append_active_indices(pos, perspective, active); #ifdef VECTOR - for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { auto biasesTile = reinterpret_cast( - &biases_[j * kTileHeight]); - for (IndexType k = 0; k < kNumRegs; ++k) + &biases[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = biasesTile[k]; for (const auto index : active) { - const IndexType offset = kHalfDimensions * index + j * kTileHeight; - auto column = reinterpret_cast(&weights_[offset]); + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); - for (unsigned k = 0; k < kNumRegs; ++k) + for (unsigned k = 0; k < NumRegs; ++k) acc[k] = vec_add_16(acc[k], column[k]); } auto accTile = reinterpret_cast( - &accumulator.accumulation[c][0][j * kTileHeight]); - for (unsigned k = 0; k < kNumRegs; k++) + &accumulator.accumulation[perspective][j * TileHeight]); + for (unsigned k = 0; k < NumRegs; k++) vec_store(&accTile[k], acc[k]); } + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); + + for (const auto index : active) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + #else - std::memcpy(accumulator.accumulation[c][0], biases_, - kHalfDimensions * sizeof(BiasType)); + std::memcpy(accumulator.accumulation[perspective], biases, + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[perspective][k] = 0; for (const auto index : active) { - const IndexType offset = kHalfDimensions * index; + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[c][0][j] += weights_[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + accumulator.accumulation[perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k]; } #endif } @@ -404,14 +605,11 @@ namespace Eval::NNUE { #endif } - using BiasType = std::int16_t; - using WeightType = std::int16_t; - - alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; - alignas(kCacheLineSize) - WeightType weights_[kHalfDimensions * kInputDimensions]; + alignas(CacheLineSize) BiasType biases[HalfDimensions]; + alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; + alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; }; -} // namespace Eval::NNUE +} // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp index cd4d4e4..70fb6f2 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.cpp @@ -24,6 +24,8 @@ #include "position.h" #include "thread.h" +namespace Stockfish { + namespace { #define V Value @@ -107,8 +109,9 @@ namespace { e->blockedCount += popcount(shift(ourPawns) & (theirPawns | doubleAttackThem)); // Loop through all pawns of the current color and score each pawn - while (b) { - s = pop_lsb(&b); + while (b) + { + s = pop_lsb(b); assert(pos.piece_on(s) == make_piece(Us, PAWN)); @@ -288,7 +291,7 @@ Score Entry::do_king_safety(const Position& pos) { if (pawns & attacks_bb(ksq)) minPawnDist = 1; else while (pawns) - minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(&pawns))); + minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(pawns))); return shelter - make_score(0, 16 * minPawnDist); } @@ -298,3 +301,5 @@ template Score Entry::do_king_safety(const Position& pos); template Score Entry::do_king_safety(const Position& pos); } // namespace Pawns + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/pawns.h b/DroidFishApp/src/main/cpp/stockfish/pawns.h index 888bf99..124619d 100644 --- a/DroidFishApp/src/main/cpp/stockfish/pawns.h +++ b/DroidFishApp/src/main/cpp/stockfish/pawns.h @@ -23,7 +23,7 @@ #include "position.h" #include "types.h" -namespace Pawns { +namespace Stockfish::Pawns { /// Pawns::Entry contains various information about a pawn structure. A lookup /// to the pawn hash table (performed by calling the probe function) returns a @@ -65,6 +65,6 @@ typedef HashTable Table; Entry* probe(const Position& pos); -} // namespace Pawns +} // namespace Stockfish::Pawns #endif // #ifndef PAWNS_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/position.cpp b/DroidFishApp/src/main/cpp/stockfish/position.cpp index 2eb30ca..ba015d3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/position.cpp @@ -34,6 +34,8 @@ using std::string; +namespace Stockfish { + namespace Zobrist { Key psq[PIECE_NB][SQUARE_NB]; @@ -71,13 +73,13 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) { << std::setfill(' ') << std::dec << "\nCheckers: "; for (Bitboard b = pos.checkers(); b; ) - os << UCI::square(pop_lsb(&b)) << " "; + os << UCI::square(pop_lsb(b)) << " "; if ( int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); Position p; p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); @@ -249,8 +251,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th set_castling_right(c, rsq); } - set_state(st); - // 4. En passant square. // Ignore if square is invalid or not on side to move relative rank 6. bool enpassant = false; @@ -264,24 +264,12 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th // a) side to move have a pawn threatening epSquare // b) there is an enemy pawn in front of epSquare // c) there is no piece on epSquare or behind epSquare - // d) enemy pawn didn't block a check of its own color by moving forward enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) - && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))) - && ( file_of(square(sideToMove)) == file_of(st->epSquare) - || !(blockers_for_king(sideToMove) & (st->epSquare + pawn_push(~sideToMove)))); + && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); } - // It's necessary for st->previous to be intialized in this way because legality check relies on its existence - if (enpassant) { - st->previous = new StateInfo(); - remove_piece(st->epSquare - pawn_push(sideToMove)); - st->previous->checkersBB = attackers_to(square(~sideToMove)) & pieces(sideToMove); - st->previous->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), st->previous->pinners[BLACK]); - st->previous->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), st->previous->pinners[WHITE]); - put_piece(make_piece(~sideToMove, PAWN), st->epSquare - pawn_push(sideToMove)); - } - else + if (!enpassant) st->epSquare = SQ_NONE; // 5-6. Halfmove clock and fullmove number @@ -293,8 +281,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th chess960 = isChess960; thisThread = th; - st->accumulator.state[WHITE] = Eval::NNUE::INIT; - st->accumulator.state[BLACK] = Eval::NNUE::INIT; + set_state(st); assert(pos_is_ok()); @@ -318,7 +305,7 @@ void Position::set_castling_right(Color c, Square rfrom) { Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); - castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto) | rto | kto) + castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) & ~(kfrom | rfrom); } @@ -357,7 +344,7 @@ void Position::set_state(StateInfo* si) const { for (Bitboard b = pieces(); b; ) { - Square s = pop_lsb(&b); + Square s = pop_lsb(b); Piece pc = piece_on(s); si->key ^= Zobrist::psq[pc][s]; @@ -408,7 +395,7 @@ Position& Position::set(const string& code, Color c, StateInfo* si) { /// Position::fen() returns a FEN representation of the position. In case of /// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function. -const string Position::fen() const { +string Position::fen() const { int emptyCnt; std::ostringstream ss; @@ -474,7 +461,7 @@ Bitboard Position::slider_blockers(Bitboard sliders, Square s, Bitboard& pinners while (snipers) { - Square sniperSq = pop_lsb(&snipers); + Square sniperSq = pop_lsb(snipers); Bitboard b = between_bb(s, sniperSq) & occupancy; if (b && !more_than_one(b)) @@ -515,11 +502,23 @@ bool Position::legal(Move m) const { assert(color_of(moved_piece(m)) == us); assert(piece_on(square(us)) == make_piece(us, KING)); - // st->previous->blockersForKing consider capsq as empty. - // If pinned, it has to move along the king ray. + // En passant captures are a tricky special case. Because they are rather + // uncommon, we do it simply by testing whether the king is attacked after + // the move is made. if (type_of(m) == EN_PASSANT) - return !(st->previous->blockersForKing[sideToMove] & from) - || aligned(from, to, square(us)); + { + Square ksq = square(us); + Square capsq = to - pawn_push(us); + Bitboard occupied = (pieces() ^ from ^ capsq) | to; + + assert(to == ep_square()); + assert(moved_piece(m) == make_piece(us, PAWN)); + assert(piece_on(capsq) == make_piece(~us, PAWN)); + assert(piece_on(to) == NO_PIECE); + + return !(attacks_bb< ROOK>(ksq, occupied) & pieces(~us, QUEEN, ROOK)) + && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP)); + } // Castling moves generation does not check if the castling path is clear of // enemy attacks, it is delayed at a later time: now! @@ -542,7 +541,7 @@ bool Position::legal(Move m) const { // If the moving piece is a king, check whether the destination square is // attacked by the opponent. if (type_of(piece_on(from)) == KING) - return !(attackers_to(to) & pieces(~us)); + return !(attackers_to(to, pieces() ^ from) & pieces(~us)); // A non-king move is legal if and only if it is not pinned or it // is moving along the ray towards or away from the king. @@ -611,8 +610,8 @@ bool Position::pseudo_legal(const Move m) const { if (more_than_one(checkers())) return false; - // Our move must be a blocking evasion or a capture of the checking piece - if (!((between_bb(lsb(checkers()), square(us)) | checkers()) & to)) + // Our move must be a blocking interposition or a capture of the checking piece + if (!(between_bb(square(us), lsb(checkers())) & to)) return false; } // In case of king moves under check we have to remove king so as to catch @@ -652,15 +651,18 @@ bool Position::gives_check(Move m) const { case PROMOTION: return attacks_bb(promotion_type(m), to, pieces() ^ from) & square(~sideToMove); - // The double-pushed pawn blocked a check? En Passant will remove the blocker. - // The only discovery check that wasn't handle is through capsq and fromsq - // So the King must be in the same rank as fromsq to consider this possibility. - // st->previous->blockersForKing consider capsq as empty. + // En passant capture with check? We have already handled the case + // of direct checks and ordinary discovered check, so the only case we + // need to handle is the unusual case of a discovered check through + // the captured pawn. case EN_PASSANT: - return st->previous->checkersBB - || ( rank_of(square(~sideToMove)) == rank_of(from) - && st->previous->blockersForKing[~sideToMove] & from); + { + Square capsq = make_square(file_of(to), rank_of(from)); + Bitboard b = (pieces() ^ from ^ capsq) | to; + return (attacks_bb< ROOK>(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) + | (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP)); + } default: //CASTLING { // Castling is encoded as 'king captures the rook' @@ -700,8 +702,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { ++st->pliesFromNull; // Used by NNUE - st->accumulator.state[WHITE] = Eval::NNUE::EMPTY; - st->accumulator.state[BLACK] = Eval::NNUE::EMPTY; + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -986,7 +988,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ } -/// Position::do(undo)_null_move() is used to do(undo) a "null move": it flips +/// Position::do_null_move() is used to do a "null move": it flips /// the side to move without executing any move on the board. void Position::do_null_move(StateInfo& newSt) { @@ -1001,8 +1003,8 @@ void Position::do_null_move(StateInfo& newSt) { st->dirtyPiece.dirty_num = 0; st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() - st->accumulator.state[WHITE] = Eval::NNUE::EMPTY; - st->accumulator.state[BLACK] = Eval::NNUE::EMPTY; + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; if (st->epSquare != SQ_NONE) { @@ -1025,6 +1027,9 @@ void Position::do_null_move(StateInfo& newSt) { assert(pos_is_ok()); } + +/// Position::undo_null_move() must be used to undo a "null move" + void Position::undo_null_move() { assert(!checkers()); @@ -1090,8 +1095,8 @@ bool Position::see_ge(Move m, Value threshold) const { if (!(stmAttackers = attackers & pieces(stm))) break; - // Don't allow pinned pieces to attack (except the king) as long as - // there are pinners on their original square. + // Don't allow pinned pieces to attack as long as there are + // pinners on their original square. if (pinners(~stm) & occupied) stmAttackers &= ~blockers_for_king(stm); @@ -1107,7 +1112,7 @@ bool Position::see_ge(Move m, Value threshold) const { if ((swap = PawnValueMg - swap) < res) break; - occupied ^= lsb(bb); + occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } @@ -1116,7 +1121,7 @@ bool Position::see_ge(Move m, Value threshold) const { if ((swap = KnightValueMg - swap) < res) break; - occupied ^= lsb(bb); + occupied ^= least_significant_square_bb(bb); } else if ((bb = stmAttackers & pieces(BISHOP))) @@ -1124,7 +1129,7 @@ bool Position::see_ge(Move m, Value threshold) const { if ((swap = BishopValueMg - swap) < res) break; - occupied ^= lsb(bb); + occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } @@ -1133,7 +1138,7 @@ bool Position::see_ge(Move m, Value threshold) const { if ((swap = RookValueMg - swap) < res) break; - occupied ^= lsb(bb); + occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); } @@ -1142,7 +1147,7 @@ bool Position::see_ge(Move m, Value threshold) const { if ((swap = QueenValueMg - swap) < res) break; - occupied ^= lsb(bb); + occupied ^= least_significant_square_bb(bb); attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); } @@ -1216,7 +1221,7 @@ bool Position::has_game_cycle(int ply) const { Square s1 = from_sq(move); Square s2 = to_sq(move); - if (!(between_bb(s1, s2) & pieces())) + if (!((between_bb(s1, s2) ^ s2) & pieces())) { if (ply > i) return true; @@ -1313,7 +1318,7 @@ bool Position::pos_is_ok() const { assert(0 && "pos_is_ok: Bitboards"); StateInfo si = *st; - ASSERT_ALIGNED(&si, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize); set_state(&si); if (std::memcmp(&si, st, sizeof(StateInfo))) @@ -1338,3 +1343,5 @@ bool Position::pos_is_ok() const { return true; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/position.h b/DroidFishApp/src/main/cpp/stockfish/position.h index 3624e29..9f694a7 100644 --- a/DroidFishApp/src/main/cpp/stockfish/position.h +++ b/DroidFishApp/src/main/cpp/stockfish/position.h @@ -31,6 +31,7 @@ #include "nnue/nnue_accumulator.h" +namespace Stockfish { /// StateInfo struct stores information needed to restore a Position object to /// its previous state when we retract a move. Whenever a move is made on the @@ -50,11 +51,11 @@ struct StateInfo { // Not copied when making a move (will be recomputed anyhow) Key key; Bitboard checkersBB; - Piece capturedPiece; StateInfo* previous; Bitboard blockersForKing[COLOR_NB]; Bitboard pinners[COLOR_NB]; Bitboard checkSquares[PIECE_TYPE_NB]; + Piece capturedPiece; int repetition; // Used by NNUE @@ -87,7 +88,7 @@ public: // FEN string input/output Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); Position& set(const std::string& code, Color c, StateInfo* si); - const std::string fen() const; + std::string fen() const; // Position representation Bitboard pieces(PieceType pt) const; @@ -114,7 +115,6 @@ public: Bitboard blockers_for_king(Color c) const; Bitboard check_squares(PieceType pt) const; Bitboard pinners(Color c) const; - bool is_discovered_check_on_king(Color c, Move m) const; // Attacks to/from a given square Bitboard attackers_to(Square s) const; @@ -127,7 +127,6 @@ public: bool capture(Move m) const; bool capture_or_promotion(Move m) const; bool gives_check(Move m) const; - bool advanced_pawn_push(Move m) const; Piece moved_piece(Move m) const; Piece captured_piece() const; @@ -172,6 +171,9 @@ public: // Used by NNUE StateInfo* state() const; + void put_piece(Piece pc, Square s); + void remove_piece(Square s); + private: // Initialization helpers (used while setting up a position) void set_castling_right(Color c, Square rfrom); @@ -179,8 +181,6 @@ private: void set_check_info(StateInfo* si) const; // Other helpers - void put_piece(Piece pc, Square s); - void remove_piece(Square s); void move_piece(Square from, Square to); template void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); @@ -193,11 +193,11 @@ private: int castlingRightsMask[SQUARE_NB]; Square castlingRookSquare[CASTLING_RIGHT_NB]; Bitboard castlingPath[CASTLING_RIGHT_NB]; + Thread* thisThread; + StateInfo* st; int gamePly; Color sideToMove; Score psq; - Thread* thisThread; - StateInfo* st; bool chess960; }; @@ -301,19 +301,10 @@ inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } -inline bool Position::is_discovered_check_on_king(Color c, Move m) const { - return st->blockersForKing[c] & from_sq(m); -} - inline bool Position::pawn_passed(Color c, Square s) const { return !(pieces(~c, PAWN) & passed_pawn_span(c, s)); } -inline bool Position::advanced_pawn_push(Move m) const { - return type_of(moved_piece(m)) == PAWN - && relative_rank(sideToMove, to_sq(m)) > RANK_5; -} - inline int Position::pawns_on_same_color_squares(Color c, Square s) const { return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares)); } @@ -396,7 +387,7 @@ inline void Position::remove_piece(Square s) { byTypeBB[ALL_PIECES] ^= s; byTypeBB[type_of(pc)] ^= s; byColorBB[color_of(pc)] ^= s; - /* board[s] = NO_PIECE; Not needed, overwritten by the capturing one */ + board[s] = NO_PIECE; pieceCount[pc]--; pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; psq -= PSQT::psq[pc][s]; @@ -423,4 +414,6 @@ inline StateInfo* Position::state() const { return st; } +} // namespace Stockfish + #endif // #ifndef POSITION_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp index cfade29..33a3e00 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.cpp @@ -24,6 +24,7 @@ #include "bitboard.h" #include "types.h" +namespace Stockfish { namespace { @@ -126,3 +127,5 @@ void init() { } } // namespace PSQT + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/psqt.h b/DroidFishApp/src/main/cpp/stockfish/psqt.h index 8b4fd6e..7abb148 100644 --- a/DroidFishApp/src/main/cpp/stockfish/psqt.h +++ b/DroidFishApp/src/main/cpp/stockfish/psqt.h @@ -24,7 +24,7 @@ #include "types.h" -namespace PSQT +namespace Stockfish::PSQT { extern Score psq[PIECE_NB][SQUARE_NB]; @@ -32,7 +32,7 @@ extern Score psq[PIECE_NB][SQUARE_NB]; // Fill psqt array from a set of internally linked parameters extern void init(); -} // namespace PSQT +} // namespace Stockfish::PSQT #endif // PSQT_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/search.cpp b/DroidFishApp/src/main/cpp/stockfish/search.cpp index b5d21b9..a413bd3 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/search.cpp @@ -35,6 +35,8 @@ #include "uci.h" #include "syzygy/tbprobe.h" +namespace Stockfish { + namespace Search { LimitsType Limits; @@ -57,14 +59,14 @@ using namespace Search; namespace { // Different node types, used as a template parameter - enum NodeType { NonPV, PV }; + enum NodeType { NonPV, PV, Root }; constexpr uint64_t TtHitAverageWindow = 4096; constexpr uint64_t TtHitAverageResolution = 1024; // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(234 * (d - improving)); + return Value(214 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -72,7 +74,7 @@ namespace { Depth reduction(bool i, Depth d, int mn) { int r = Reductions[d] * Reductions[mn]; - return (r + 503) / 1024 + (!i && r > 915); + return (r + 534) / 1024 + (!i && r > 904); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -81,7 +83,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 14 ? 66 : 6 * d * d + 231 * d - 206; + return d > 14 ? 73 : 6 * d * d + 229 * d - 215; } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -100,53 +102,10 @@ namespace { Move best = MOVE_NONE; }; - // Breadcrumbs are used to mark nodes as being searched by a given thread - struct Breadcrumb { - std::atomic thread; - std::atomic key; - }; - std::array breadcrumbs; - - // ThreadHolding structure keeps track of which thread left breadcrumbs at the given - // node for potential reductions. A free node will be marked upon entering the moves - // loop by the constructor, and unmarked upon leaving that loop by the destructor. - struct ThreadHolding { - explicit ThreadHolding(Thread* thisThread, Key posKey, int ply) { - location = ply < 8 ? &breadcrumbs[posKey & (breadcrumbs.size() - 1)] : nullptr; - otherThread = false; - owning = false; - if (location) - { - // See if another already marked this location, if not, mark it ourselves - Thread* tmp = (*location).thread.load(std::memory_order_relaxed); - if (tmp == nullptr) - { - (*location).thread.store(thisThread, std::memory_order_relaxed); - (*location).key.store(posKey, std::memory_order_relaxed); - owning = true; - } - else if ( tmp != thisThread - && (*location).key.load(std::memory_order_relaxed) == posKey) - otherThread = true; - } - } - - ~ThreadHolding() { - if (owning) // Free the marked location - (*location).thread.store(nullptr, std::memory_order_relaxed); - } - - bool marked() { return otherThread; } - - private: - Breadcrumb* location; - bool otherThread, owning; - }; - - template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); - template + template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); Value value_to_tt(Value v, int ply); @@ -163,7 +122,7 @@ namespace { uint64_t perft(Position& pos, Depth depth) { StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); uint64_t cnt, nodes = 0; const bool leaf = (depth == 2); @@ -193,7 +152,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((21.3 + 2 * std::log(Threads.size())) * std::log(i + 0.25 * std::log(i))); + Reductions[i] = int(21.9 * std::log(i)); } @@ -294,7 +253,7 @@ void Thread::search() { // To allow access to (ss-7) up to (ss+2), the stack must be oversized. // The former is needed to allow update_continuation_histories(ss-1, ...), // which accesses its argument at ss-6, also near the root. - // The latter is needed for statScores and killer initialization. + // The latter is needed for statScore and killer initialization. Stack stack[MAX_PLY+10], *ss = stack+7; Move pv[MAX_PLY+1]; Value bestValue, alpha, beta, delta; @@ -309,6 +268,9 @@ void Thread::search() { for (int i = 7; i > 0; i--) (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + for (int i = 0; i <= MAX_PLY + 2; ++i) + (ss+i)->ply = i; + ss->pv = pv; bestValue = delta = alpha = -VALUE_INFINITE; @@ -350,19 +312,7 @@ void Thread::search() { multiPV = std::min(multiPV, rootMoves.size()); ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2; - int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns - - // In analysis mode, adjust contempt in accordance with user preference - if (Limits.infinite || Options["UCI_AnalyseMode"]) - ct = Options["Analysis Contempt"] == "Off" ? 0 - : Options["Analysis Contempt"] == "Both" ? ct - : Options["Analysis Contempt"] == "White" && us == BLACK ? -ct - : Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct - : ct; - - // Evaluation score is from the white point of view - contempt = (us == WHITE ? make_score(ct, ct / 2) - : -make_score(ct, ct / 2)); + trend = SCORE_ZERO; int searchAgainCounter = 0; @@ -408,21 +358,21 @@ void Thread::search() { alpha = std::max(prev - delta,-VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE); - // Adjust contempt based on root move's previousScore (dynamic contempt) - int dct = ct + (113 - ct / 2) * prev / (abs(prev) + 147); + // Adjust trend based on root move's previousScore (dynamic contempt) + int tr = 113 * prev / (abs(prev) + 147); - contempt = (us == WHITE ? make_score(dct, dct / 2) - : -make_score(dct, dct / 2)); + trend = (us == WHITE ? make_score(tr, tr / 2) + : -make_score(tr, tr / 2)); } // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we don't fail // high/low anymore. - failedHighCnt = 0; + int failedHighCnt = 0; while (true) { Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - searchAgainCounter); - bestValue = ::search(rootPos, ss, alpha, beta, adjustedDepth, false); + bestValue = Stockfish::search(rootPos, ss, alpha, beta, adjustedDepth, false); // Bring the best move to the front. It is critical that sorting // is done with a stable algorithm because all the values but the @@ -518,8 +468,8 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); - + double bestMoveInstability = 1.073 + std::max(1.0, 2.25 - 9.9 / rootDepth) + * totBestMoveChanges / Threads.size(); double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; // Cap used time in case of a single legal move for a better viewer experience in tournaments @@ -565,18 +515,18 @@ namespace { // search<>() is the main search function for both PV and non-PV nodes - template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { - constexpr bool PvNode = NT == PV; - const bool rootNode = PvNode && ss->ply == 0; + constexpr bool PvNode = nodeType != NonPV; + constexpr bool rootNode = nodeType == Root; const Depth maxNextDepth = rootNode ? depth : depth + 1; // Check if we have an upcoming move which draws by repetition, or // if the opponent had an alternative move earlier to this position. - if ( pos.rule50_count() >= 3 + if ( !rootNode + && pos.rule50_count() >= 3 && alpha < VALUE_DRAW - && !rootNode && pos.has_game_cycle(ss->ply)) { alpha = value_draw(pos.this_thread()); @@ -586,7 +536,7 @@ namespace { // Dive into quiescence search when the depth reaches zero if (depth <= 0) - return qsearch(pos, ss, alpha, beta); + return qsearch(pos, ss, alpha, beta); assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE); assert(PvNode || (alpha == beta - 1)); @@ -595,14 +545,14 @@ namespace { Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[64]; StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool formerPv, givesCheck, improving, didLMR, priorCapture; + bool givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -610,12 +560,12 @@ namespace { // Step 1. Initialize node Thread* thisThread = pos.this_thread(); - ss->inCheck = pos.checkers(); - priorCapture = pos.captured_piece(); - Color us = pos.side_to_move(); - moveCount = captureCount = quietCount = ss->moveCount = 0; - bestValue = -VALUE_INFINITE; - maxValue = VALUE_INFINITE; + ss->inCheck = pos.checkers(); + priorCapture = pos.captured_piece(); + Color us = pos.side_to_move(); + moveCount = captureCount = quietCount = ss->moveCount = 0; + bestValue = -VALUE_INFINITE; + maxValue = VALUE_INFINITE; // Check for the available remaining time if (thisThread == Threads.main()) @@ -648,11 +598,11 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); - (ss+1)->ply = ss->ply + 1; - (ss+1)->ttPv = false; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; - (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; - Square prevSq = to_sq((ss-1)->currentMove); + (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; + ss->doubleExtensions = (ss-1)->doubleExtensions; + Square prevSq = to_sq((ss-1)->currentMove); // Initialize statScore to zero for the grandchildren of the current position. // So statScore is shared between all grandchildren and only the first grandchild @@ -673,7 +623,6 @@ namespace { : ss->ttHit ? tte->move() : MOVE_NONE; if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); - formerPv = ss->ttPv && !PvNode; // Update low ply history for previous move if we are near root and position is or has been in PV if ( ss->ttPv @@ -808,7 +757,7 @@ namespace { if ((ss-1)->currentMove != MOVE_NULL) ss->staticEval = eval = evaluate(pos); else - ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; + ss->staticEval = eval = -(ss-1)->staticEval; // Save static evaluation into transposition table tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); @@ -817,7 +766,7 @@ namespace { // Use static evaluation difference to improve quiet move ordering if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval - 2 * Tempo), -1000, 1000); + int bonus = std::clamp(-depth * 4 * int((ss-1)->staticEval + ss->staticEval), -1000, 1000); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } @@ -839,10 +788,10 @@ namespace { // Step 8. Null move search with verification search (~40 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 22661 + && (ss-1)->statScore < 23767 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 24 * depth - 34 * improving + 162 * ss->ttPv + 159 + && ss->staticEval >= beta - 20 * depth - 22 * improving + 168 * ss->ttPv + 159 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -850,7 +799,7 @@ namespace { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and value - Depth R = (1062 + 68 * depth) / 256 + std::min(int(eval - beta) / 190, 3); + Depth R = (1090 + 81 * depth) / 256 + std::min(int(eval - beta) / 205, 3); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -888,7 +837,7 @@ namespace { probCutBeta = beta + 209 - 44 * improving; - // Step 9. ProbCut (~10 Elo) + // Step 9. ProbCut (~4 Elo) // If we have a good enough capture and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. if ( !PvNode @@ -903,17 +852,8 @@ namespace { && ttValue != VALUE_NONE && ttValue < probCutBeta)) { - // if ttMove is a capture and value from transposition table is good enough produce probCut - // cutoff without digging into actual probCut search - if ( ss->ttHit - && tte->depth() >= depth - 3 - && ttValue != VALUE_NONE - && ttValue >= probCutBeta - && ttMove - && pos.capture_or_promotion(ttMove)) - return probCutBeta; - assert(probCutBeta < VALUE_INFINITE); + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; bool ttPv = ss->ttPv; @@ -969,6 +909,23 @@ namespace { moves_loop: // When in check, search starts from here + ttCapture = ttMove && pos.capture_or_promotion(ttMove); + + // Step 11. A small Probcut idea, when we are in check + probCutBeta = beta + 409; + if ( ss->inCheck + && !PvNode + && depth >= 4 + && ttCapture + && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3 + && ttValue >= probCutBeta + && abs(ttValue) <= VALUE_KNOWN_WIN + && abs(beta) <= VALUE_KNOWN_WIN + ) + return probCutBeta; + + const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, nullptr , (ss-4)->continuationHistory, nullptr , (ss-6)->continuationHistory }; @@ -985,12 +942,16 @@ moves_loop: // When in check, search starts from here value = bestValue; singularQuietLMR = moveCountPruning = false; - ttCapture = ttMove && pos.capture_or_promotion(ttMove); + bool doubleExtension = false; - // Mark this node as being searched - ThreadHolding th(thisThread, posKey, ss->ply); + // Indicate PvNodes that will probably fail low if the node was searched + // at a depth equal or greater than the current depth, and the result of this search was a fail low. + bool likelyFailLow = PvNode + && ttMove + && (tte->bound() & BOUND_UPPER) + && tte->depth() >= depth; - // Step 11. Loop through all pseudo-legal moves until no moves remain + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1025,18 +986,10 @@ moves_loop: // When in check, search starts from here movedPiece = pos.moved_piece(move); givesCheck = pos.gives_check(move); - // Indicate PvNodes that will probably fail low if node was searched with non-PV search - // at depth equal or greater to current depth and result of this search was far below alpha - bool likelyFailLow = PvNode - && ttMove - && (tte->bound() & BOUND_UPPER) - && ttValue < alpha + 200 + 100 * depth - && tte->depth() >= depth; - // Calculate new depth for this move newDepth = depth - 1; - // Step 12. Pruning at shallow depth (~200 Elo) + // Step 13. Pruning at shallow depth (~200 Elo) if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) @@ -1062,8 +1015,8 @@ moves_loop: // When in check, search starts from here } else { - // Countermoves based pruning (~20 Elo) - if ( lmrDepth < 4 + ((ss-1)->statScore > 0 || (ss-1)->moveCount == 1) + // Continuation history based pruning (~20 Elo) + if ( lmrDepth < 5 && (*contHist[0])[movedPiece][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[movedPiece][to_sq(move)] < CounterMovePruneThreshold) continue; @@ -1075,7 +1028,7 @@ moves_loop: // When in check, search starts from here && (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - + (*contHist[5])[movedPiece][to_sq(move)] / 3 < 26237) + + (*contHist[5])[movedPiece][to_sq(move)] / 3 < 28255) continue; // Prune moves with negative SEE (~20 Elo) @@ -1084,24 +1037,25 @@ moves_loop: // When in check, search starts from here } } - // Step 13. Extensions (~75 Elo) + // Step 14. Extensions (~75 Elo) // Singular extension search (~70 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // then that move is singular and should be extended. To verify this we do // a reduced search on all the other moves but the ttMove and if the // result is lower than ttValue minus a margin, then we will extend the ttMove. - if ( depth >= 7 + if ( !rootNode + && depth >= 7 && move == ttMove - && !rootNode && !excludedMove // Avoid recursive singular search /* && ttValue != VALUE_NONE Already implicit in the next condition */ && abs(ttValue) < VALUE_KNOWN_WIN && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2; - Depth singularDepth = (depth - 1 + 3 * formerPv) / 2; + Value singularBeta = ttValue - 2 * depth; + Depth singularDepth = (depth - 1) / 2; + ss->excludedMove = move; value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); ss->excludedMove = MOVE_NONE; @@ -1110,6 +1064,15 @@ moves_loop: // When in check, search starts from here { extension = 1; singularQuietLMR = !ttCapture; + + // Avoid search explosion by limiting the number of double extensions to at most 3 + if ( !PvNode + && value < singularBeta - 93 + && ss->doubleExtensions < 3) + { + extension = 2; + doubleExtension = true; + } } // Multi-cut pruning @@ -1132,19 +1095,14 @@ moves_loop: // When in check, search starts from here return beta; } } - - // Check extension (~2 Elo) - else if ( givesCheck - && (pos.is_discovered_check_on_king(~us, move) || pos.see_ge(move))) - extension = 1; - - // Last captures extension - else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg - && pos.non_pawn_material() <= 2 * RookValueMg) + else if ( givesCheck + && depth > 6 + && abs(ss->staticEval) > Value(100)) extension = 1; // Add extension to new depth newDepth += extension; + ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2); // Speculative prefetch as early as possible prefetch(TT.first_entry(pos.key_after(move))); @@ -1156,117 +1114,87 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 14. Make the move + // Step 15. Make the move pos.do_move(move, st, givesCheck); - // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be - // re-searched at full depth. + // Step 16. Late moves reduction / extension (LMR, ~200 Elo) + // We use various heuristics for the sons of a node after the first son has + // been searched. In general we would like to reduce them, but there are many + // cases where we extend a son if it has good chances to be "interesting". if ( depth >= 3 && moveCount > 1 + 2 * rootNode && ( !captureOrPromotion - || moveCountPruning - || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha - || cutNode - || (!PvNode && !formerPv && captureHistory[movedPiece][to_sq(move)][type_of(pos.captured_piece())] < 4506) - || thisThread->ttHitAverage < 432 * TtHitAverageResolution * TtHitAverageWindow / 1024)) + || (cutNode && (ss-1)->moveCount > 1) + || !ss->ttPv) + && (!PvNode || ss->ply > 1 || thisThread->id() % 4 != 3)) { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction if the ttHit running average is large + if (PvNode) + r--; + + // Decrease reduction if the ttHit running average is large (~0 Elo) if (thisThread->ttHitAverage > 537 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; - // Increase reduction if other threads are searching this position - if (th.marked()) - r++; - // Decrease reduction if position is or has been on the PV - // and node is not likely to fail low. (~10 Elo) - if (ss->ttPv && !likelyFailLow) + // and node is not likely to fail low. (~3 Elo) + if ( ss->ttPv + && !likelyFailLow) r -= 2; // Increase reduction at root and non-PV nodes when the best move does not change frequently - if ((rootNode || !PvNode) && thisThread->rootDepth > 10 && thisThread->bestMoveChanges <= 2) + if ( (rootNode || !PvNode) + && thisThread->bestMoveChanges <= 2) r++; - // More reductions for late moves if position was not in previous PV - if (moveCountPruning && !formerPv) - r++; - - // Decrease reduction if opponent's move count is high (~5 Elo) + // Decrease reduction if opponent's move count is high (~1 Elo) if ((ss-1)->moveCount > 13) r--; - // Decrease reduction if ttMove has been singularly extended (~3 Elo) + // Decrease reduction if ttMove has been singularly extended (~1 Elo) if (singularQuietLMR) r--; - if (captureOrPromotion) + // Increase reduction for cut nodes (~3 Elo) + if (cutNode) + r += 1 + !captureOrPromotion; + + if (!captureOrPromotion) { - // Unless giving check, this capture is likely bad - if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 210 * depth <= alpha) - r++; - } - else - { - // Increase reduction if ttMove is a capture (~5 Elo) + // Increase reduction if ttMove is a capture (~3 Elo) if (ttCapture) r++; - // Increase reduction at root if failing high - r += rootNode ? thisThread->failedHighCnt * thisThread->failedHighCnt * moveCount / 512 : 0; - - // Increase reduction for cut nodes (~10 Elo) - if (cutNode) - r += 2; - - // Decrease reduction for moves that escape a capture. Filter out - // castling moves, because they are coded as "king captures rook" and - // hence break make_move(). (~2 Elo) - else if ( type_of(move) == NORMAL - && !pos.see_ge(reverse_move(move))) - r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); - ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - - 5337; - - // Decrease/increase reduction by comparing opponent's stat score (~10 Elo) - if (ss->statScore >= -89 && (ss-1)->statScore < -116) - r--; - - else if ((ss-1)->statScore >= -112 && ss->statScore < -100) - r++; + - 4923; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - // If we are not in check use statScore, if we are in check - // use sum of main history and first continuation history with an offset - if (ss->inCheck) - r -= (thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - 4341) / 16384; - else - r -= ss->statScore / 14382; + if (!ss->inCheck) + r -= ss->statScore / 14721; } - Depth d = std::clamp(newDepth - r, 1, newDepth); + // In general we want to cap the LMR depth search at newDepth. But if + // reductions are really negative and movecount is low, we allow this move + // to be searched deeper than the first move, unless ttMove was extended by 2. + Depth d = std::clamp(newDepth - r, 1, newDepth + (r < -1 && moveCount <= 5 && !doubleExtension)); value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); - doFullDepthSearch = value > alpha && d != newDepth; - + // If the son is reduced and fails high it will be re-searched at full depth + doFullDepthSearch = value > alpha && d < newDepth; didLMR = true; } else { doFullDepthSearch = !PvNode || moveCount > 1; - didLMR = false; } - // Step 16. Full depth search when LMR is skipped or fails high + // Step 17. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1293,12 +1221,12 @@ moves_loop: // When in check, search starts from here std::min(maxNextDepth, newDepth), false); } - // Step 17. Undo move + // Step 18. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 18. Check for a new best move + // Step 19. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1350,7 +1278,6 @@ moves_loop: // When in check, search starts from here else { assert(value >= beta); // Fail high - ss->statScore = 0; break; } } @@ -1375,7 +1302,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 19. Check for mate and stalemate + // Step 20. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. @@ -1383,8 +1310,9 @@ moves_loop: // When in check, search starts from here assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); if (!moveCount) - bestValue = excludedMove ? alpha - : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; + bestValue = excludedMove ? alpha : + ss->inCheck ? mated_in(ss->ply) + : VALUE_DRAW; // If there is a move which produces search value greater than alpha we update stats of searched moves else if (bestMove) @@ -1423,10 +1351,11 @@ moves_loop: // When in check, search starts from here // qsearch() is the quiescence search function, which is called by the main search // function with zero depth, or recursively with further decreasing depth per call. - template + template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { - constexpr bool PvNode = NT == PV; + static_assert(nodeType != Root); + constexpr bool PvNode = nodeType == PV; assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE); assert(PvNode || (alpha == beta - 1)); @@ -1434,7 +1363,7 @@ moves_loop: // When in check, search starts from here Move pv[MAX_PLY+1]; StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; Key posKey; @@ -1452,7 +1381,6 @@ moves_loop: // When in check, search starts from here } Thread* thisThread = pos.this_thread(); - (ss+1)->ply = ss->ply + 1; bestMove = MOVE_NONE; ss->inCheck = pos.checkers(); moveCount = 0; @@ -1508,7 +1436,7 @@ moves_loop: // When in check, search starts from here // and addition of two tempos ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval + 2 * Tempo; + : -(ss-1)->staticEval; // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) @@ -1533,7 +1461,7 @@ moves_loop: // When in check, search starts from here // Initialize a MovePicker object for the current position, and prepare // to search the moves. Because the depth is <= 0 here, only captures, - // queen and checking knight promotions, and other checks(only if depth >= DEPTH_QS_CHECKS) + // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, @@ -1550,15 +1478,13 @@ moves_loop: // When in check, search starts from here moveCount++; - // Futility pruning + // Futility pruning and moveCount pruning if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY && !givesCheck && futilityBase > -VALUE_KNOWN_WIN - && !pos.advanced_pawn_push(move)) + && type_of(move) != PROMOTION) { - assert(type_of(move) != EN_PASSANT); // Due to !pos.advanced_pawn_push - // moveCount pruning if (moveCount > 2) continue; @@ -1598,7 +1524,7 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; - // CounterMove based pruning + // Continuation history based pruning if ( !captureOrPromotion && bestValue > VALUE_TB_LOSS_IN_MAX_PLY && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold @@ -1607,7 +1533,7 @@ moves_loop: // When in check, search starts from here // Make and search the move pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); + value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); @@ -1942,7 +1868,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { bool RootMove::extract_ponder_from_tt(Position& pos) { StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize); + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); bool ttHit; @@ -2011,3 +1937,5 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { m.tbRank = 0; } } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/search.h b/DroidFishApp/src/main/cpp/stockfish/search.h index 3bf3e9a..801baac 100644 --- a/DroidFishApp/src/main/cpp/stockfish/search.h +++ b/DroidFishApp/src/main/cpp/stockfish/search.h @@ -25,6 +25,8 @@ #include "movepick.h" #include "types.h" +namespace Stockfish { + class Position; namespace Search { @@ -50,6 +52,7 @@ struct Stack { bool inCheck; bool ttPv; bool ttHit; + int doubleExtensions; }; @@ -106,4 +109,6 @@ void clear(); } // namespace Search +} // namespace Stockfish + #endif // #ifndef SEARCH_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp index 54b06ef..96b2970 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.cpp @@ -50,9 +50,11 @@ #include #endif -using namespace Tablebases; +using namespace Stockfish::Tablebases; -int Tablebases::MaxCardinality; +int Stockfish::Tablebases::MaxCardinality; + +namespace Stockfish { namespace { @@ -103,9 +105,6 @@ template<> inline void swap_endian(uint8_t&) {} template T number(void* addr) { - static const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; - static const bool IsLittleEndian = (Le.c[0] == 4); - T v; if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare) @@ -190,7 +189,8 @@ public: std::stringstream ss(Paths); std::string path; - while (std::getline(ss, path, SepChar)) { + while (std::getline(ss, path, SepChar)) + { fname = path + "/" + f; std::ifstream::open(fname); if (is_open()) @@ -472,8 +472,6 @@ TBTables TBTables; // If the corresponding file exists two new objects TBTable and TBTable // are created and added to the lists and hash table. Called at init time. void TBTables::add(const std::vector& pieces) { - if (sizeof(char*) < 8 && pieces.size() >= 6) - return; // Not enough address space to support 6-men TB on 32-bit OS std::string code; @@ -567,7 +565,8 @@ int decompress_pairs(PairsData* d, uint64_t idx) { int buf64Size = 64; Sym sym; - while (true) { + while (true) + { int len = 0; // This is the symbol length - d->min_sym_len // Now get the symbol length. For any symbol s64 of length l right-padded @@ -605,8 +604,8 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // We binary-search for our value recursively expanding into the left and // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1 // that will store the value we need. - while (d->symlen[sym]) { - + while (d->symlen[sym]) + { Sym left = d->btree[sym].get(); // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and @@ -711,7 +710,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu leadPawns = b = pos.pieces(color_of(pc), PAWN); do - squares[size++] = pop_lsb(&b) ^ flipSquares; + squares[size++] = pop_lsb(b) ^ flipSquares; while (b); leadPawnsCnt = size; @@ -731,7 +730,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // directly map them to the correct color and square. b = pos.pieces() ^ leadPawns; do { - Square s = pop_lsb(&b); + Square s = pop_lsb(b); squares[size] = s ^ flipSquares; pieces[size++] = Piece(pos.piece_on(s) ^ flipColor); } while (b); @@ -1537,6 +1536,14 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { WDLScore wdl = -probe_wdl(pos, &result); dtz = dtz_before_zeroing(wdl); } + else if (pos.is_draw(1)) + { + // In case a root move leads to a draw by repetition or + // 50-move rule, we set dtz to zero. Note: since we are + // only 1 ply from the root, this must be a true 3-fold + // repetition inside the game history. + dtz = 0; + } else { // Otherwise, take dtz for the new position and correct by 1 ply @@ -1587,6 +1594,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { ProbeState result; StateInfo st; + WDLScore wdl; bool rule50 = Options["Syzygy50MoveRule"]; @@ -1595,7 +1603,10 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { { pos.do_move(m.pv[0], st); - WDLScore wdl = -probe_wdl(pos, &result); + if (pos.is_draw(1)) + wdl = WDLDraw; + else + wdl = -probe_wdl(pos, &result); pos.undo_move(m.pv[0]); @@ -1612,3 +1623,5 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { return true; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h index cefd39c..56734af 100644 --- a/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h +++ b/DroidFishApp/src/main/cpp/stockfish/syzygy/tbprobe.h @@ -23,7 +23,7 @@ #include "../search.h" -namespace Tablebases { +namespace Stockfish::Tablebases { enum WDLScore { WDLLoss = -2, // Loss @@ -73,6 +73,6 @@ inline std::ostream& operator<<(std::ostream& os, const ProbeState v) { return os; } -} +} // namespace Stockfish::Tablebases #endif diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.cpp b/DroidFishApp/src/main/cpp/stockfish/thread.cpp index a12c0bc..da8e1d0 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/thread.cpp @@ -26,6 +26,8 @@ #include "syzygy/tbprobe.h" #include "tt.h" +namespace Stockfish { + ThreadPool Threads; // Global object @@ -126,14 +128,16 @@ void Thread::idle_loop() { void ThreadPool::set(size_t requested) { - if (size() > 0) { // destroy any existing thread(s) + if (size() > 0) // destroy any existing thread(s) + { main()->wait_for_search_finished(); while (size() > 0) delete back(), pop_back(); } - if (requested > 0) { // create new thread(s) + if (requested > 0) // create new thread(s) + { push_back(new MainThread(0)); while (size() < requested) @@ -258,3 +262,5 @@ void ThreadPool::wait_for_search_finished() const { if (th != front()) th->wait_for_search_finished(); } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/thread.h b/DroidFishApp/src/main/cpp/stockfish/thread.h index 585f208..5bfa235 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread.h @@ -32,6 +32,7 @@ #include "search.h" #include "thread_win32_osx.h" +namespace Stockfish { /// Thread class keeps together all the thread-related stuff. We use /// per-thread pawn and material hash tables so that once we get a @@ -54,6 +55,7 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); + size_t id() const { return idx; } Pawns::Table pawnsTable; Material::Table materialTable; @@ -72,8 +74,7 @@ public: LowPlyHistory lowPlyHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; - Score contempt; - int failedHighCnt; + Score trend; }; @@ -128,4 +129,6 @@ private: extern ThreadPool Threads; +} // namespace Stockfish + #endif // #ifndef THREAD_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h index a0e4d19..a21674c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h +++ b/DroidFishApp/src/main/cpp/stockfish/thread_win32_osx.h @@ -31,6 +31,8 @@ #include +namespace Stockfish { + static const size_t TH_STACK_SIZE = 8 * 1024 * 1024; template > @@ -57,10 +59,16 @@ public: void join() { pthread_join(thread, NULL); } }; +} // namespace Stockfish + #else // Default case: use STL classes +namespace Stockfish { + typedef std::thread NativeThread; +} // namespace Stockfish + #endif #endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp index fc4fbaa..f742d1e 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.cpp @@ -24,6 +24,8 @@ #include "timeman.h" #include "uci.h" +namespace Stockfish { + TimeManagement Time; // Our global time management object @@ -95,3 +97,5 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { if (Options["Ponder"]) optimumTime += optimumTime / 4; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/timeman.h b/DroidFishApp/src/main/cpp/stockfish/timeman.h index 55a68de..b1878d6 100644 --- a/DroidFishApp/src/main/cpp/stockfish/timeman.h +++ b/DroidFishApp/src/main/cpp/stockfish/timeman.h @@ -23,6 +23,8 @@ #include "search.h" #include "thread.h" +namespace Stockfish { + /// The TimeManagement class computes the optimal time to think depending on /// the maximum available time, the game move number and other parameters. @@ -44,4 +46,6 @@ private: extern TimeManagement Time; +} // namespace Stockfish + #endif // #ifndef TIMEMAN_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.cpp b/DroidFishApp/src/main/cpp/stockfish/tt.cpp index cb5af5c..1f495ca 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tt.cpp @@ -26,6 +26,8 @@ #include "tt.h" #include "uci.h" +namespace Stockfish { + TranspositionTable TT; // Our global transposition table /// TTEntry::save() populates the TTEntry with a new node's data, possibly @@ -156,3 +158,5 @@ int TranspositionTable::hashfull() const { return cnt / ClusterSize; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/tt.h b/DroidFishApp/src/main/cpp/stockfish/tt.h index a750b6c..d915d92 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tt.h +++ b/DroidFishApp/src/main/cpp/stockfish/tt.h @@ -22,6 +22,8 @@ #include "misc.h" #include "types.h" +namespace Stockfish { + /// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// /// key 16 bit @@ -100,4 +102,6 @@ private: extern TranspositionTable TT; +} // namespace Stockfish + #endif // #ifndef TT_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.cpp b/DroidFishApp/src/main/cpp/stockfish/tune.cpp index 424bdac..ac91b60 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/tune.cpp @@ -26,9 +26,10 @@ using std::string; +namespace Stockfish { + bool Tune::update_on_last; const UCI::Option* LastOption = nullptr; -BoolConditions Conditions; static std::map TuneResults; string Tune::next(string& names, bool pop) { @@ -108,23 +109,7 @@ template<> void Tune::Entry::read_option() { template<> void Tune::Entry::init_option() {} template<> void Tune::Entry::read_option() { value(); } - -// Set binary conditions according to a probability that depends -// on the corresponding parameter value. - -void BoolConditions::set() { - - static PRNG rng(now()); - static bool startup = true; // To workaround fishtest bench - - for (size_t i = 0; i < binary.size(); i++) - binary[i] = !startup && (values[i] + int(rng.rand() % variance) > threshold); - - startup = false; - - for (size_t i = 0; i < binary.size(); i++) - sync_cout << binary[i] << sync_endl; -} +} // namespace Stockfish // Init options with tuning session results instead of default values. Useful to @@ -138,7 +123,11 @@ void BoolConditions::set() { #include +namespace Stockfish { + void Tune::read_results() { /* ...insert your values here... */ } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/tune.h b/DroidFishApp/src/main/cpp/stockfish/tune.h index c2cd0c9..b5c715b 100644 --- a/DroidFishApp/src/main/cpp/stockfish/tune.h +++ b/DroidFishApp/src/main/cpp/stockfish/tune.h @@ -24,6 +24,8 @@ #include #include +namespace Stockfish { + typedef std::pair Range; // Option's min-max values typedef Range (RangeFun) (int); @@ -44,27 +46,6 @@ struct SetRange { #define SetDefaultRange SetRange(default_range) -/// BoolConditions struct is used to tune boolean conditions in the -/// code by toggling them on/off according to a probability that -/// depends on the value of a tuned integer parameter: for high -/// values of the parameter condition is always disabled, for low -/// values is always enabled, otherwise it is enabled with a given -/// probability that depnends on the parameter under tuning. - -struct BoolConditions { - void init(size_t size) { values.resize(size, defaultValue), binary.resize(size, 0); } - void set(); - - std::vector binary, values; - int defaultValue = 465, variance = 40, threshold = 500; - SetRange range = SetRange(0, 1000); -}; - -extern BoolConditions Conditions; - -inline void set_conditions() { Conditions.set(); } - - /// Tune class implements the 'magic' code that makes the setup of a fishtest /// tuning session as easy as it can be. Mainly you have just to remove const /// qualifiers from the variables you want to tune and flag them for tuning, so @@ -157,14 +138,6 @@ class Tune { return add(value, (next(names), std::move(names)), args...); } - // Template specialization for BoolConditions - template - int add(const SetRange& range, std::string&& names, BoolConditions& cond, Args&&... args) { - for (size_t size = cond.values.size(), i = 0; i < size; i++) - add(cond.range, next(names, i == size - 1) + "_" + std::to_string(i), cond.values[i]); - return add(range, std::move(names), args...); - } - std::vector> list; public: @@ -185,9 +158,6 @@ public: #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true -// Some macro to tune toggling of boolean conditions -#define CONDITION(x) (Conditions.binary[__COUNTER__] || (x)) -#define TUNE_CONDITIONS() int UNIQUE(c, __LINE__) = (Conditions.init(__COUNTER__), 0); \ - TUNE(Conditions, set_conditions) +} // namespace Stockfish #endif // #ifndef TUNE_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/types.h b/DroidFishApp/src/main/cpp/stockfish/types.h index d270384..0bd4a1c 100644 --- a/DroidFishApp/src/main/cpp/stockfish/types.h +++ b/DroidFishApp/src/main/cpp/stockfish/types.h @@ -83,6 +83,8 @@ # define pext(b, m) 0 #endif +namespace Stockfish { + #ifdef USE_POPCNT constexpr bool HasPopCnt = true; #else @@ -189,7 +191,6 @@ enum Value : int { BishopValueMg = 825, BishopValueEg = 915, RookValueMg = 1276, RookValueEg = 1380, QueenValueMg = 2538, QueenValueEg = 2682, - Tempo = 28, MidgameLimit = 15258, EndgameLimit = 3915 }; @@ -482,6 +483,8 @@ constexpr Key make_key(uint64_t seed) { return seed * 6364136223846793005ULL + 1442695040888963407ULL; } +} // namespace Stockfish + #endif // #ifndef TYPES_H_INCLUDED #include "tune.h" // Global visibility to tuning setup diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.cpp b/DroidFishApp/src/main/cpp/stockfish/uci.cpp index b3017e9..b3738a4 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/uci.cpp @@ -34,6 +34,8 @@ using namespace std; +namespace Stockfish { + extern vector setup_bench(const Position&, istream&); namespace { @@ -205,13 +207,13 @@ namespace { // Coefficients of a 3rd order polynomial fit based on fishtest data // for two parameters needed to transform eval to the argument of a // logistic function. - double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679}; - double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751}; + double as[] = {-3.68389304, 30.07065921, -60.52878723, 149.53378557}; + double bs[] = {-2.0181857, 15.85685038, -29.83452023, 47.59078827}; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; // Transform eval to centipawns with limited range - double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + double x = std::clamp(double(100 * v) / PawnValueEg, -2000.0, 2000.0); // Return win rate in per mille (rounded to nearest) return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); @@ -275,7 +277,15 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "d") sync_cout << pos << sync_endl; else if (token == "eval") trace_eval(pos); else if (token == "compiler") sync_cout << compiler_info() << sync_endl; - else + else if (token == "export_net") + { + std::optional filename; + std::string f; + if (is >> skipws >> f) + filename = f; + Eval::NNUE::save_eval(filename); + } + else if (!token.empty() && token[0] != '#') sync_cout << "Unknown command: " << cmd << sync_endl; } while (token != "quit" && argc == 1); // Command line args are one-shot @@ -369,3 +379,5 @@ Move UCI::to_move(const Position& pos, string& str) { return MOVE_NONE; } + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/cpp/stockfish/uci.h b/DroidFishApp/src/main/cpp/stockfish/uci.h index edcfcad..d316010 100644 --- a/DroidFishApp/src/main/cpp/stockfish/uci.h +++ b/DroidFishApp/src/main/cpp/stockfish/uci.h @@ -24,6 +24,8 @@ #include "types.h" +namespace Stockfish { + class Position; namespace UCI { @@ -78,4 +80,6 @@ Move to_move(const Position& pos, std::string& str); extern UCI::OptionsMap Options; +} // namespace Stockfish + #endif // #ifndef UCI_H_INCLUDED diff --git a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp index 03f377b..07b3027 100644 --- a/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp +++ b/DroidFishApp/src/main/cpp/stockfish/ucioption.cpp @@ -31,6 +31,8 @@ using std::string; +namespace Stockfish { + UCI::OptionsMap Options; // Global object namespace UCI { @@ -59,8 +61,6 @@ void init(OptionsMap& o) { constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; o["Debug Log File"] << Option("", on_logger); - o["Contempt"] << Option(24, -100, 100); - o["Analysis Contempt"] << Option("Both var Off var White var Black var Both", "Both"); o["Threads"] << Option(1, 1, 512, on_threads); o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size); o["Clear Hash"] << Option(on_clear_hash); @@ -190,3 +190,5 @@ Option& Option::operator=(const string& v) { } } // namespace UCI + +} // namespace Stockfish diff --git a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java index b2cd280..ba9ed99 100644 --- a/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java +++ b/DroidFishApp/src/main/java/org/petero/droidfish/engine/InternalStockFish.java @@ -36,7 +36,7 @@ import org.petero.droidfish.EngineOptions; /** Stockfish engine running as process, started from assets resource. */ public class InternalStockFish extends ExternalEngine { - private static final String defaultNet = "nn-62ef826d1a6d.nnue"; + private static final String defaultNet = "nn-3475407dc199.nnue"; private static final String netOption = "evalfile"; private File defaultNetFile; // To get the full path of the copied default network file