local bench = script and require(script.Parent.bench_support) or require("bench_support")
function test()

-- https://github.com/stefandd/Tic4
local negaMax = { maxdepth = 4, minsearchpos = 0, numsearchpos = 0 }
negaMax.__index = negaMax
function negaMax:evaluate(board, depth)
print ( " This function needs to be implemented! " )
end
function negaMax:move_candidates(board, side_to_move)
    print("This function needs to be implemented!")
end
function negaMax:make_move(board, side_to_move, move)
    print("This function needs to be implemented!")
end
function negaMax:negaMax(board, side_to_move, depth, alpha, beta) -- side_to_move: e.g. 1 is blue, -1 is red
    --
    -- init vars for root call
    --
    if not depth then -- root call
        depth = 0
        alpha = -math.huge
        beta = math.huge
        self.numsearchpos = 0 -- reset call counter
    end
    --
    -- test if the node is terminal (i.e. full board or win)
    --
    local best_move = -1
    local score, is_term_node = self:evaluate(board, depth)
    --
    -- abort the recursion if this is a terminal node or the maximum search depth has been reached
    --
    if is_term_node or depth == self.maxdepth then
        return side_to_move * score, best_move, is_term_node
    end
    --
    -- if this is not a terminal node, evaluate the child nodes
    --
    local moves = self:move_candidates(board, side_to_move)
    score = -math.huge
    for _, analyzed_move in pairs(moves) do -- iterate over all candidate moves
        self.numsearchpos = self.numsearchpos + 1
        local b = self:make_move(board, side_to_move, analyzed_move)
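        -- Negamax recursion (standard technique): the child position is searched from the
        -- opponent's point of view (-side_to_move) with a negated, mirrored window
        -- (-beta, -alpha), and the returned score is negated so it is expressed from the
        -- current player's point of view again.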
        local move_score = -self:negaMax(b, -side_to_move, depth + 1, -beta, -alpha)
        if move_score > score then
            score = move_score
            best_move = analyzed_move
        end
        --
        -- alpha-beta pruning
        --
        alpha = math.max(alpha, score)
        if alpha >= beta then
            break
        end
        --
    end
    if depth == 0 then
        --
        -- exit for root call (depth == 0)
        --
        -- debug stuff
        --print(string.format("---- Negamax: node info, depth: %d, side: %d, score: %d, best move: %d", depth, side_to_move, score, best_move))
        --print_board(board)
        --print(string.format("----"))
        print("Analyzed positions: " .. self.numsearchpos)
    end
    return score, best_move, is_term_node
end
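-- Usage sketch (illustrative only, not executed by the benchmark; the position below is
-- hypothetical). The root call omits depth/alpha/beta, which the function initializes itself:
--
--   local board = copy_board(empty_board)
--   board[1], board[6] = 1, -1                         -- 'x' on square 1, 'o' on square 6
--   local score, best_move = negaMax:negaMax(board, 1) -- search for side 1 ('x')
--   print(score, best_move)                            -- score and square index of the chosen move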
local empty_board = { 0, 0, 0, 0,
                      0, 0, 0, 0,
                      0, 0, 0, 0,
                      0, 0, 0, 0 } -- 16 empty positions
----------- helper methods
function copy_board(board)
    local copy = {}
    for i = 1, #board do
        copy[i] = board[i]
    end
    return copy
end
function print_board(board)
    local gboard = {}
    for i = 1, #board do
        if board[i] == 0 then gboard[i] = '.'
        elseif board[i] == 1 then gboard[i] = 'x'
        else gboard[i] = 'o'
        end
    end
    print(string.format("\n%s %s %s %s\n%s %s %s %s\n%s %s %s %s\n%s %s %s %s\n", unpack(gboard)))
end
function is_board_full(board)
    for i = 1, #board do
        if board[i] == 0 then
            return false
        end
    end
    return true
end
----------- implement negaMax methods
negaMax.index_quadruplets = {
    { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 }, { 13, 14, 15, 16 }, -- rows
    { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 }, { 4, 8, 12, 16 }, -- cols
    { 1, 6, 11, 16 }, { 4, 7, 10, 13 }, -- diags
    { 1, 2, 5, 6 }, { 2, 3, 6, 7 }, { 3, 4, 7, 8 }, -- 2x2 squares
    { 5, 6, 9, 10 }, { 6, 7, 10, 11 }, { 7, 8, 11, 12 },
    { 9, 10, 13, 14 }, { 10, 11, 14, 15 }, { 11, 12, 15, 16 }
}
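-- There are 19 quadruplets in total: 4 rows, 4 columns, 2 main diagonals and 9 overlapping
-- 2x2 squares. negaMax:evaluate below counts a quadruplet fully occupied by one side as a win.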
function negaMax:evaluate(board, depth) -- return format is score, is_terminal_position
    --[[
    What can be confusing is how the heuristic value of the current node is calculated. In this implementation, this value is always calculated from the point of view of player A, whose color value is one. In other words, higher heuristic values always represent situations more favorable for player A. This is the same behavior as the normal minimax algorithm. The heuristic value is not necessarily the same as a node's return value, because negamax negates values via the color parameter: a negamax node's return value is a heuristic score from the point of view of that node's current player.
    Negamax scores match minimax scores for nodes where player A is about to play, i.e. where player A is the maximizing player in the minimax equivalent. Negamax always searches for the maximum value at all of its nodes, so for player B nodes the minimax score is the negation of the negamax score; player B is the minimizing player in the minimax equivalent.
    Variations of negamax implementations may omit the color parameter. In that case, the heuristic evaluation function must return values from the point of view of the node's current player.
    --]]
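    -- Concrete illustration of this convention (hypothetical value): if this evaluation
    -- returns +7, a score favoring player [+], the caller in negaMax:negaMax converts it
    -- to side_to_move * 7, i.e. +7 when player [+] is to move and -7 when player [-] is
    -- to move, so every node can simply maximize from its own point of view.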
    local player_plus_score, player_minus_score = 0, 0
    local game_won = false
    for _, curr_qdr in pairs(negaMax.index_quadruplets) do -- iterate over all index quadruplets
        -- count the empty positions and the positions occupied by each side
        local player_plus_fields, player_minus_fields, empties = 0, 0, 0
        for _, index in next, curr_qdr do -- iterate over all indices in the quadruplet
            if board[index] == 0 then
                empties = empties + 1
            elseif board[index] == 1 then
                player_plus_fields = player_plus_fields + 1
            elseif board[index] == -1 then
                player_minus_fields = player_minus_fields + 1
            end
        end
        -- evaluate the quadruplet's score by looking at empty vs. occupied positions
        if empties == 3 then
            if player_plus_fields == 1 then
                player_plus_score = player_plus_score + 3
            elseif player_minus_fields == 1 then
                player_minus_score = player_minus_score + 3
            end
        elseif empties == 2 then
            if player_plus_fields == 2 then
                player_plus_score = player_plus_score + 13
            elseif player_minus_fields == 2 then
                player_minus_score = player_minus_score + 13
            end
        elseif empties == 1 then
            if player_plus_fields == 3 then
                player_plus_score = player_plus_score + 31
            elseif player_minus_fields == 3 then
                player_minus_score = player_minus_score + 31
            end
        elseif empties == 0 then
            -- check for winning situations
            if player_plus_fields == 4 then
                player_plus_score = 999 - depth
                player_minus_score = 0
                game_won = true
                break
            elseif player_minus_fields == 4 then
                -- this should not happen if terminal nodes are detected properly!
                player_plus_score = 0
                player_minus_score = 999 - depth
                game_won = true
                break
            end
        end
    end
    -- return format is score, is_terminal_position
    if not game_won and is_board_full(board) then
        return 0, true -- DRAW
    else
        return (player_plus_score - player_minus_score), game_won -- >0 is good for player 1 [+], <0 is good for the other player (player 2 [-])
    end
end
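-- Worked example of the heuristic above (hypothetical position, shown only as a comment):
--   x x . .
--   . o . .
--   . . . .
--   . . . .
-- Player [+] collects 13 (two 'x' in row {1,2,3,4}) + 3 (one 'x' in column {1,5,9,13}) = 16,
-- player [-] collects 3 + 3 + 3 for the lone 'o' in quadruplets {5,6,7,8}, {5,6,9,10} and
-- {6,7,10,11}, so evaluate returns 16 - 9 = 7, a non-terminal position favoring player [+].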
function negaMax:move_candidates(board, side_to_move)
    local moves = {}
    for i = 1, #board do
        if board[i] == 0 then -- empty?
            moves[#moves + 1] = i -- record this square as a candidate move
        end
    end
    return moves
end
function negaMax:make_move(board, side_to_move, move)
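    -- The board is copied rather than modified in place, so each node in the negamax
    -- recursion works on its own board state and no undo step is needed after the recursive call.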
    local copy = copy_board(board)
    copy[move] = side_to_move
    return copy
end
local human_player = 1
local AI_player = -human_player
local game_board = copy_board(empty_board)
local curr_move = -1
local curr_player = human_player -- human player goes first
local score = 0
local stop_loop = false
local game_over = false

negaMax.maxdepth = 5

local t0 = os.clock()
score, curr_move = negaMax:negaMax(game_board, curr_player)
local t1 = os.clock()

return t1 - t0
end
bench.runCode(test, "tictactoe")