Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
aimacode
GitHub Repository: aimacode/aima-python
Path: blob/master/games4e.py
615 views
1
"""Games or Adversarial Search (Chapter 5)"""
2
3
import copy
4
import itertools
5
import random
6
from collections import namedtuple
7
8
import numpy as np
9
10
from utils4e import vector_add, MCT_Node, ucb
11
12
GameState = namedtuple('GameState', 'to_move, utility, board, moves')
13
StochasticGameState = namedtuple('StochasticGameState', 'to_move, utility, board, moves, chance')
14
15
16
# ______________________________________________________________________________
17
# MinMax Search
18
19
20
def minmax_decision(state, game):
21
"""Given a state in a game, calculate the best move by searching
22
forward all the way to the terminal states. [Figure 5.3]"""
23
24
player = game.to_move(state)
25
26
def max_value(state):
27
if game.terminal_test(state):
28
return game.utility(state, player)
29
v = -np.inf
30
for a in game.actions(state):
31
v = max(v, min_value(game.result(state, a)))
32
return v
33
34
def min_value(state):
35
if game.terminal_test(state):
36
return game.utility(state, player)
37
v = np.inf
38
for a in game.actions(state):
39
v = min(v, max_value(game.result(state, a)))
40
return v
41
42
# Body of minmax_decision:
43
return max(game.actions(state), key=lambda a: min_value(game.result(state, a)))
44
45
46
# ______________________________________________________________________________
47
48
49
def expect_minmax(state, game):
50
"""
51
[Figure 5.11]
52
Return the best move for a player after dice are thrown. The game tree
53
includes chance nodes along with min and max nodes.
54
"""
55
player = game.to_move(state)
56
57
def max_value(state):
58
v = -np.inf
59
for a in game.actions(state):
60
v = max(v, chance_node(state, a))
61
return v
62
63
def min_value(state):
64
v = np.inf
65
for a in game.actions(state):
66
v = min(v, chance_node(state, a))
67
return v
68
69
def chance_node(state, action):
70
res_state = game.result(state, action)
71
if game.terminal_test(res_state):
72
return game.utility(res_state, player)
73
sum_chances = 0
74
num_chances = len(game.chances(res_state))
75
for chance in game.chances(res_state):
76
res_state = game.outcome(res_state, chance)
77
util = 0
78
if res_state.to_move == player:
79
util = max_value(res_state)
80
else:
81
util = min_value(res_state)
82
sum_chances += util * game.probability(chance)
83
return sum_chances / num_chances
84
85
# Body of expect_min_max:
86
return max(game.actions(state), key=lambda a: chance_node(state, a), default=None)
87
88
89
def alpha_beta_search(state, game):
90
"""Search game to determine best action; use alpha-beta pruning.
91
As in [Figure 5.7], this version searches all the way to the leaves."""
92
93
player = game.to_move(state)
94
95
# Functions used by alpha_beta
96
def max_value(state, alpha, beta):
97
if game.terminal_test(state):
98
return game.utility(state, player)
99
v = -np.inf
100
for a in game.actions(state):
101
v = max(v, min_value(game.result(state, a), alpha, beta))
102
if v >= beta:
103
return v
104
alpha = max(alpha, v)
105
return v
106
107
def min_value(state, alpha, beta):
108
if game.terminal_test(state):
109
return game.utility(state, player)
110
v = np.inf
111
for a in game.actions(state):
112
v = min(v, max_value(game.result(state, a), alpha, beta))
113
if v <= alpha:
114
return v
115
beta = min(beta, v)
116
return v
117
118
# Body of alpha_beta_search:
119
best_score = -np.inf
120
beta = np.inf
121
best_action = None
122
for a in game.actions(state):
123
v = min_value(game.result(state, a), best_score, beta)
124
if v > best_score:
125
best_score = v
126
best_action = a
127
return best_action
128
129
130
def alpha_beta_cutoff_search(state, game, d=4, cutoff_test=None, eval_fn=None):
131
"""Search game to determine best action; use alpha-beta pruning.
132
This version cuts off search and uses an evaluation function."""
133
134
player = game.to_move(state)
135
136
# Functions used by alpha_beta
137
def max_value(state, alpha, beta, depth):
138
if cutoff_test(state, depth):
139
return eval_fn(state)
140
v = -np.inf
141
for a in game.actions(state):
142
v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
143
if v >= beta:
144
return v
145
alpha = max(alpha, v)
146
return v
147
148
def min_value(state, alpha, beta, depth):
149
if cutoff_test(state, depth):
150
return eval_fn(state)
151
v = np.inf
152
for a in game.actions(state):
153
v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
154
if v <= alpha:
155
return v
156
beta = min(beta, v)
157
return v
158
159
# Body of alpha_beta_cutoff_search starts here:
160
# The default test cuts off at depth d or at a terminal state
161
cutoff_test = (cutoff_test or (lambda state, depth: depth > d or game.terminal_test(state)))
162
eval_fn = eval_fn or (lambda state: game.utility(state, player))
163
best_score = -np.inf
164
beta = np.inf
165
best_action = None
166
for a in game.actions(state):
167
v = min_value(game.result(state, a), best_score, beta, 1)
168
if v > best_score:
169
best_score = v
170
best_action = a
171
return best_action
172
173
174
# ______________________________________________________________________________
175
# Monte Carlo Tree Search
176
177
178
def monte_carlo_tree_search(state, game, N=1000):
179
def select(n):
180
"""select a leaf node in the tree"""
181
if n.children:
182
return select(max(n.children.keys(), key=ucb))
183
else:
184
return n
185
186
def expand(n):
187
"""expand the leaf node by adding all its children states"""
188
if not n.children and not game.terminal_test(n.state):
189
n.children = {MCT_Node(state=game.result(n.state, action), parent=n): action
190
for action in game.actions(n.state)}
191
return select(n)
192
193
def simulate(game, state):
194
"""simulate the utility of current state by random picking a step"""
195
player = game.to_move(state)
196
while not game.terminal_test(state):
197
action = random.choice(list(game.actions(state)))
198
state = game.result(state, action)
199
v = game.utility(state, player)
200
return -v
201
202
def backprop(n, utility):
203
"""passing the utility back to all parent nodes"""
204
if utility > 0:
205
n.U += utility
206
# if utility == 0:
207
# n.U += 0.5
208
n.N += 1
209
if n.parent:
210
backprop(n.parent, -utility)
211
212
root = MCT_Node(state=state)
213
214
for _ in range(N):
215
leaf = select(root)
216
child = expand(leaf)
217
result = simulate(game, child.state)
218
backprop(child, result)
219
220
max_state = max(root.children, key=lambda p: p.N)
221
222
return root.children.get(max_state)
223
224
225
# ______________________________________________________________________________
226
# Players for Games
227
228
229
def query_player(game, state):
230
"""Make a move by querying standard input."""
231
print("current state:")
232
game.display(state)
233
print("available moves: {}".format(game.actions(state)))
234
print("")
235
move = None
236
if game.actions(state):
237
move_string = input('Your move? ')
238
try:
239
move = eval(move_string)
240
except NameError:
241
move = move_string
242
else:
243
print('no legal moves: passing turn to next player')
244
return move
245
246
247
def random_player(game, state):
248
"""A player that chooses a legal move at random."""
249
return random.choice(game.actions(state)) if game.actions(state) else None
250
251
252
def alpha_beta_player(game, state):
253
return alpha_beta_search(state, game)
254
255
256
def expect_min_max_player(game, state):
257
return expect_minmax(state, game)
258
259
260
def mcts_player(game, state):
261
return monte_carlo_tree_search(state, game)
262
263
264
# ______________________________________________________________________________
265
# Some Sample Games
266
267
268
class Game:
269
"""A game is similar to a problem, but it has a utility for each
270
state and a terminal test instead of a path cost and a goal
271
test. To create a game, subclass this class and implement actions,
272
result, utility, and terminal_test. You may override display and
273
successors or you can inherit their default methods. You will also
274
need to set the .initial attribute to the initial state; this can
275
be done in the constructor."""
276
277
def actions(self, state):
278
"""Return a list of the allowable moves at this point."""
279
raise NotImplementedError
280
281
def result(self, state, move):
282
"""Return the state that results from making a move from a state."""
283
raise NotImplementedError
284
285
def utility(self, state, player):
286
"""Return the value of this final state to player."""
287
raise NotImplementedError
288
289
def terminal_test(self, state):
290
"""Return True if this is a final state for the game."""
291
return not self.actions(state)
292
293
def to_move(self, state):
294
"""Return the player whose move it is in this state."""
295
return state.to_move
296
297
def display(self, state):
298
"""Print or otherwise display the state."""
299
print(state)
300
301
def __repr__(self):
302
return '<{}>'.format(self.__class__.__name__)
303
304
def play_game(self, *players):
305
"""Play an n-person, move-alternating game."""
306
state = self.initial
307
while True:
308
for player in players:
309
move = player(self, state)
310
state = self.result(state, move)
311
if self.terminal_test(state):
312
self.display(state)
313
return self.utility(state, self.to_move(self.initial))
314
315
316
class StochasticGame(Game):
317
"""A stochastic game includes uncertain events which influence
318
the moves of players at each state. To create a stochastic game, subclass
319
this class and implement chances and outcome along with the other
320
unimplemented game class methods."""
321
322
def chances(self, state):
323
"""Return a list of all possible uncertain events at a state."""
324
raise NotImplementedError
325
326
def outcome(self, state, chance):
327
"""Return the state which is the outcome of a chance trial."""
328
raise NotImplementedError
329
330
def probability(self, chance):
331
"""Return the probability of occurrence of a chance."""
332
raise NotImplementedError
333
334
def play_game(self, *players):
335
"""Play an n-person, move-alternating stochastic game."""
336
state = self.initial
337
while True:
338
for player in players:
339
chance = random.choice(self.chances(state))
340
state = self.outcome(state, chance)
341
move = player(self, state)
342
state = self.result(state, move)
343
if self.terminal_test(state):
344
self.display(state)
345
return self.utility(state, self.to_move(self.initial))
346
347
348
class Fig52Game(Game):
349
"""The game represented in [Figure 5.2]. Serves as a simple test case."""
350
351
succs = dict(A=dict(a1='B', a2='C', a3='D'),
352
B=dict(b1='B1', b2='B2', b3='B3'),
353
C=dict(c1='C1', c2='C2', c3='C3'),
354
D=dict(d1='D1', d2='D2', d3='D3'))
355
utils = dict(B1=3, B2=12, B3=8, C1=2, C2=4, C3=6, D1=14, D2=5, D3=2)
356
initial = 'A'
357
358
def actions(self, state):
359
return list(self.succs.get(state, {}).keys())
360
361
def result(self, state, move):
362
return self.succs[state][move]
363
364
def utility(self, state, player):
365
if player == 'MAX':
366
return self.utils[state]
367
else:
368
return -self.utils[state]
369
370
def terminal_test(self, state):
371
return state not in ('A', 'B', 'C', 'D')
372
373
def to_move(self, state):
374
return 'MIN' if state in 'BCD' else 'MAX'
375
376
377
class Fig52Extended(Game):
378
"""Similar to Fig52Game but bigger. Useful for visualisation"""
379
380
succs = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
381
utils = dict()
382
383
def actions(self, state):
384
return sorted(list(self.succs.get(state, {}).keys()))
385
386
def result(self, state, move):
387
return self.succs[state][move]
388
389
def utility(self, state, player):
390
if player == 'MAX':
391
return self.utils[state]
392
else:
393
return -self.utils[state]
394
395
def terminal_test(self, state):
396
return state not in range(13)
397
398
def to_move(self, state):
399
return 'MIN' if state in {1, 2, 3} else 'MAX'
400
401
402
class TicTacToe(Game):
403
"""Play TicTacToe on an h x v board, with Max (first player) playing 'X'.
404
A state has the player to move, a cached utility, a list of moves in
405
the form of a list of (x, y) positions, and a board, in the form of
406
a dict of {(x, y): Player} entries, where Player is 'X' or 'O'."""
407
408
def __init__(self, h=3, v=3, k=3):
409
self.h = h
410
self.v = v
411
self.k = k
412
moves = [(x, y) for x in range(1, h + 1)
413
for y in range(1, v + 1)]
414
self.initial = GameState(to_move='X', utility=0, board={}, moves=moves)
415
416
def actions(self, state):
417
"""Legal moves are any square not yet taken."""
418
return state.moves
419
420
def result(self, state, move):
421
if move not in state.moves:
422
return state # Illegal move has no effect
423
board = state.board.copy()
424
board[move] = state.to_move
425
moves = list(state.moves)
426
moves.remove(move)
427
return GameState(to_move=('O' if state.to_move == 'X' else 'X'),
428
utility=self.compute_utility(board, move, state.to_move),
429
board=board, moves=moves)
430
431
def utility(self, state, player):
432
"""Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
433
return state.utility if player == 'X' else -state.utility
434
435
def terminal_test(self, state):
436
"""A state is terminal if it is won or there are no empty squares."""
437
return state.utility != 0 or len(state.moves) == 0
438
439
def display(self, state):
440
board = state.board
441
for x in range(1, self.h + 1):
442
for y in range(1, self.v + 1):
443
print(board.get((x, y), '.'), end=' ')
444
print()
445
446
def compute_utility(self, board, move, player):
447
"""If 'X' wins with this move, return 1; if 'O' wins return -1; else return 0."""
448
if (self.k_in_row(board, move, player, (0, 1)) or
449
self.k_in_row(board, move, player, (1, 0)) or
450
self.k_in_row(board, move, player, (1, -1)) or
451
self.k_in_row(board, move, player, (1, 1))):
452
return +1 if player == 'X' else -1
453
else:
454
return 0
455
456
def k_in_row(self, board, move, player, delta_x_y):
457
"""Return true if there is a line through move on board for player."""
458
(delta_x, delta_y) = delta_x_y
459
x, y = move
460
n = 0 # n is number of moves in row
461
while board.get((x, y)) == player:
462
n += 1
463
x, y = x + delta_x, y + delta_y
464
x, y = move
465
while board.get((x, y)) == player:
466
n += 1
467
x, y = x - delta_x, y - delta_y
468
n -= 1 # Because we counted move itself twice
469
return n >= self.k
470
471
472
class ConnectFour(TicTacToe):
473
"""A TicTacToe-like game in which you can only make a move on the bottom
474
row, or in a square directly above an occupied square. Traditionally
475
played on a 7x6 board and requiring 4 in a row."""
476
477
def __init__(self, h=7, v=6, k=4):
478
TicTacToe.__init__(self, h, v, k)
479
480
def actions(self, state):
481
return [(x, y) for (x, y) in state.moves
482
if y == 1 or (x, y - 1) in state.board]
483
484
485
class Backgammon(StochasticGame):
486
"""A two player game where the goal of each player is to move all the
487
checkers off the board. The moves for each state are determined by
488
rolling a pair of dice."""
489
490
def __init__(self):
491
"""Initial state of the game"""
492
point = {'W': 0, 'B': 0}
493
board = [point.copy() for index in range(24)]
494
board[0]['B'] = board[23]['W'] = 2
495
board[5]['W'] = board[18]['B'] = 5
496
board[7]['W'] = board[16]['B'] = 3
497
board[11]['B'] = board[12]['W'] = 5
498
self.allow_bear_off = {'W': False, 'B': False}
499
self.direction = {'W': -1, 'B': 1}
500
self.initial = StochasticGameState(to_move='W',
501
utility=0,
502
board=board,
503
moves=self.get_all_moves(board, 'W'), chance=None)
504
505
def actions(self, state):
506
"""Return a list of legal moves for a state."""
507
player = state.to_move
508
moves = state.moves
509
if len(moves) == 1 and len(moves[0]) == 1:
510
return moves
511
legal_moves = []
512
for move in moves:
513
board = copy.deepcopy(state.board)
514
if self.is_legal_move(board, move, state.chance, player):
515
legal_moves.append(move)
516
return legal_moves
517
518
def result(self, state, move):
519
board = copy.deepcopy(state.board)
520
player = state.to_move
521
self.move_checker(board, move[0], state.chance[0], player)
522
if len(move) == 2:
523
self.move_checker(board, move[1], state.chance[1], player)
524
to_move = ('W' if player == 'B' else 'B')
525
return StochasticGameState(to_move=to_move,
526
utility=self.compute_utility(board, move, player),
527
board=board,
528
moves=self.get_all_moves(board, to_move), chance=None)
529
530
def utility(self, state, player):
531
"""Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
532
return state.utility if player == 'W' else -state.utility
533
534
def terminal_test(self, state):
535
"""A state is terminal if one player wins."""
536
return state.utility != 0
537
538
def get_all_moves(self, board, player):
539
"""All possible moves for a player i.e. all possible ways of
540
choosing two checkers of a player from the board for a move
541
at a given state."""
542
all_points = board
543
taken_points = [index for index, point in enumerate(all_points)
544
if point[player] > 0]
545
if self.checkers_at_home(board, player) == 1:
546
return [(taken_points[0],)]
547
moves = list(itertools.permutations(taken_points, 2))
548
moves = moves + [(index, index) for index, point in enumerate(all_points)
549
if point[player] >= 2]
550
return moves
551
552
def display(self, state):
553
"""Display state of the game."""
554
board = state.board
555
player = state.to_move
556
print("current state : ")
557
for index, point in enumerate(board):
558
print("point : ", index, " W : ", point['W'], " B : ", point['B'])
559
print("to play : ", player)
560
561
def compute_utility(self, board, move, player):
562
"""If 'W' wins with this move, return 1; if 'B' wins return -1; else return 0."""
563
util = {'W': 1, 'B': -1}
564
for idx in range(0, 24):
565
if board[idx][player] > 0:
566
return 0
567
return util[player]
568
569
def checkers_at_home(self, board, player):
570
"""Return the no. of checkers at home for a player."""
571
sum_range = range(0, 7) if player == 'W' else range(17, 24)
572
count = 0
573
for idx in sum_range:
574
count = count + board[idx][player]
575
return count
576
577
def is_legal_move(self, board, start, steps, player):
578
"""Move is a tuple which contains starting points of checkers to be
579
moved during a player's turn. An on-board move is legal if both the destinations
580
are open. A bear-off move is the one where a checker is moved off-board.
581
It is legal only after a player has moved all his checkers to his home."""
582
dest1, dest2 = vector_add(start, steps)
583
dest_range = range(0, 24)
584
move1_legal = move2_legal = False
585
if dest1 in dest_range:
586
if self.is_point_open(player, board[dest1]):
587
self.move_checker(board, start[0], steps[0], player)
588
move1_legal = True
589
else:
590
if self.allow_bear_off[player]:
591
self.move_checker(board, start[0], steps[0], player)
592
move1_legal = True
593
if not move1_legal:
594
return False
595
if dest2 in dest_range:
596
if self.is_point_open(player, board[dest2]):
597
move2_legal = True
598
else:
599
if self.allow_bear_off[player]:
600
move2_legal = True
601
return move1_legal and move2_legal
602
603
def move_checker(self, board, start, steps, player):
604
"""Move a checker from starting point by a given number of steps"""
605
dest = start + steps
606
dest_range = range(0, 24)
607
board[start][player] -= 1
608
if dest in dest_range:
609
board[dest][player] += 1
610
if self.checkers_at_home(board, player) == 15:
611
self.allow_bear_off[player] = True
612
613
def is_point_open(self, player, point):
614
"""A point is open for a player if the no. of opponent's
615
checkers already present on it is 0 or 1. A player can
616
move a checker to a point only if it is open."""
617
opponent = 'B' if player == 'W' else 'W'
618
return point[opponent] <= 1
619
620
def chances(self, state):
621
"""Return a list of all possible dice rolls at a state."""
622
dice_rolls = list(itertools.combinations_with_replacement([1, 2, 3, 4, 5, 6], 2))
623
return dice_rolls
624
625
def outcome(self, state, chance):
626
"""Return the state which is the outcome of a dice roll."""
627
dice = tuple(map((self.direction[state.to_move]).__mul__, chance))
628
return StochasticGameState(to_move=state.to_move,
629
utility=state.utility,
630
board=state.board,
631
moves=state.moves, chance=dice)
632
633
def probability(self, chance):
634
"""Return the probability of occurrence of a dice roll."""
635
return 1 / 36 if chance[0] == chance[1] else 1 / 18
636
637