CoCalc -- games4e.py

GitHub Repository: aimacode/aima-python
Path: blob/master/games4e.py
⁶¹⁵ views
1
"""Games or Adversarial Search (Chapter 5)"""
2

3
import copy
4
import itertools
5
import random
6
from collections import namedtuple
7

8
import numpy as np
9

10
from utils4e import vector_add, MCT_Node, ucb
11

12
GameState = namedtuple('GameState', 'to_move, utility, board, moves')
13
StochasticGameState = namedtuple('StochasticGameState', 'to_move, utility, board, moves, chance')
14

15

16
# ______________________________________________________________________________
17
# MinMax Search
18

19

20
def minmax_decision(state, game):
21
    """Given a state in a game, calculate the best move by searching
22
    forward all the way to the terminal states. [Figure 5.3]"""
23

24
    player = game.to_move(state)
25

26
    def max_value(state):
27
        if game.terminal_test(state):
28
            return game.utility(state, player)
29
        v = -np.inf
30
        for a in game.actions(state):
31
            v = max(v, min_value(game.result(state, a)))
32
        return v
33

34
    def min_value(state):
35
        if game.terminal_test(state):
36
            return game.utility(state, player)
37
        v = np.inf
38
        for a in game.actions(state):
39
            v = min(v, max_value(game.result(state, a)))
40
        return v
41

42
    # Body of minmax_decision:
43
    return max(game.actions(state), key=lambda a: min_value(game.result(state, a)))
44

45

46
# ______________________________________________________________________________
47

48

49
def expect_minmax(state, game):
50
    """
51
    [Figure 5.11]
52
    Return the best move for a player after dice are thrown. The game tree
53
	includes chance nodes along with min and max nodes.
54
	"""
55
    player = game.to_move(state)
56

57
    def max_value(state):
58
        v = -np.inf
59
        for a in game.actions(state):
60
            v = max(v, chance_node(state, a))
61
        return v
62

63
    def min_value(state):
64
        v = np.inf
65
        for a in game.actions(state):
66
            v = min(v, chance_node(state, a))
67
        return v
68

69
    def chance_node(state, action):
70
        res_state = game.result(state, action)
71
        if game.terminal_test(res_state):
72
            return game.utility(res_state, player)
73
        sum_chances = 0
74
        num_chances = len(game.chances(res_state))
75
        for chance in game.chances(res_state):
76
            res_state = game.outcome(res_state, chance)
77
            util = 0
78
            if res_state.to_move == player:
79
                util = max_value(res_state)
80
            else:
81
                util = min_value(res_state)
82
            sum_chances += util * game.probability(chance)
83
        return sum_chances / num_chances
84

85
    # Body of expect_min_max:
86
    return max(game.actions(state), key=lambda a: chance_node(state, a), default=None)
87

88

89
def alpha_beta_search(state, game):
90
    """Search game to determine best action; use alpha-beta pruning.
91
    As in [Figure 5.7], this version searches all the way to the leaves."""
92

93
    player = game.to_move(state)
94

95
    # Functions used by alpha_beta
96
    def max_value(state, alpha, beta):
97
        if game.terminal_test(state):
98
            return game.utility(state, player)
99
        v = -np.inf
100
        for a in game.actions(state):
101
            v = max(v, min_value(game.result(state, a), alpha, beta))
102
            if v >= beta:
103
                return v
104
            alpha = max(alpha, v)
105
        return v
106

107
    def min_value(state, alpha, beta):
108
        if game.terminal_test(state):
109
            return game.utility(state, player)
110
        v = np.inf
111
        for a in game.actions(state):
112
            v = min(v, max_value(game.result(state, a), alpha, beta))
113
            if v <= alpha:
114
                return v
115
            beta = min(beta, v)
116
        return v
117

118
    # Body of alpha_beta_search:
119
    best_score = -np.inf
120
    beta = np.inf
121
    best_action = None
122
    for a in game.actions(state):
123
        v = min_value(game.result(state, a), best_score, beta)
124
        if v > best_score:
125
            best_score = v
126
            best_action = a
127
    return best_action
128

129

130
def alpha_beta_cutoff_search(state, game, d=4, cutoff_test=None, eval_fn=None):
131
    """Search game to determine best action; use alpha-beta pruning.
132
    This version cuts off search and uses an evaluation function."""
133

134
    player = game.to_move(state)
135

136
    # Functions used by alpha_beta
137
    def max_value(state, alpha, beta, depth):
138
        if cutoff_test(state, depth):
139
            return eval_fn(state)
140
        v = -np.inf
141
        for a in game.actions(state):
142
            v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
143
            if v >= beta:
144
                return v
145
            alpha = max(alpha, v)
146
        return v
147

148
    def min_value(state, alpha, beta, depth):
149
        if cutoff_test(state, depth):
150
            return eval_fn(state)
151
        v = np.inf
152
        for a in game.actions(state):
153
            v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
154
            if v <= alpha:
155
                return v
156
            beta = min(beta, v)
157
        return v
158

159
    # Body of alpha_beta_cutoff_search starts here:
160
    # The default test cuts off at depth d or at a terminal state
161
    cutoff_test = (cutoff_test or (lambda state, depth: depth > d or game.terminal_test(state)))
162
    eval_fn = eval_fn or (lambda state: game.utility(state, player))
163
    best_score = -np.inf
164
    beta = np.inf
165
    best_action = None
166
    for a in game.actions(state):
167
        v = min_value(game.result(state, a), best_score, beta, 1)
168
        if v > best_score:
169
            best_score = v
170
            best_action = a
171
    return best_action
172

173

174
# ______________________________________________________________________________
175
# Monte Carlo Tree Search
176

177

178
def monte_carlo_tree_search(state, game, N=1000):
179
    def select(n):
180
        """select a leaf node in the tree"""
181
        if n.children:
182
            return select(max(n.children.keys(), key=ucb))
183
        else:
184
            return n
185

186
    def expand(n):
187
        """expand the leaf node by adding all its children states"""
188
        if not n.children and not game.terminal_test(n.state):
189
            n.children = {MCT_Node(state=game.result(n.state, action), parent=n): action
190
                          for action in game.actions(n.state)}
191
        return select(n)
192

193
    def simulate(game, state):
194
        """simulate the utility of current state by random picking a step"""
195
        player = game.to_move(state)
196
        while not game.terminal_test(state):
197
            action = random.choice(list(game.actions(state)))
198
            state = game.result(state, action)
199
        v = game.utility(state, player)
200
        return -v
201

202
    def backprop(n, utility):
203
        """passing the utility back to all parent nodes"""
204
        if utility > 0:
205
            n.U += utility
206
        # if utility == 0:
207
        #     n.U += 0.5
208
        n.N += 1
209
        if n.parent:
210
            backprop(n.parent, -utility)
211

212
    root = MCT_Node(state=state)
213

214
    for _ in range(N):
215
        leaf = select(root)
216
        child = expand(leaf)
217
        result = simulate(game, child.state)
218
        backprop(child, result)
219

220
    max_state = max(root.children, key=lambda p: p.N)
221

222
    return root.children.get(max_state)
223

224

225
# ______________________________________________________________________________
226
# Players for Games
227

228

229
def query_player(game, state):
230
    """Make a move by querying standard input."""
231
    print("current state:")
232
    game.display(state)
233
    print("available moves: {}".format(game.actions(state)))
234
    print("")
235
    move = None
236
    if game.actions(state):
237
        move_string = input('Your move? ')
238
        try:
239
            move = eval(move_string)
240
        except NameError:
241
            move = move_string
242
    else:
243
        print('no legal moves: passing turn to next player')
244
    return move
245

246

247
def random_player(game, state):
248
    """A player that chooses a legal move at random."""
249
    return random.choice(game.actions(state)) if game.actions(state) else None
250

251

252
def alpha_beta_player(game, state):
253
    return alpha_beta_search(state, game)
254

255

256
def expect_min_max_player(game, state):
257
    return expect_minmax(state, game)
258

259

260
def mcts_player(game, state):
261
    return monte_carlo_tree_search(state, game)
262

263

264
# ______________________________________________________________________________
265
# Some Sample Games
266

267

268
class Game:
269
    """A game is similar to a problem, but it has a utility for each
270
    state and a terminal test instead of a path cost and a goal
271
    test. To create a game, subclass this class and implement actions,
272
    result, utility, and terminal_test. You may override display and
273
    successors or you can inherit their default methods. You will also
274
    need to set the .initial attribute to the initial state; this can
275
    be done in the constructor."""
276

277
    def actions(self, state):
278
        """Return a list of the allowable moves at this point."""
279
        raise NotImplementedError
280

281
    def result(self, state, move):
282
        """Return the state that results from making a move from a state."""
283
        raise NotImplementedError
284

285
    def utility(self, state, player):
286
        """Return the value of this final state to player."""
287
        raise NotImplementedError
288

289
    def terminal_test(self, state):
290
        """Return True if this is a final state for the game."""
291
        return not self.actions(state)
292

293
    def to_move(self, state):
294
        """Return the player whose move it is in this state."""
295
        return state.to_move
296

297
    def display(self, state):
298
        """Print or otherwise display the state."""
299
        print(state)
300

301
    def __repr__(self):
302
        return '<{}>'.format(self.__class__.__name__)
303

304
    def play_game(self, *players):
305
        """Play an n-person, move-alternating game."""
306
        state = self.initial
307
        while True:
308
            for player in players:
309
                move = player(self, state)
310
                state = self.result(state, move)
311
                if self.terminal_test(state):
312
                    self.display(state)
313
                    return self.utility(state, self.to_move(self.initial))
314

315

316
class StochasticGame(Game):
317
    """A stochastic game includes uncertain events which influence
318
    the moves of players at each state. To create a stochastic game, subclass
319
    this class and implement chances and outcome along with the other
320
    unimplemented game class methods."""
321

322
    def chances(self, state):
323
        """Return a list of all possible uncertain events at a state."""
324
        raise NotImplementedError
325

326
    def outcome(self, state, chance):
327
        """Return the state which is the outcome of a chance trial."""
328
        raise NotImplementedError
329

330
    def probability(self, chance):
331
        """Return the probability of occurrence of a chance."""
332
        raise NotImplementedError
333

334
    def play_game(self, *players):
335
        """Play an n-person, move-alternating stochastic game."""
336
        state = self.initial
337
        while True:
338
            for player in players:
339
                chance = random.choice(self.chances(state))
340
                state = self.outcome(state, chance)
341
                move = player(self, state)
342
                state = self.result(state, move)
343
                if self.terminal_test(state):
344
                    self.display(state)
345
                    return self.utility(state, self.to_move(self.initial))
346

347

348
class Fig52Game(Game):
349
    """The game represented in [Figure 5.2]. Serves as a simple test case."""
350

351
    succs = dict(A=dict(a1='B', a2='C', a3='D'),
352
                 B=dict(b1='B1', b2='B2', b3='B3'),
353
                 C=dict(c1='C1', c2='C2', c3='C3'),
354
                 D=dict(d1='D1', d2='D2', d3='D3'))
355
    utils = dict(B1=3, B2=12, B3=8, C1=2, C2=4, C3=6, D1=14, D2=5, D3=2)
356
    initial = 'A'
357

358
    def actions(self, state):
359
        return list(self.succs.get(state, {}).keys())
360

361
    def result(self, state, move):
362
        return self.succs[state][move]
363

364
    def utility(self, state, player):
365
        if player == 'MAX':
366
            return self.utils[state]
367
        else:
368
            return -self.utils[state]
369

370
    def terminal_test(self, state):
371
        return state not in ('A', 'B', 'C', 'D')
372

373
    def to_move(self, state):
374
        return 'MIN' if state in 'BCD' else 'MAX'
375

376

377
class Fig52Extended(Game):
378
    """Similar to Fig52Game but bigger. Useful for visualisation"""
379

380
    succs = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
381
    utils = dict()
382

383
    def actions(self, state):
384
        return sorted(list(self.succs.get(state, {}).keys()))
385

386
    def result(self, state, move):
387
        return self.succs[state][move]
388

389
    def utility(self, state, player):
390
        if player == 'MAX':
391
            return self.utils[state]
392
        else:
393
            return -self.utils[state]
394

395
    def terminal_test(self, state):
396
        return state not in range(13)
397

398
    def to_move(self, state):
399
        return 'MIN' if state in {1, 2, 3} else 'MAX'
400

401

402
class TicTacToe(Game):
403
    """Play TicTacToe on an h x v board, with Max (first player) playing 'X'.
404
    A state has the player to move, a cached utility, a list of moves in
405
    the form of a list of (x, y) positions, and a board, in the form of
406
    a dict of {(x, y): Player} entries, where Player is 'X' or 'O'."""
407

408
    def __init__(self, h=3, v=3, k=3):
409
        self.h = h
410
        self.v = v
411
        self.k = k
412
        moves = [(x, y) for x in range(1, h + 1)
413
                 for y in range(1, v + 1)]
414
        self.initial = GameState(to_move='X', utility=0, board={}, moves=moves)
415

416
    def actions(self, state):
417
        """Legal moves are any square not yet taken."""
418
        return state.moves
419

420
    def result(self, state, move):
421
        if move not in state.moves:
422
            return state  # Illegal move has no effect
423
        board = state.board.copy()
424
        board[move] = state.to_move
425
        moves = list(state.moves)
426
        moves.remove(move)
427
        return GameState(to_move=('O' if state.to_move == 'X' else 'X'),
428
                         utility=self.compute_utility(board, move, state.to_move),
429
                         board=board, moves=moves)
430

431
    def utility(self, state, player):
432
        """Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
433
        return state.utility if player == 'X' else -state.utility
434

435
    def terminal_test(self, state):
436
        """A state is terminal if it is won or there are no empty squares."""
437
        return state.utility != 0 or len(state.moves) == 0
438

439
    def display(self, state):
440
        board = state.board
441
        for x in range(1, self.h + 1):
442
            for y in range(1, self.v + 1):
443
                print(board.get((x, y), '.'), end=' ')
444
            print()
445

446
    def compute_utility(self, board, move, player):
447
        """If 'X' wins with this move, return 1; if 'O' wins return -1; else return 0."""
448
        if (self.k_in_row(board, move, player, (0, 1)) or
449
                self.k_in_row(board, move, player, (1, 0)) or
450
                self.k_in_row(board, move, player, (1, -1)) or
451
                self.k_in_row(board, move, player, (1, 1))):
452
            return +1 if player == 'X' else -1
453
        else:
454
            return 0
455

456
    def k_in_row(self, board, move, player, delta_x_y):
457
        """Return true if there is a line through move on board for player."""
458
        (delta_x, delta_y) = delta_x_y
459
        x, y = move
460
        n = 0  # n is number of moves in row
461
        while board.get((x, y)) == player:
462
            n += 1
463
            x, y = x + delta_x, y + delta_y
464
        x, y = move
465
        while board.get((x, y)) == player:
466
            n += 1
467
            x, y = x - delta_x, y - delta_y
468
        n -= 1  # Because we counted move itself twice
469
        return n >= self.k
470

471

472
class ConnectFour(TicTacToe):
473
    """A TicTacToe-like game in which you can only make a move on the bottom
474
    row, or in a square directly above an occupied square.  Traditionally
475
    played on a 7x6 board and requiring 4 in a row."""
476

477
    def __init__(self, h=7, v=6, k=4):
478
        TicTacToe.__init__(self, h, v, k)
479

480
    def actions(self, state):
481
        return [(x, y) for (x, y) in state.moves
482
                if y == 1 or (x, y - 1) in state.board]
483

484

485
class Backgammon(StochasticGame):
486
    """A two player game where the goal of each player is to move all the
487
	checkers off the board. The moves for each state are determined by
488
	rolling a pair of dice."""
489

490
    def __init__(self):
491
        """Initial state of the game"""
492
        point = {'W': 0, 'B': 0}
493
        board = [point.copy() for index in range(24)]
494
        board[0]['B'] = board[23]['W'] = 2
495
        board[5]['W'] = board[18]['B'] = 5
496
        board[7]['W'] = board[16]['B'] = 3
497
        board[11]['B'] = board[12]['W'] = 5
498
        self.allow_bear_off = {'W': False, 'B': False}
499
        self.direction = {'W': -1, 'B': 1}
500
        self.initial = StochasticGameState(to_move='W',
501
                                           utility=0,
502
                                           board=board,
503
                                           moves=self.get_all_moves(board, 'W'), chance=None)
504

505
    def actions(self, state):
506
        """Return a list of legal moves for a state."""
507
        player = state.to_move
508
        moves = state.moves
509
        if len(moves) == 1 and len(moves[0]) == 1:
510
            return moves
511
        legal_moves = []
512
        for move in moves:
513
            board = copy.deepcopy(state.board)
514
            if self.is_legal_move(board, move, state.chance, player):
515
                legal_moves.append(move)
516
        return legal_moves
517

518
    def result(self, state, move):
519
        board = copy.deepcopy(state.board)
520
        player = state.to_move
521
        self.move_checker(board, move[0], state.chance[0], player)
522
        if len(move) == 2:
523
            self.move_checker(board, move[1], state.chance[1], player)
524
        to_move = ('W' if player == 'B' else 'B')
525
        return StochasticGameState(to_move=to_move,
526
                                   utility=self.compute_utility(board, move, player),
527
                                   board=board,
528
                                   moves=self.get_all_moves(board, to_move), chance=None)
529

530
    def utility(self, state, player):
531
        """Return the value to player; 1 for win, -1 for loss, 0 otherwise."""
532
        return state.utility if player == 'W' else -state.utility
533

534
    def terminal_test(self, state):
535
        """A state is terminal if one player wins."""
536
        return state.utility != 0
537

538
    def get_all_moves(self, board, player):
539
        """All possible moves for a player i.e. all possible ways of
540
        choosing two checkers of a player from the board for a move
541
        at a given state."""
542
        all_points = board
543
        taken_points = [index for index, point in enumerate(all_points)
544
                        if point[player] > 0]
545
        if self.checkers_at_home(board, player) == 1:
546
            return [(taken_points[0],)]
547
        moves = list(itertools.permutations(taken_points, 2))
548
        moves = moves + [(index, index) for index, point in enumerate(all_points)
549
                         if point[player] >= 2]
550
        return moves
551

552
    def display(self, state):
553
        """Display state of the game."""
554
        board = state.board
555
        player = state.to_move
556
        print("current state : ")
557
        for index, point in enumerate(board):
558
            print("point : ", index, "	W : ", point['W'], "    B : ", point['B'])
559
        print("to play : ", player)
560

561
    def compute_utility(self, board, move, player):
562
        """If 'W' wins with this move, return 1; if 'B' wins return -1; else return 0."""
563
        util = {'W': 1, 'B': -1}
564
        for idx in range(0, 24):
565
            if board[idx][player] > 0:
566
                return 0
567
        return util[player]
568

569
    def checkers_at_home(self, board, player):
570
        """Return the no. of checkers at home for a player."""
571
        sum_range = range(0, 7) if player == 'W' else range(17, 24)
572
        count = 0
573
        for idx in sum_range:
574
            count = count + board[idx][player]
575
        return count
576

577
    def is_legal_move(self, board, start, steps, player):
578
        """Move is a tuple which contains starting points of checkers to be
579
		moved during a player's turn. An on-board move is legal if both the destinations
580
		are open. A bear-off move is the one where a checker is moved off-board.
581
        It is legal only after a player has moved all his checkers to his home."""
582
        dest1, dest2 = vector_add(start, steps)
583
        dest_range = range(0, 24)
584
        move1_legal = move2_legal = False
585
        if dest1 in dest_range:
586
            if self.is_point_open(player, board[dest1]):
587
                self.move_checker(board, start[0], steps[0], player)
588
                move1_legal = True
589
        else:
590
            if self.allow_bear_off[player]:
591
                self.move_checker(board, start[0], steps[0], player)
592
                move1_legal = True
593
        if not move1_legal:
594
            return False
595
        if dest2 in dest_range:
596
            if self.is_point_open(player, board[dest2]):
597
                move2_legal = True
598
        else:
599
            if self.allow_bear_off[player]:
600
                move2_legal = True
601
        return move1_legal and move2_legal
602

603
    def move_checker(self, board, start, steps, player):
604
        """Move a checker from starting point by a given number of steps"""
605
        dest = start + steps
606
        dest_range = range(0, 24)
607
        board[start][player] -= 1
608
        if dest in dest_range:
609
            board[dest][player] += 1
610
            if self.checkers_at_home(board, player) == 15:
611
                self.allow_bear_off[player] = True
612

613
    def is_point_open(self, player, point):
614
        """A point is open for a player if the no. of opponent's
615
        checkers already present on it is 0 or 1. A player can
616
        move a checker to a point only if it is open."""
617
        opponent = 'B' if player == 'W' else 'W'
618
        return point[opponent] <= 1
619

620
    def chances(self, state):
621
        """Return a list of all possible dice rolls at a state."""
622
        dice_rolls = list(itertools.combinations_with_replacement([1, 2, 3, 4, 5, 6], 2))
623
        return dice_rolls
624

625
    def outcome(self, state, chance):
626
        """Return the state which is the outcome of a dice roll."""
627
        dice = tuple(map((self.direction[state.to_move]).__mul__, chance))
628
        return StochasticGameState(to_move=state.to_move,
629
                                   utility=state.utility,
630
                                   board=state.board,
631
                                   moves=state.moves, chance=dice)
632

633
    def probability(self, chance):
634
        """Return the probability of occurrence of a dice roll."""
635
        return 1 / 36 if chance[0] == chance[1] else 1 / 18
636

637
Product

Resources

Company