Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
aimacode
GitHub Repository: aimacode/aima-python
Path: blob/master/games4e.ipynb
615 views
Kernel: Python 3

Game Tree Search

We start with defining the abstract class Game, for turn-taking n-player games. We rely on, but do not define yet, the concept of a state of the game; we'll see later how individual games define states. For now, all we require is that a state has a state.to_move attribute, which gives the name of the player whose turn it is. ("Name" will be something like 'X' or 'O' for tic-tac-toe.)

We also define play_game, which takes a game and a dictionary of {player_name: strategy_function} pairs, and plays out the game, on each turn checking state.to_move to see whose turn it is, and then getting the strategy function for that player and applying it to the game and the state to get a move.

from collections import namedtuple, Counter, defaultdict import random import math import functools cache = functools.lru_cache(10**6)
class Game: """A game is similar to a problem, but it has a terminal test instead of a goal test, and a utility for each terminal state. To create a game, subclass this class and implement `actions`, `result`, `is_terminal`, and `utility`. You will also need to set the .initial attribute to the initial state; this can be done in the constructor.""" def actions(self, state): """Return a collection of the allowable moves from this state.""" raise NotImplementedError def result(self, state, move): """Return the state that results from making a move from a state.""" raise NotImplementedError def is_terminal(self, state): """Return True if this is a final state for the game.""" return not self.actions(state) def utility(self, state, player): """Return the value of this final state to player.""" raise NotImplementedError def play_game(game, strategies: dict, verbose=False): """Play a turn-taking game. `strategies` is a {player_name: function} dict, where function(state, game) is used to get the player's move.""" state = game.initial while not game.is_terminal(state): player = state.to_move move = strategies[player](game, state) state = game.result(state, move) if verbose: print('Player', player, 'move:', move) print(state) return state

Minimax-Based Game Search Algorithms

We will define several game search algorithms. Each takes two inputs, the game we are playing and the current state of the game, and returns a a (value, move) pair, where value is the utility that the algorithm computes for the player whose turn it is to move, and move is the move itself.

First we define minimax_search, which exhaustively searches the game tree to find an optimal move (assuming both players play optimally), and alphabeta_search, which does the same computation, but prunes parts of the tree that could not possibly have an affect on the optimnal move.

def minimax_search(game, state): """Search game tree to determine best move; return (value, move) pair.""" player = state.to_move def max_value(state): if game.is_terminal(state): return game.utility(state, player), None v, move = -infinity, None for a in game.actions(state): v2, _ = min_value(game.result(state, a)) if v2 > v: v, move = v2, a return v, move def min_value(state): if game.is_terminal(state): return game.utility(state, player), None v, move = +infinity, None for a in game.actions(state): v2, _ = max_value(game.result(state, a)) if v2 < v: v, move = v2, a return v, move return max_value(state) infinity = math.inf def alphabeta_search(game, state): """Search game to determine best action; use alpha-beta pruning. As in [Figure 5.7], this version searches all the way to the leaves.""" player = state.to_move def max_value(state, alpha, beta): if game.is_terminal(state): return game.utility(state, player), None v, move = -infinity, None for a in game.actions(state): v2, _ = min_value(game.result(state, a), alpha, beta) if v2 > v: v, move = v2, a alpha = max(alpha, v) if v >= beta: return v, move return v, move def min_value(state, alpha, beta): if game.is_terminal(state): return game.utility(state, player), None v, move = +infinity, None for a in game.actions(state): v2, _ = max_value(game.result(state, a), alpha, beta) if v2 < v: v, move = v2, a beta = min(beta, v) if v <= alpha: return v, move return v, move return max_value(state, -infinity, +infinity)

A Simple Game: Tic-Tac-Toe

We have the notion of an abstract game, we have some search functions; now it is time to define a real game; a simple one, tic-tac-toe. Moves are (x, y) pairs denoting squares, where (0, 0) is the top left, and (2, 2) is the bottom right (on a board of size height=width=3).

class TicTacToe(Game): """Play TicTacToe on an `height` by `width` board, needing `k` in a row to win. 'X' plays first against 'O'.""" def __init__(self, height=3, width=3, k=3): self.k = k # k in a row self.squares = {(x, y) for x in range(width) for y in range(height)} self.initial = Board(height=height, width=width, to_move='X', utility=0) def actions(self, board): """Legal moves are any square not yet taken.""" return self.squares - set(board) def result(self, board, square): """Place a marker for current player on square.""" player = board.to_move board = board.new({square: player}, to_move=('O' if player == 'X' else 'X')) win = k_in_row(board, player, square, self.k) board.utility = (0 if not win else +1 if player == 'X' else -1) return board def utility(self, board, player): """Return the value to player; 1 for win, -1 for loss, 0 otherwise.""" return board.utility if player == 'X' else -board.utility def is_terminal(self, board): """A board is a terminal state if it is won or there are no empty squares.""" return board.utility != 0 or len(self.squares) == len(board) def display(self, board): print(board) def k_in_row(board, player, square, k): """True if player has k pieces in a line through square.""" def in_row(x, y, dx, dy): return 0 if board[x, y] != player else 1 + in_row(x + dx, y + dy, dx, dy) return any(in_row(*square, dx, dy) + in_row(*square, -dx, -dy) - 1 >= k for (dx, dy) in ((0, 1), (1, 0), (1, 1), (1, -1)))

States in tic-tac-toe (and other games) will be represented as a Board, which is a subclass of defaultdict that in general will consist of {(x, y): contents} pairs, for example {(0, 0): 'X', (1, 1): 'O'} might be the state of the board after two moves. Besides the contents of squares, a board also has some attributes:

  • .to_move to name the player whose move it is;

  • .width and .height to give the size of the board (both 3 in tic-tac-toe, but other numbers in related games);

  • possibly other attributes, as specified by keywords.

As a defaultdict, the Board class has a __missing__ method, which returns empty for squares that have no been assigned but are within the width × height boundaries, or off otherwise. The class has a __hash__ method, so instances can be stored in hash tables.

class Board(defaultdict): """A board has the player to move, a cached utility value, and a dict of {(x, y): player} entries, where player is 'X' or 'O'.""" empty = '.' off = '#' def __init__(self, width=8, height=8, to_move=None, **kwds): self.__dict__.update(width=width, height=height, to_move=to_move, **kwds) def new(self, changes: dict, **kwds) -> 'Board': "Given a dict of {(x, y): contents} changes, return a new Board with the changes." board = Board(width=self.width, height=self.height, **kwds) board.update(self) board.update(changes) return board def __missing__(self, loc): x, y = loc if 0 <= x < self.width and 0 <= y < self.height: return self.empty else: return self.off def __hash__(self): return hash(tuple(sorted(self.items()))) + hash(self.to_move) def __repr__(self): def row(y): return ' '.join(self[x, y] for x in range(self.width)) return '\n'.join(map(row, range(self.height))) + '\n'

Players

We need an interface for players. I'll represent a player as a callable that will be passed two arguments: (game, state) and will return a move. The function player creates a player out of a search algorithm, but you can create your own players as functions, as is done with random_player below:

def random_player(game, state): return random.choice(list(game.actions(state))) def player(search_algorithm): """A game player who uses the specified search algorithm""" return lambda game, state: search_algorithm(game, state)[1]

Playing a Game

We're ready to play a game. I'll set up a match between a random_player (who chooses randomly from the legal moves) and a player(alphabeta_search) (who makes the optimal alpha-beta move; practical for tic-tac-toe, but not for large games). The player(alphabeta_search) will never lose, but if random_player is lucky, it will be a tie.

play_game(TicTacToe(), dict(X=random_player, O=player(alphabeta_search)), verbose=True).utility
Player X move: (0, 0) X . . . . . . . . Player O move: (1, 1) X . . . O . . . . Player X move: (1, 2) X . . . O . . X . Player O move: (0, 1) X . . O O . . X . Player X move: (2, 1) X . . O O X . X . Player O move: (2, 0) X . O O O X . X . Player X move: (2, 2) X . O O O X . X X Player O move: (0, 2) X . O O O X O X X
-1

The alpha-beta player will never lose, but sometimes the random player can stumble into a draw. When two optimal (alpha-beta or minimax) players compete, it will always be a draw:

play_game(TicTacToe(), dict(X=player(alphabeta_search), O=player(minimax_search)), verbose=True).utility
Player X move: (0, 1) . . . X . . . . . Player O move: (0, 0) O . . X . . . . . Player X move: (2, 0) O . X X . . . . . Player O move: (2, 1) O . X X . O . . . Player X move: (1, 2) O . X X . O . X . Player O move: (0, 2) O . X X . O O X . Player X move: (1, 0) O X X X . O O X . Player O move: (1, 1) O X X X O O O X . Player X move: (2, 2) O X X X O O O X X
0

Connect Four

Connect Four is a variant of tic-tac-toe, played on a larger (7 x 6) board, and with the restriction that in any column you can only play in the lowest empty square in the column.

class ConnectFour(TicTacToe): def __init__(self): super().__init__(width=7, height=6, k=4) def actions(self, board): """In each column you can play only the lowest empty square in the column.""" return {(x, y) for (x, y) in self.squares - set(board) if y == board.height - 1 or (x, y + 1) in board}
play_game(ConnectFour(), dict(X=random_player, O=random_player), verbose=True).utility
Player X move: (2, 5) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X . . . . Player O move: (1, 5) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O X . . . . Player X move: (5, 5) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O X . . X . Player O move: (4, 5) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O X . O X . Player X move: (4, 4) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X . . . O X . O X . Player O move: (2, 4) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O . X . . . O X . O X . Player X move: (2, 3) . . . . . . . . . . . . . . . . . . . . . . . X . . . . . . O . X . . . O X . O X . Player O move: (1, 4) . . . . . . . . . . . . . . . . . . . . . . . X . . . . . O O . X . . . O X . O X . Player X move: (0, 5) . . . . . . . . . . . . . . . . . . . . . . . X . . . . . O O . X . . X O X . O X . Player O move: (5, 4) . . . . . . . . . . . . . . . . . . . . . . . X . . . . . O O . X O . X O X . O X . Player X move: (5, 3) . . . . . . . . . . . . . . . . . . . . . . . X . . X . . O O . X O . X O X . O X . Player O move: (6, 5) . . . . . . . . . . . . . . . . . . . . . . . X . . X . . O O . X O . X O X . O X O Player X move: (1, 3) . . . . . . . . . . . . . . . . . . . . . . X X . . X . . O O . X O . X O X . O X O Player O move: (6, 4) . . . . . . . . . . . . . . . . . . . . . . X X . . X . . O O . X O O X O X . O X O Player X move: (5, 2) . . . . . . . . . . . . . . . . . . . X . . X X . . X . . O O . X O O X O X . O X O Player O move: (0, 4) . . . . . . . . . . . . . . . . . . . X . . X X . . X . O O O . X O O X O X . O X O Player X move: (0, 3) . . . . . . . . . . . . . . . . . . . X . X X X . . X . O O O . X O O X O X . O X O Player O move: (0, 2) . . . . . . . . . . . . . . O . . . . X . X X X . . X . O O O . X O O X O X . O X O Player X move: (0, 1) . . . . . . . X . . . . . . O . . . . X . X X X . . X . O O O . X O O X O X . O X O Player O move: (0, 0) O . . . . . . X . . . . . . O . . . . X . X X X . . X . O O O . X O O X O X . O X O Player X move: (5, 1) O . . . . . . X . . . . X . O . . . . X . X X X . . X . O O O . X O O X O X . O X O Player O move: (4, 3) O . . . . . . X . . . . X . O . . . . X . X X X . O X . O O O . X O O X O X . O X O Player X move: (6, 3) O . . . . . . X . . . . X . O . . . . X . X X X . O X X O O O . X O O X O X . O X O Player O move: (5, 0) O . . . . O . X . . . . X . O . . . . X . X X X . O X X O O O . X O O X O X . O X O Player X move: (3, 5) O . . . . O . X . . . . X . O . . . . X . X X X . O X X O O O . X O O X O X X O X O Player O move: (1, 2) O . . . . O . X . . . . X . O O . . . X . X X X . O X X O O O . X O O X O X X O X O Player X move: (1, 1) O . . . . O . X X . . . X . O O . . . X . X X X . O X X O O O . X O O X O X X O X O Player O move: (3, 4) O . . . . O . X X . . . X . O O . . . X . X X X . O X X O O O O X O O X O X X O X O
-1

Transposition Tables

By treating the game tree as a tree, we can arrive at the same state through different paths, and end up duplicating effort. In state-space search, we kept a table of reached states to prevent this. For game-tree search, we can achieve the same effect by applying the @cache decorator to the min_value and max_value functions. We'll use the suffix _tt to indicate a function that uses these transisiton tables.

def minimax_search_tt(game, state): """Search game to determine best move; return (value, move) pair.""" player = state.to_move @cache def max_value(state): if game.is_terminal(state): return game.utility(state, player), None v, move = -infinity, None for a in game.actions(state): v2, _ = min_value(game.result(state, a)) if v2 > v: v, move = v2, a return v, move @cache def min_value(state): if game.is_terminal(state): return game.utility(state, player), None v, move = +infinity, None for a in game.actions(state): v2, _ = max_value(game.result(state, a)) if v2 < v: v, move = v2, a return v, move return max_value(state)

For alpha-beta search, we can still use a cache, but it should be based just on the state, not on whatever values alpha and beta have.

def cache1(function): "Like lru_cache(None), but only considers the first argument of function." cache = {} def wrapped(x, *args): if x not in cache: cache[x] = function(x, *args) return cache[x] return wrapped def alphabeta_search_tt(game, state): """Search game to determine best action; use alpha-beta pruning. As in [Figure 5.7], this version searches all the way to the leaves.""" player = state.to_move @cache1 def max_value(state, alpha, beta): if game.is_terminal(state): return game.utility(state, player), None v, move = -infinity, None for a in game.actions(state): v2, _ = min_value(game.result(state, a), alpha, beta) if v2 > v: v, move = v2, a alpha = max(alpha, v) if v >= beta: return v, move return v, move @cache1 def min_value(state, alpha, beta): if game.is_terminal(state): return game.utility(state, player), None v, move = +infinity, None for a in game.actions(state): v2, _ = max_value(game.result(state, a), alpha, beta) if v2 < v: v, move = v2, a beta = min(beta, v) if v <= alpha: return v, move return v, move return max_value(state, -infinity, +infinity)
%time play_game(TicTacToe(), {'X':player(alphabeta_search_tt), 'O':player(minimax_search_tt)})
CPU times: user 593 ms, sys: 52 ms, total: 645 ms Wall time: 655 ms
O X X X O O O X X
%time play_game(TicTacToe(), {'X':player(alphabeta_search), 'O':player(minimax_search)})
CPU times: user 3.07 s, sys: 30.7 ms, total: 3.1 s Wall time: 3.15 s
O X X X O O O X X

Heuristic Cutoffs

def cutoff_depth(d): """A cutoff function that searches to depth d.""" return lambda game, state, depth: depth > d def h_alphabeta_search(game, state, cutoff=cutoff_depth(6), h=lambda s, p: 0): """Search game to determine best action; use alpha-beta pruning. As in [Figure 5.7], this version searches all the way to the leaves.""" player = state.to_move @cache1 def max_value(state, alpha, beta, depth): if game.is_terminal(state): return game.utility(state, player), None if cutoff(game, state, depth): return h(state, player), None v, move = -infinity, None for a in game.actions(state): v2, _ = min_value(game.result(state, a), alpha, beta, depth+1) if v2 > v: v, move = v2, a alpha = max(alpha, v) if v >= beta: return v, move return v, move @cache1 def min_value(state, alpha, beta, depth): if game.is_terminal(state): return game.utility(state, player), None if cutoff(game, state, depth): return h(state, player), None v, move = +infinity, None for a in game.actions(state): v2, _ = max_value(game.result(state, a), alpha, beta, depth + 1) if v2 < v: v, move = v2, a beta = min(beta, v) if v <= alpha: return v, move return v, move return max_value(state, -infinity, +infinity, 0)
%time play_game(TicTacToe(), {'X':player(h_alphabeta_search), 'O':player(h_alphabeta_search)})
CPU times: user 367 ms, sys: 7.9 ms, total: 375 ms Wall time: 375 ms
O X X X O O O X X
%time play_game(ConnectFour(), {'X':player(h_alphabeta_search), 'O':random_player}, verbose=True).utility
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X O . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X . . . . . X O . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O X . . . . . X O . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O X . . . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . . . . O X . . . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . . . X O X . . . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . . . X O X . O . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . . O . . X . . X O X . O . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . . O O . X . . X O X . O . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . X O O . X . . X O X . O . . X X O . . . . . . . . . . . . . . . . . . . . . . . . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . . . . . . . . X . . . . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . O . . . . . . X . . . . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . O . . . . . . X . . X . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . O . . . . . . X O . X . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . O . . X . . . X O . X . . X O O . X . . X O X . O . O X X O . . . . . . . . . . . . O . . X . . . X O . X . . X O O . X . . X O X . O O O X X O . . . . . . . . . . . . O . . X . . . X O . X . . X O O . X . . X O X X O O O X X O . . . . . . . . . . . . O O . X . . . X O . X . . X O O . X . . X O X X O O O X X O . . . . . . X . . . . . O O . X . . . X O . X . . X O O . X . . X O X X O O O X X O . . . . . . X . . . . . O O . X . . . X O . X . . X O O O X . . X O X X O O O X X O . . . . . X X . . . . . O O . X . . . X O . X . . X O O O X . . X O X X O O O X X O . . . . . X X . . . . . O O . X . . . X O . X . . X O O O X . O X O X X O O O X X O . . . . . X X . . . . . O O . X . . . X O . X . X X O O O X . O X O X X O O O X X O . . . . . X X . . . . . O O . X . . O X O . X . X X O O O X . O X O X X O O O X X O . . . . . X X . . . . . O O . X . X O X O . X . X X O O O X . O X O X X O O O X X O . . . . . X X . . . . O O O . X . X O X O . X . X X O O O X . O X O X X O O O X X O . . . . . X X . . . X O O O . X . X O X O . X . X X O O O X . O X O X X O O O X X O . . . . O X X . . . X O O O . X . X O X O . X . X X O O O X . O X O X X O O O X X O . . . X O X X . . . X O O O . X . X O X O . X . X X O O O X . O X O X X O O O X X O CPU times: user 8.82 s, sys: 146 ms, total: 8.96 s Wall time: 9.19 s
1
%time play_game(ConnectFour(), {'X':player(h_alphabeta_search), 'O':player(h_alphabeta_search)}, verbose=True).utility
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . . . . X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . . . X X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O . . . O . X X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X O . . . O . X X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X O . . . O O X X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . X O . . X O O X X . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . O . . X O . . X O O X X . . . . . . . . . . . . . . . . . . . . . . . X . . . . . . O . . X O . . X O O X X . . . . . . . . . . . . . . . . O . . . . . . X . . . . . . O . . X O . . X O O X X . . . . . . . . . . . . . . . . O . . . . . . X . . . . . . O . . X O . X X O O X X . . . . . . . . . . . . . . . . O . . . . . . X . . . . . O O . . X O . X X O O X X . . . . . . . . . . . . . . . . O . . . . . . X . . X . . O O . . X O . X X O O X X . . . . . . . . . . . . . . . . O . . O . . . X . . X . . O O . . X O . X X O O X X . . . . . . . . . . . . X . . . O . . O . . . X . . X . . O O . . X O . X X O O X X . . . . . . . . . . . . X . . . O . . O . . . X . . X O . O O . . X O . X X O O X X . . . . . . . . . . . . X . . . O . . O X . . X . . X O . O O . . X O . X X O O X X . . . . . . . . . . . . X . . . O . . O X . . X . . X O . O O O . X O . X X O O X X . . . . . . . . . . . . X . . . O . . O X . . X . . X O . O O O X X O . X X O O X X . . . . . . . . . . . . X O . . O . . O X . . X . . X O . O O O X X O . X X O O X X . . . . . . . . . . . . X O . . O . . O X . . X . X X O . O O O X X O . X X O O X X . . . . . . . . . . . . X O . . O . O O X . . X . X X O . O O O X X O . X X O O X X . . . . . . . . . . . X X O . . O . O O X . . X . X X O . O O O X X O . X X O O X X . . . . O . . . . . . X X O . . O . O O X . . X . X X O . O O O X X O . X X O O X X . . . . O . . . . . . X X O . . O . O O X . . X X X X O . O O O X X O . X X O O X X . CPU times: user 12.1 s, sys: 237 ms, total: 12.4 s Wall time: 12.9 s
1
class CountCalls: """Delegate all attribute gets to the object, and count them in ._counts""" def __init__(self, obj): self._object = obj self._counts = Counter() def __getattr__(self, attr): "Delegate to the original object, after incrementing a counter." self._counts[attr] += 1 return getattr(self._object, attr) def report(game, searchers): for searcher in searchers: game = CountCalls(game) searcher(game, game.initial) print('Result states: {:7,d}; Terminal tests: {:7,d}; for {}'.format( game._counts['result'], game._counts['is_terminal'], searcher.__name__)) report(TicTacToe(), (alphabeta_search_tt, alphabeta_search, h_alphabeta_search, minimax_search_tt))
Result states: 6,589; Terminal tests: 3,653; for alphabeta_search_tt Result states: 25,703; Terminal tests: 25,704; for alphabeta_search Result states: 4,687; Terminal tests: 2,805; for h_alphabeta_search Result states: 16,167; Terminal tests: 5,478; for minimax_search_tt

Monte Carlo Tree Search

class Node: def __init__(self, parent, ) def mcts(state, game, N=1000):

Heuristic Search Algorithms

t = CountCalls(TicTacToe()) play_game(t, dict(X=minimax_player, O=minimax_player), verbose=True) t._counts
for tactic in (three, fork, center, opposite_corner, corner, any): for s in squares: if tactic(board, s,player): return s for s ins quares: if tactic(board, s, opponent): return s
def ucb(U, N, C=2**0.5, parentN=100): return round(U/N + C * math.sqrt(math.log(parentN)/N), 2) {C: (ucb(60, 79, C), ucb(1, 10, C), ucb(2, 11, C)) for C in (1.4, 1.5)}
def ucb(U, N, parentN=100, C=2): return U/N + C * math.sqrt(math.log(parentN)/N) C = 1.4 class Node: def __init__(self, name, children=(), U=0, N=0, parent=None, p=0.5): self.__dict__.update(name=name, U=U, N=N, parent=parent, children=children, p=p) for c in children: c.parent = self def __repr__(self): return '{}:{}/{}={:.0%}{}'.format(self.name, self.U, self.N, self.U/self.N, self.children) def select(n): if n.children: return select(max(n.children, key=ucb)) else: return n def back(n, amount): if n: n.N += 1 n.U += amount back(n.parent, 1 - amount) def one(root): n = select(root) amount = int(random.uniform(0, 1) < n.p) back(n, amount) def ucb(n): return (float('inf') if n.N == 0 else n.U / n.N + C * math.sqrt(math.log(n.parent.N)/n.N)) tree = Node('root', [Node('a', p=.8, children=[Node('a1', p=.05), Node('a2', p=.25, children=[Node('a2a', p=.7), Node('a2b')])]), Node('b', p=.5, children=[Node('b1', p=.6, children=[Node('b1a', p=.3), Node('b1b')]), Node('b2', p=.4)]), Node('c', p=.1)]) for i in range(100): one(tree); for c in tree.children: print(c) 'select', select(tree), 'tree', tree
us = (100, 50, 25, 10, 5, 1) infinity = float('inf') @lru_cache(None) def f1(n, denom): return (0 if n == 0 else infinity if n < 0 or not denom else min(1 + f1(n - denom[0], denom), f1(n, denom[1:]))) @lru_cache(None) def f2(n, denom): @lru_cache(None) def f(n): return (0 if n == 0 else infinity if n < 0 else 1 + min(f(n - d) for d in denom)) return f(n) @lru_cache(None) def f3(n, denom): return (0 if n == 0 else infinity if n < 0 or not denom else min(k + f2(n - k * denom[0], denom[1:]) for k in range(1 + n // denom[0]))) def g(n, d=us): return f1(n, d), f2(n, d), f3(n, d) n = 12345 %time f1(n, us) %time f2(n, us) %time f3(n, us)