Skip to content

Commit

Permalink
Bayesian update for range vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastiannberg committed Apr 20, 2024
1 parent 90c8777 commit 3565205
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 69 deletions.
4 changes: 1 addition & 3 deletions deepstack_knock_off/src/games/poker/players/ai_player.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from typing import List
import numpy as np
import itertools

from games.poker.poker_state_manager import PokerStateManager
from games.poker.poker_game import PokerGame
from games.poker.utils.hand_label_generator import HandLabelGenerator
from games.poker.players.player import Player
from games.poker.actions.action import Action
from games.poker.utils.card import Card
Expand All @@ -21,7 +19,7 @@ def __init__(self, name, initial_chips, state_manager: PokerStateManager):
super().__init__(name, initial_chips)
self.resolver = Resolver(state_manager)
self.state_manager = state_manager
possible_hands, _, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
possible_hands, _, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
self.r1 = np.full((1, len(possible_hands)), 1/len(possible_hands), dtype=np.float64)
self.r2 = np.full((1, len(possible_hands)), 1/len(possible_hands), dtype=np.float64)

Expand Down
44 changes: 42 additions & 2 deletions deepstack_knock_off/src/games/poker/poker_oracle.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Tuple, Optional
from typing import List, Tuple, Optional, Dict
import numpy as np
import itertools
import time
Expand Down Expand Up @@ -26,6 +26,11 @@ class PokerOracle:
"high_card": 1
}

_cache_possible_hands: List[Tuple[Card, Card]]
_cache_hand_label_to_index: Dict[str, int] = {}
_cache_index_to_hand_label: List[str] = []
_cache_deck_size: int = 0

@staticmethod
def gen_deck(num_cards: int, shuffled: bool) -> Deck:
if num_cards % 4:
Expand All @@ -44,6 +49,41 @@ def gen_deck(num_cards: int, shuffled: bool) -> Deck:
deck.shuffle()
return deck

@staticmethod
def get_possible_hands_with_indexing(deck_size: int):
    """
    Returns (possible_hands, hand_label_to_index, index_to_hand_label) for an
    unshuffled deck of deck_size cards.

    possible_hands holds every 2-card combination of the deck, and the two
    lookups map each hand's label to its position in that list and back.
    The result is cached on the class and rebuilt only when deck_size
    differs from the cached one.
    """
    # _cache_possible_hands is declared on the class as a bare annotation, so
    # the attribute does not exist until the first rebuild. Checking for it
    # keeps a first call with deck_size == _cache_deck_size (0) from failing
    # with AttributeError on the return below.
    cache_is_stale = (
        PokerOracle._cache_deck_size != deck_size
        or not hasattr(PokerOracle, "_cache_possible_hands")
    )
    if cache_is_stale:
        deck = PokerOracle.gen_deck(num_cards=deck_size, shuffled=False)
        possible_hands = list(itertools.combinations(deck.cards, 2))

        index_to_hand_label = [HandLabelGenerator.get_hand_label(hand) for hand in possible_hands]
        hand_label_to_index = {label: idx for idx, label in enumerate(index_to_hand_label)}

        PokerOracle._cache_possible_hands = possible_hands
        PokerOracle._cache_hand_label_to_index = hand_label_to_index
        PokerOracle._cache_index_to_hand_label = index_to_hand_label
        PokerOracle._cache_deck_size = deck_size
    return PokerOracle._cache_possible_hands, PokerOracle._cache_hand_label_to_index, PokerOracle._cache_index_to_hand_label

@staticmethod
def get_index_from_hand_label(hand_label: str, deck_size: int) -> int:
    """
    Returns the index stored for the given hand label, or None when the
    label is not present in the lookup for this deck size
    """
    label_lookup = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)[1]
    return label_lookup.get(hand_label)

@staticmethod
def get_hand_label_from_index(index: int, deck_size: int) -> str:
    """
    Looks up the hand label stored at the given index for this deck size.
    Raises ValueError when the index is negative or past the last label.
    """
    labels = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)[2]
    if not (0 <= index < len(labels)):
        raise ValueError(f"Index out of valid range: {index}")
    return labels[index]

@staticmethod
def gen_utility_matrix(public_cards: List[Card], deck_size: int):
print("\nStarted gen_utility_matrix")
Expand All @@ -54,7 +94,7 @@ def gen_utility_matrix(public_cards: List[Card], deck_size: int):

public_cards_set = set((card.rank, card.suit) for card in public_cards)

possible_hands, _, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
possible_hands, _, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)

# Initialize utility matrix with zeros
utility_matrix = np.zeros((len(possible_hands), len(possible_hands)), dtype=np.int8)
Expand Down
38 changes: 0 additions & 38 deletions deepstack_knock_off/src/games/poker/utils/hand_label_generator.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,14 @@
from typing import List, Dict
import itertools

from games.poker.poker_oracle import PokerOracle
from games.poker.utils.card import Card


class HandLabelGenerator:
    """
    Builds string labels for two-card hands and keeps label <-> index
    lookups for every possible hand of a given deck size.
    """

    # Results of the most recent get_possible_hands_with_indexing call.
    # _cache_deck_size records which deck size they were built for.
    _cache_possible_hands: List = []
    _cache_hand_label_to_index: Dict[str, int] = {}
    _cache_index_to_hand_label: List[str] = []
    _cache_deck_size: int = 0

    @staticmethod
    def get_possible_hands_with_indexing(deck_size: int):
        """
        Returns (possible_hands, hand_label_to_index, index_to_hand_label)
        for an unshuffled deck of deck_size cards, rebuilding the cached
        values only when deck_size differs from the cached one.
        """
        if HandLabelGenerator._cache_deck_size != deck_size:
            deck = PokerOracle.gen_deck(num_cards=deck_size, shuffled=False)
            possible_hands = list(itertools.combinations(deck.cards, 2))

            hand_label_to_index = {}
            index_to_hand_label = []

            for idx, hand in enumerate(possible_hands):
                label = HandLabelGenerator.get_hand_label(hand)
                hand_label_to_index[label] = idx
                index_to_hand_label.append(label)

            # The hand list must be cached together with the lookups:
            # returning the local variable would fail with UnboundLocalError
            # on every cache hit, because it is only assigned in this branch.
            HandLabelGenerator._cache_possible_hands = possible_hands
            HandLabelGenerator._cache_hand_label_to_index = hand_label_to_index
            HandLabelGenerator._cache_index_to_hand_label = index_to_hand_label
            HandLabelGenerator._cache_deck_size = deck_size
        return HandLabelGenerator._cache_possible_hands, HandLabelGenerator._cache_hand_label_to_index, HandLabelGenerator._cache_index_to_hand_label

    @staticmethod
    def get_hand_label(cards: List[Card]) -> str:
        """
        Returns the concatenated "<rank><suit>" of each card, with the cards
        ordered by (rank, suit) so the label does not depend on input order.
        Raises ValueError when any item is not a Card.
        """
        if not all(isinstance(card, Card) for card in cards):
            raise ValueError("All items in the list must be instances of Card")
        sorted_hand = tuple(sorted(cards, key=lambda card: (card.rank, card.suit)))
        return "".join([f"{card.rank}{card.suit}" for card in sorted_hand])

    @staticmethod
    def get_index_from_hand_label(hand_label: str, deck_size: int) -> int:
        """
        Returns the index stored for the given hand label, or None when the
        label is not present in the lookup for this deck size
        """
        _, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
        return hand_label_to_index.get(hand_label, None)

    @staticmethod
    def get_hand_label_from_index(index: int, deck_size: int) -> str:
        """
        Returns the hand label corresponding to the given index
        """
        _, _, index_to_hand_label = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
        if 0 <= index < len(index_to_hand_label):
            return index_to_hand_label[index]
        raise ValueError(f"Index out of valid range: {index}")
34 changes: 16 additions & 18 deletions deepstack_knock_off/src/resolver/resolver.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Dict
import numpy as np
import math
import time
Expand Down Expand Up @@ -51,15 +52,17 @@ def build_initial_subtree(self, state: PokerState, end_stage: str, end_depth: in
elif child_state.history[-1][2] == "fold" or child_state.stage == "showdown":
# Create a Terminal Node for game-ending actions
child_node = TerminalNode(child_state, parent=node, stage_depth=next_stage_depth)
child_node.set_utility_matrix(node.utility_matrix, node.hand_label_to_index)
child_node.set_utility_matrix(node.utility_matrix)
else:
# Continue with a Player Node if still in the same gameplay phase
next_player = self.determine_next_player(state=child_state, current_player=node.player, new_stage=False)
child_node = PlayerNode(child_state, player=next_player, parent=node, stage_depth=next_stage_depth)
child_node.set_utility_matrix(node.utility_matrix, node.hand_label_to_index)
child_node.set_utility_matrix(node.utility_matrix)
edge_value = child_state.history[-1][2]
node.add_child(child_node, edge_value)
queue.append((child_node, next_stage_depth))
# We can initialize the strategy matrix now that the children are connected to the parent
node.init_strategy_matrix(deck_size=self.state_manager.poker_rules["deck_size"])

elif isinstance(node, ChanceNode):
# Handle chance node children, initiating new stage
Expand Down Expand Up @@ -139,27 +142,22 @@ def print_tree_path(self, path):
print(edge)
print(" | ")

# Per-hand Bayesian update of a range vector after observing `action`:
# updated[hand] = P(action | hand) * P(hand) / P(action).
# Two variants of the method are interleaved below: one reading `node_strategy`
# and one reading `strategy_matrix`.
def bayesian_range_update(self, range_prior, action, node_strategy, action_to_index):
# Manually
possible_hands, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
def bayesian_range_update(self, range_prior: np.ndarray, action: str, strategy_matrix: np.ndarray, action_to_index: Dict[str, int]):
# Work on a copy so the caller's prior is not modified in place
updated_range = np.copy(range_prior)
possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
for hand in possible_hands:
hand_label = HandLabelGenerator.get_hand_label(hand)
print(hand_label)
# P(action | hand): strategy entry at (row of this hand, column of the action)
prob_action_hand = node_strategy[hand_label_to_index[hand_label], action_to_index[action]]
print(prob_action_hand)
prob_action_hand = strategy_matrix[hand_label_to_index[hand_label], action_to_index[action]]
# P(hand): range_prior is indexed as a single-row matrix, [0, hand index]
prob_hand = range_prior[0, hand_label_to_index[hand_label]]
print(prob_hand)
# This variant indexes node_strategy by the action index alone, which selects a row, not a column
prob_action = np.sum(node_strategy[action_to_index[action]])
print(prob_action)
# This variant rebinds updated_range to a single scalar on every iteration
updated_range = (prob_action_hand * prob_hand) / prob_action
print()
# With numpy TODO

# P(action): sum of the action's column divided by the sum of the whole matrix.
# NOTE(review): this marginal does not involve range_prior; the textbook value is
# sum over hands of P(action | hand) * P(hand) — confirm which one is intended.
# NOTE(review): if the action's column sums to 0 the division below divides by zero.
prob_action = np.sum(strategy_matrix[:, action_to_index[action]]) / np.sum(strategy_matrix)
updated_range_value = (prob_action_hand * prob_hand) / prob_action
updated_range[0, hand_label_to_index[hand_label]] = updated_range_value
# TODO with numpy
return updated_range

def subtree_traversal_rollout(self, node: Node, r1, r2, end_stage, end_depth):
# print("range vectors", r1, r2)
possible_hands, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
if isinstance(node, TerminalNode):
if node.state.stage == "showdown":
print("showdown")
Expand Down Expand Up @@ -195,10 +193,10 @@ def subtree_traversal_rollout(self, node: Node, r1, r2, end_stage, end_depth):
v2 = np.zeros((1, len(possible_hands)), dtype=np.float64)
for child, action in node.children:
if node.player == "player_one":
updated_range = self.bayesian_range_update(r1, action, node.strategy)
updated_range = self.bayesian_range_update(r1, action, node.strategy_matrix, node.action_to_index)
v1_action, v2_action = self.subtree_traversal_rollout(child, updated_range, r2, end_stage, end_depth)
elif node.player == "player_two":
updated_range = self.bayesian_range_update(r2, action, node.strategy)
updated_range = self.bayesian_range_update(r2, action, node.strategy_matrix, node.action_to_index)
v1_action, v2_action = self.subtree_traversal_rollout(child, r1, updated_range, end_stage, end_depth)
for hand in possible_hands:
hand_label = HandLabelGenerator.get_hand_label(hand)
Expand Down
4 changes: 1 addition & 3 deletions deepstack_knock_off/src/resolver/subtree/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ def __init__(self, state: PokerState, parent: Optional['Node'] = None, stage_dep
# Each element in children is a tuple (child_node, edge_value)
self.children: List[Tuple['Node', str]] = []
self.utility_matrix: Optional[np.ndarray] = None
self.hand_label_to_index: Optional[Dict] = None

def add_child(self, child_node: 'Node', edge_value: Any):
    """
    Registers child_node under this node, paired with the edge value that
    leads to it
    """
    link = (child_node, edge_value)
    self.children.append(link)

# Stores the given utility matrix on this node.
# Two signatures are interleaved below: the two-argument form also records the
# hand-label lookup on the node, the one-argument form stores the matrix only.
def set_utility_matrix(self, matrix: np.ndarray, hand_label_to_index: Dict):
def set_utility_matrix(self, matrix: np.ndarray):
self.utility_matrix = matrix
self.hand_label_to_index = hand_label_to_index
39 changes: 34 additions & 5 deletions deepstack_knock_off/src/resolver/subtree/player_node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Optional, Any
from typing import Optional, Dict, List
import numpy as np

from games.poker.poker_oracle import PokerOracle
from games.poker.poker_state import PokerState
from resolver.subtree.node import Node

Expand All @@ -9,8 +11,35 @@ class PlayerNode(Node):
# Creates a player node; state, parent and stage_depth are forwarded to Node.__init__.
def __init__(self, state: PokerState, player: str, parent: Optional[Node] = None, stage_depth: Optional[int] = None):
super().__init__(state, parent, stage_depth)
self.player = player
self.strategy = None
# The attributes below start as None and are filled in by init_strategy_matrix
self.strategy_matrix: Optional[np.ndarray] = None
self.hand_label_to_index: Optional[Dict[str, int]] = None
self.action_to_index: Optional[Dict[str, int]] = None
self.index_to_action: Optional[List[str]] = None

# Placeholder: ignores hand_label and action and always returns 0.5.
def get_action_probability(self, hand_label: str, action: str):
return 0.5 # TODO
# return self.strategy[hand_index, action]
def init_strategy_matrix(self, deck_size):
    """
    Sets up the hand/action index mappings and a uniform strategy matrix.

    Must be called after the children have been attached: the edge value of
    each child is used as an action, in child order. The resulting matrix has
    one row per possible hand and one column per action, every entry being
    1 / number of actions.

    Raises ValueError when the node has no children.
    """
    # A non-empty children list guarantees a non-empty action list, so this
    # is the only emptiness check needed
    if not self.children:
        raise ValueError("No children actions found for initializing strategy matrix.")

    possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)
    # Each child is stored as (child_node, edge_value); the edge value is the action
    all_actions = [edge_value for _child, edge_value in self.children]

    # Set hand label to index mapping
    self.hand_label_to_index = hand_label_to_index

    # Set action to index mappings
    self.action_to_index = {action: idx for idx, action in enumerate(all_actions)}
    self.index_to_action = all_actions

    # Initialize strategy matrix to uniform probability distribution
    self.strategy_matrix = np.full((len(possible_hands), len(all_actions)), 1/len(all_actions))

def get_action_probability(self, hand_label: str, action: str) -> float:
    """
    Returns the strategy matrix entry for the given hand label and action.

    Raises KeyError when hand_label is not in hand_label_to_index and
    ValueError when action is not in action_to_index.
    """
    hand_index = self.hand_label_to_index[hand_label]
    # .get() instead of [] so an unknown action reaches the ValueError below
    # rather than escaping as a KeyError from the lookup itself
    action_index = self.action_to_index.get(action)
    if action_index is None:
        raise ValueError(f"Action '{action}' not recognized.")
    return self.strategy_matrix[hand_index, action_index]

0 comments on commit 3565205

Please sign in to comment.