Bayesian update for range vectors

sebastiannberg · Apr 20, 2024 · 3565205 · 3565205
1 parent 90c8777
commit 3565205
Show file tree

Hide file tree

Showing 6 changed files with 94 additions and 69 deletions.
diff --git a/deepstack_knock_off/src/games/poker/players/ai_player.py b/deepstack_knock_off/src/games/poker/players/ai_player.py
@@ -1,10 +1,8 @@
 from typing import List
 import numpy as np
-import itertools
 
 from games.poker.poker_state_manager import PokerStateManager
 from games.poker.poker_game import PokerGame
-from games.poker.utils.hand_label_generator import HandLabelGenerator
 from games.poker.players.player import Player
 from games.poker.actions.action import Action
 from games.poker.utils.card import Card
@@ -21,7 +19,7 @@ def __init__(self, name, initial_chips, state_manager: PokerStateManager):
         super().__init__(name, initial_chips)
         self.resolver = Resolver(state_manager)
         self.state_manager = state_manager
-        possible_hands, _, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
+        possible_hands, _, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
         self.r1 = np.full((1, len(possible_hands)), 1/len(possible_hands), dtype=np.float64)
         self.r2 = np.full((1, len(possible_hands)), 1/len(possible_hands), dtype=np.float64)
 

diff --git a/deepstack_knock_off/src/games/poker/poker_oracle.py b/deepstack_knock_off/src/games/poker/poker_oracle.py
@@ -1,4 +1,4 @@
-from typing import List, Tuple, Optional
+from typing import List, Tuple, Optional, Dict
 import numpy as np
 import itertools
 import time
@@ -26,6 +26,11 @@ class PokerOracle:
         "high_card": 1
     }
 
+    _cache_possible_hands: List[Tuple[Card, Card]]
+    _cache_hand_label_to_index: Dict[str, int] = {}
+    _cache_index_to_hand_label: List[str] = []
+    _cache_deck_size: int = 0
+
     @staticmethod
     def gen_deck(num_cards: int, shuffled: bool) -> Deck:
         if num_cards % 4:
@@ -44,6 +49,41 @@ def gen_deck(num_cards: int, shuffled: bool) -> Deck:
             deck.shuffle()
         return deck
 
+    @staticmethod
+    def get_possible_hands_with_indexing(deck_size: int):
+        if PokerOracle._cache_deck_size != deck_size:
+            deck = PokerOracle.gen_deck(num_cards=deck_size, shuffled=False)
+            possible_hands = list(itertools.combinations(deck.cards, 2))
+
+            hand_label_to_index = {}
+            index_to_hand_label = []
+
+            for idx, hand in enumerate(possible_hands):
+                label = HandLabelGenerator.get_hand_label(hand)
+                hand_label_to_index[label] = idx
+                index_to_hand_label.append(label)
+
+            PokerOracle._cache_possible_hands = possible_hands
+            PokerOracle._cache_hand_label_to_index = hand_label_to_index
+            PokerOracle._cache_index_to_hand_label = index_to_hand_label
+            PokerOracle._cache_deck_size = deck_size
+        return PokerOracle._cache_possible_hands, PokerOracle._cache_hand_label_to_index, PokerOracle._cache_index_to_hand_label
+
+    @staticmethod
+    def get_index_from_hand_label(hand_label: str, deck_size: int) -> int:
+        _, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)
+        return hand_label_to_index.get(hand_label, None)
+
+    @staticmethod
+    def get_hand_label_from_index(index: int, deck_size: int) -> str:
+        """
+        Returns the hand label corresponding to the given index
+        """
+        _, _, index_to_hand_label = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)
+        if 0 <= index < len(index_to_hand_label):
+            return index_to_hand_label[index]
+        raise ValueError(f"Index out of valid range: {index}")
+
     @staticmethod
     def gen_utility_matrix(public_cards: List[Card], deck_size: int):
         print("\nStarted gen_utility_matrix")
@@ -54,7 +94,7 @@ def gen_utility_matrix(public_cards: List[Card], deck_size: int):
 
         public_cards_set = set((card.rank, card.suit) for card in public_cards)
 
-        possible_hands, _, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
+        possible_hands, _, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)
 
         # Initialize utility matrix with zeros
         utility_matrix = np.zeros((len(possible_hands), len(possible_hands)), dtype=np.int8)

diff --git a/deepstack_knock_off/src/games/poker/utils/hand_label_generator.py b/deepstack_knock_off/src/games/poker/utils/hand_label_generator.py
@@ -1,52 +1,14 @@
 from typing import List, Dict
 import itertools
 
-from games.poker.poker_oracle import PokerOracle
 from games.poker.utils.card import Card
 
 
 class HandLabelGenerator:
-    _cache_hand_label_to_index: Dict[str, int] = {}
-    _cache_index_to_hand_label: List[str] = []
-    _cache_deck_size: int = 0
-
-    @staticmethod
-    def get_possible_hands_with_indexing(deck_size: int):
-        if HandLabelGenerator._cache_deck_size != deck_size:
-            deck = PokerOracle.gen_deck(num_cards=deck_size, shuffled=False)
-            possible_hands = list(itertools.combinations(deck.cards, 2))
-
-            hand_label_to_index = {}
-            index_to_hand_label = []
-
-            for idx, hand in enumerate(possible_hands):
-                label = HandLabelGenerator.get_hand_label(hand)
-                hand_label_to_index[label] = idx
-                index_to_hand_label.append(label)
-
-            HandLabelGenerator._cache_hand_label_to_index = hand_label_to_index
-            HandLabelGenerator._cache_index_to_hand_label = index_to_hand_label
-            HandLabelGenerator._cache_deck_size = deck_size
-        return possible_hands, HandLabelGenerator._cache_hand_label_to_index, HandLabelGenerator._cache_index_to_hand_label
 
     @staticmethod
     def get_hand_label(cards: List[Card]) -> str:
         if not all(isinstance(card, Card) for card in cards):
             raise ValueError("All items in the list must be instances of Card")
         sorted_hand = tuple(sorted(cards, key=lambda card: (card.rank, card.suit)))
         return "".join([f"{card.rank}{card.suit}" for card in sorted_hand])
-
-    @staticmethod
-    def get_index_from_hand_label(hand_label: str, deck_size: int) -> int:
-        _, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
-        return hand_label_to_index.get(hand_label, None)
-
-    @staticmethod
-    def get_hand_label_from_index(index: int, deck_size: int) -> str:
-        """
-        Returns the hand label corresponding to the given index
-        """
-        _, _, index_to_hand_label = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=deck_size)
-        if 0 <= index < len(index_to_hand_label):
-            return index_to_hand_label[index]
-        raise ValueError(f"Index out of valid range: {index}")
diff --git a/deepstack_knock_off/src/resolver/resolver.py b/deepstack_knock_off/src/resolver/resolver.py
@@ -1,3 +1,4 @@
+from typing import Dict
 import numpy as np
 import math
 import time
@@ -51,15 +52,17 @@ def build_initial_subtree(self, state: PokerState, end_stage: str, end_depth: in
                     elif child_state.history[-1][2] == "fold" or child_state.stage == "showdown":
                         # Create a Terminal Node for game-ending actions
                         child_node = TerminalNode(child_state, parent=node, stage_depth=next_stage_depth)
-                        child_node.set_utility_matrix(node.utility_matrix, node.hand_label_to_index)
+                        child_node.set_utility_matrix(node.utility_matrix)
                     else:
                         # Continue with a Player Node if still in the same gameplay phase
                         next_player = self.determine_next_player(state=child_state, current_player=node.player, new_stage=False)
                         child_node = PlayerNode(child_state, player=next_player, parent=node, stage_depth=next_stage_depth)
-                        child_node.set_utility_matrix(node.utility_matrix, node.hand_label_to_index)
+                        child_node.set_utility_matrix(node.utility_matrix)
                     edge_value = child_state.history[-1][2]
                     node.add_child(child_node, edge_value)
                     queue.append((child_node, next_stage_depth))
+                # We can initialize the strategy matrix now that the children are connected to the parent
+                node.init_strategy_matrix(deck_size=self.state_manager.poker_rules["deck_size"])
 
             elif isinstance(node, ChanceNode):
                 # Handle chance node children, initiating new stage
@@ -139,27 +142,22 @@ def print_tree_path(self, path):
                 print(edge)
                 print("  |  ")
 
-    def bayesian_range_update(self, range_prior, action, node_strategy, action_to_index):
-        # Manually
-        possible_hands, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
+    def bayesian_range_update(self, range_prior: np.ndarray, action: str, strategy_matrix: np.ndarray, action_to_index: Dict[str, int]):
+        updated_range = np.copy(range_prior)
+        possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
         for hand in possible_hands:
             hand_label = HandLabelGenerator.get_hand_label(hand)
-            print(hand_label)
-            prob_action_hand = node_strategy[hand_label_to_index[hand_label], action_to_index[action]]
-            print(prob_action_hand)
+            prob_action_hand = strategy_matrix[hand_label_to_index[hand_label], action_to_index[action]]
             prob_hand = range_prior[0, hand_label_to_index[hand_label]]
-            print(prob_hand)
-            prob_action = np.sum(node_strategy[action_to_index[action]])
-            print(prob_action)
-            updated_range = (prob_action_hand * prob_hand) / prob_action
-        print()
-        # With numpy TODO
-
+            prob_action = np.sum(strategy_matrix[:, action_to_index[action]]) / np.sum(strategy_matrix)
+            updated_range_value = (prob_action_hand * prob_hand) / prob_action
+            updated_range[0, hand_label_to_index[hand_label]] = updated_range_value
+        # TODO with numpy
         return updated_range
 
     def subtree_traversal_rollout(self, node: Node, r1, r2, end_stage, end_depth):
         # print("range vectors", r1, r2)
-        possible_hands, hand_label_to_index, _ = HandLabelGenerator.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
+        possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=self.state_manager.poker_rules["deck_size"])
         if isinstance(node, TerminalNode):
             if node.state.stage == "showdown":
                 print("showdown")
@@ -195,10 +193,10 @@ def subtree_traversal_rollout(self, node: Node, r1, r2, end_stage, end_depth):
             v2 = np.zeros((1, len(possible_hands)), dtype=np.float64)
             for child, action in node.children:
                 if node.player == "player_one":
-                    updated_range = self.bayesian_range_update(r1, action, node.strategy)
+                    updated_range = self.bayesian_range_update(r1, action, node.strategy_matrix, node.action_to_index)
                     v1_action, v2_action = self.subtree_traversal_rollout(child, updated_range, r2, end_stage, end_depth)
                 elif node.player == "player_two":
-                    updated_range = self.bayesian_range_update(r2, action, node.strategy)
+                    updated_range = self.bayesian_range_update(r2, action, node.strategy_matrix, node.action_to_index)
                     v1_action, v2_action = self.subtree_traversal_rollout(child, r1, updated_range, end_stage, end_depth)
                 for hand in possible_hands:
                     hand_label = HandLabelGenerator.get_hand_label(hand)

diff --git a/deepstack_knock_off/src/resolver/subtree/node.py b/deepstack_knock_off/src/resolver/subtree/node.py
@@ -13,11 +13,9 @@ def __init__(self, state: PokerState, parent: Optional['Node'] = None, stage_dep
         # Each element in children is a tuple (child_node, edge_value)
         self.children: List[Tuple['Node', str]] = []
         self.utility_matrix: Optional[np.ndarray] = None
-        self.hand_label_to_index: Optional[Dict] = None
 
     def add_child(self, child_node: 'Node', edge_value: Any):
         self.children.append((child_node, edge_value))
 
-    def set_utility_matrix(self, matrix: np.ndarray, hand_label_to_index: Dict):
+    def set_utility_matrix(self, matrix: np.ndarray):
         self.utility_matrix = matrix
-        self.hand_label_to_index = hand_label_to_index
diff --git a/deepstack_knock_off/src/resolver/subtree/player_node.py b/deepstack_knock_off/src/resolver/subtree/player_node.py
@@ -1,5 +1,7 @@
-from typing import Optional, Any
+from typing import Optional, Dict, List
+import numpy as np
 
+from games.poker.poker_oracle import PokerOracle
 from games.poker.poker_state import PokerState
 from resolver.subtree.node import Node
 
@@ -9,8 +11,35 @@ class PlayerNode(Node):
     def __init__(self, state: PokerState, player: str, parent: Optional[Node] = None, stage_depth: Optional[int] = None):
         super().__init__(state, parent, stage_depth)
         self.player = player
-        self.strategy = None
+        self.strategy_matrix: np.ndarray = None
+        self.hand_label_to_index: Dict[str, int] = None
+        self.action_to_index: Dict[str, int] = None
+        self.index_to_action: List[str] = None
 
-    def get_action_probability(self, hand_label: str, action: str):
-        return 0.5 # TODO
-        # return self.strategy[hand_index, action]
+    def init_strategy_matrix(self, deck_size):
+        # Assuming self.children is available and populated correctly with action tuples
+        if not self.children:
+            raise ValueError("No children actions found for initializing strategy matrix.")
+
+        possible_hands, hand_label_to_index, _ = PokerOracle.get_possible_hands_with_indexing(deck_size=deck_size)
+        all_actions = [child[1] for child in self.children]
+
+        # Set hand label to index mapping
+        self.hand_label_to_index = hand_label_to_index
+
+        # Set action to index mappings
+        self.action_to_index = {action: idx for idx, action in enumerate(all_actions)}
+        self.index_to_action = all_actions
+
+        if len(all_actions) == 0:
+            raise ValueError("No legal actions available to initialize strategy matrix.")
+
+        # Initialize strategy matrix to uniform probability distribution
+        self.strategy_matrix = np.full((len(possible_hands), len(all_actions)), 1/len(all_actions))
+
+    def get_action_probability(self, hand_label: str, action: str) -> float:
+        hand_index = self.hand_label_to_index[hand_label]
+        action_index = self.action_to_index[action]
+        if action_index is None:
+            raise ValueError(f"Action '{action}' not recognized.")
+        return self.strategy_matrix[hand_index, action_index]