From 861a72fdaeb9b6d514926433b900cc41f8637fca Mon Sep 17 00:00:00 2001 From: Gregory Kielian Date: Sat, 31 Aug 2024 12:50:36 -0700 Subject: [PATCH] Refactor wte_mapping to handle mixture of data Now handles letter mappings, random values, and numeric values. Also has reasonable ranges for each with control over the radian offset. All values are now centered around theta = 0 by default. Random values also have an option for initializing to either random values or random unit circle values. --- wte_mapping.py | 87 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/wte_mapping.py b/wte_mapping.py index ff8b840b51..0077293f7d 100644 --- a/wte_mapping.py +++ b/wte_mapping.py @@ -2,66 +2,92 @@ import random import argparse import csv -from typing import Dict, Tuple, List +from typing import List, Tuple, Dict +from rich import print -def generate_letter_mapping(degrees: int) -> Dict[str, Tuple[float, float]]: +def generate_letter_mapping(degrees: int, letter_offset: float) -> Dict[str, Tuple[float, float]]: radians = np.deg2rad(degrees) - cos, sin = np.cos(radians), np.sin(radians) + offset_radians = np.deg2rad(letter_offset) + cos, sin = np.cos(radians + offset_radians), np.sin(radians + offset_radians) + cos_centered, sin_centered = np.cos(offset_radians), np.sin(offset_radians) return { 'H': (cos, sin), - 'M': (1.0, 0.0), + 'M': (cos_centered, sin_centered), 'L': (cos, -sin), 'y': (cos, sin), 'n': (cos, -sin), 's': (cos, sin), - 'a': (1.0, 0.0), + 'a': (cos_centered, sin_centered), 'f': (cos, -sin), } -def random_coordinates(mean: float = 0.0, stdev: float = 0.02) -> Tuple[float, float]: - return random.gauss(mean, stdev), random.gauss(mean, stdev) +def random_value(mean: float = 0.0, stdev: float = 0.02) -> float: + return np.random.normal(mean, stdev) -def map_letter(letter: str, letter_mapping: Dict[str, Tuple[float, float]]) -> Tuple[float, float]: - return letter_mapping.get(letter, random_coordinates()) 
+def random_value_pair(mean: float = 0.0, stdev: float = 0.02) -> Tuple[float, float]: + return np.random.normal(mean, stdev), np.random.normal(mean, stdev) -def map_numeric(value: float, min_value: float, max_value: float, max_angle_difference: float = 180) -> Tuple[float, float]: - scaled_value = (value - min_value) / (max_value - min_value) - radians = np.pi * (max_angle_difference / 180.0) * scaled_value +def map_numeric_to_circle(value: float, max_angle_difference: float, number_offset: float) -> Tuple[float, float]: + max_radian_difference = np.deg2rad(max_angle_difference) + half_max_radian_difference = max_radian_difference / 2.0 + centered_angle = max_radian_difference * value - half_max_radian_difference + final_angle = centered_angle + np.deg2rad(number_offset) + return np.cos(final_angle), np.sin(final_angle) + +def map_random_to_circle(mean: float, stdev: float, random_offset: float) -> Tuple[float, float]: + radians = random_value(mean, stdev) + radians += np.deg2rad(random_offset) return np.cos(radians), np.sin(radians) +def map_value(value: str, letter_mapping: Dict[str, Tuple[float, float]], max_angle_difference: float, letter_offset: float, number_offset: float, random_offset: float, mean: float, stdev: float, random_value_pair_flag: bool, map_random_to_unit_circle_flag: bool) -> List[float]: + if value.lower() == 'r': + if map_random_to_unit_circle_flag: + return list(map_random_to_circle(mean, stdev, random_offset)) + elif random_value_pair_flag: + return list(random_value_pair(mean, stdev)) + else: + return [random_value(mean, stdev)] + try: + numeric_value = float(value) + if 0 <= numeric_value <= 1: + return list(map_numeric_to_circle(numeric_value, max_angle_difference, number_offset)) + else: + raise ValueError("Numeric value out of range [0, 1]") + except ValueError: + return list(letter_mapping.get(value, (random_value(mean, stdev), random_value(mean, stdev)))) + def load_csv(file_path: str) -> List[List[str]]: with open(file_path, 
newline='') as csvfile: return list(csv.reader(csvfile)) -def map_table(table: List[List[str]], mode: str, letter_mapping: Dict[str, Tuple[float, float]], - min_value: float, max_value: float, max_angle_difference: float) -> np.ndarray: - if mode == 'letters': - return np.array([[coord for letter in row for coord in map_letter(letter, letter_mapping)] for row in table]) - elif mode == 'numeric': - numeric_table = np.array(table, dtype=float) - return np.array([[x for value in row for x in map_numeric(value, min_value, max_value, max_angle_difference)] - for row in numeric_table]) +def map_table(table: List[List[str]], letter_mapping: Dict[str, Tuple[float, float]], max_angle_difference: float, letter_offset: float, number_offset: float, random_offset: float, mean: float, stdev: float, random_value_pair_flag: bool, map_random_to_unit_circle_flag: bool) -> np.ndarray: + return np.array([sum([map_value(value, letter_mapping, max_angle_difference, letter_offset, number_offset, random_offset, mean, stdev, random_value_pair_flag, map_random_to_unit_circle_flag) for value in row], []) for row in table]) def main(): parser = argparse.ArgumentParser(description='Generate initial_wte.npy from a CSV file.') parser.add_argument('--csv', type=str, required=True, help='Path to the input CSV file.') - parser.add_argument('--mode', type=str, choices=['letters', 'numeric'], default='letters', - help='Mode: "letters" for letter mapping, "numeric" for numeric mapping.') parser.add_argument('--degrees', type=int, default=60, help='Degrees of separation for letters (default: 60)') - parser.add_argument('--min', type=float, default=0.0, help='Minimum value for numeric scaling (default: 0.0)') - parser.add_argument('--max', type=float, default=1.0, help='Maximum value for numeric scaling (default: 1.0)') - parser.add_argument('--max_angle_difference', type=float, default=180.0, - help='Maximum value difference of extremes.') + parser.add_argument('--letter_offset', type=float, 
default=0.0, help='Offset angle for the letter mapping (default: 0.0)') + parser.add_argument('--number_offset', type=float, default=0.0, help='Offset angle for numeric mapping (default: 0.0)') + parser.add_argument('--random_offset', type=float, default=0.0, help='Offset angle for random value mapping (default: 0.0)') + parser.add_argument('--random_value_pair', default=True, action=argparse.BooleanOptionalAction, help="Use two random values per 'r' input (default: True)") + parser.add_argument('--map_random_to_unit_circle', default=False, action=argparse.BooleanOptionalAction, help="Map 'r' input to unit circle") + parser.add_argument('--mean', type=float, default=0.0, help='Mean for random number generation (default: 0.0)') + parser.add_argument('--stdev', type=float, default=0.02, help='Standard deviation for random number generation (default: 0.02)') + parser.add_argument('--max_angle_difference', type=float, default=180.0, help='Maximum angle difference for numeric mapping (default: 180.0)') + parser.add_argument('--output', type=str, default='initial_wte.npy', help='Output file name (default: initial_wte.npy)') args = parser.parse_args() + letter_mapping = generate_letter_mapping(args.degrees, args.letter_offset) table = load_csv(args.csv) - letter_mapping = generate_letter_mapping(args.degrees) - wte = map_table(table, args.mode, letter_mapping, args.min, args.max, args.max_angle_difference) + wte = map_table(table, letter_mapping, args.max_angle_difference, + args.letter_offset, args.number_offset, args.random_offset, + args.mean, args.stdev, args.random_value_pair, args.map_random_to_unit_circle) print(f"Shape of wte: {wte.shape}") - np.save('initial_wte.npy', wte) - print(f"Saved initial wte with shape {wte.shape} to initial_wte.npy") + np.save(args.output, wte) + print(f"Saved initial wte with shape {wte.shape} to {args.output}") np.set_printoptions(precision=3, suppress=True) print("\nPrint wte (3 decimal places):") @@ -69,3 +95,4 @@ def main(): if 
__name__ == "__main__": main() +