Skip to content

Commit

Permalink
Update softmax_sweep to run with flash attention disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
gkielian committed Sep 18, 2024
1 parent 426a3f3 commit e88d20c
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions softmax_sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ def get_batch(split):
# model init
gptconf = GPTConfig(
block_size = block_size, # how far back does the model look? i.e. context size
n_layer = 6, n_head = 12, n_embd = 768, # size of the model
n_layer = 3, n_head = 12, n_embd = 768, # size of the model
softmax_variant_attn = variant,
disable_flash_attention = True,
strongermax_use_xmax = True,
strongermax_sum_to_1 = True,
dropout = 0, # for determinism
Expand Down Expand Up @@ -176,7 +177,7 @@ def get_batch(split):
for block_size in block_sizes:
ln1_ln2_row.append(f"{ln1_ln2_timing_results[variant][block_size]:.4f}")
forward_pass_row.append(f"{forward_pass_timing_results[variant][block_size]:.4f}")

ln1_ln2_table.add_row(*ln1_ln2_row)
forward_pass_table.add_row(*forward_pass_row)

Expand Down

0 comments on commit e88d20c

Please sign in to comment.