Skip to content

Commit

Permalink
Further parameterize pos emb exploration script
Browse files Browse the repository at this point in the history
  • Loading branch information
gkielian committed Nov 17, 2023
1 parent 03607d9 commit 23416ae
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions explorations/test_all_positional_embeddings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@ python3 data/shakespeare_char/prepare.py

# Common settings
# These values are reused by the train.py invocations in this script so the
# runs can be re-targeted by editing one place.
# Passed to train.py as --max_iters.
max_iterations=3000
# Passed to train.py as --dataset.
# NOTE(review): the data-preparation step visible before this block runs
# data/shakespeare_char/prepare.py; confirm the "shakespeare" dataset is
# prepared as well before training on it.
dataset="shakespeare"
# Passed to train.py as --tensorboard_project.
tensorboard_project="shkspr_tiktoken"

# rope
python3 train.py \
--max_iters "$max_iterations" \
--eval_iters 200 \
--eval_interval 100 \
--log_interval 10 \
--dataset "shakespeare_char" \
--dataset "$dataset" \
--use_rotary_embeddings \
--no-use_abs_pos_embeddings \
--rope_variant "rope" \
--no-use_softmax_variant \
--tensorboard_project "shkspr" \
--tensorboard_project "$tensorboard_project" \
--tensorboard_run_name "rope" \
--block_size 256 \
--out_dir "shkspr_rope" \
Expand All @@ -31,11 +33,11 @@ python3 train.py \
--eval_iters 200 \
--eval_interval 100 \
--log_interval 10 \
--dataset "shakespeare_char" \
--dataset "$dataset" \
--no-use_rotary_embeddings \
--use_abs_pos_embeddings \
--no-use_softmax_variant \
--tensorboard_project "shkspr" \
--tensorboard_project "$tensorboard_project" \
--tensorboard_run_name "abs_pos" \
--block_size 256 \
--out_dir "shkspr_abs_pos" \
Expand All @@ -47,28 +49,28 @@ python3 train.py \
--eval_iters 200 \
--eval_interval 100 \
--log_interval 10 \
--dataset "shakespeare_char" \
--dataset "$dataset" \
--use_rotary_embeddings \
--rope_variant "rope" \
--use_abs_pos_embeddings \
--no-use_softmax_variant \
--tensorboard_project "shkspr" \
--tensorboard_project "$tensorboard_project" \
--tensorboard_run_name "rope_abs_pos" \
--block_size 256 \
--out_dir "shkspr_rope_abs_pos" \
--compile

no positional embeddings
# no positional embeddings
python3 train.py \
--max_iters "$max_iterations" \
--eval_iters 200 \
--eval_interval 100 \
--log_interval 10 \
--dataset "shakespeare_char" \
--dataset "$dataset" \
--no-use_rotary_embeddings \
--no-use_abs_pos_embeddings \
--no-use_softmax_variant \
--tensorboard_project "shkspr" \
--tensorboard_project "$tensorboard_project" \
--tensorboard_run_name "no_pos_emb" \
--block_size 256 \
--out_dir "shkspr_nope" \
Expand All @@ -81,13 +83,13 @@ for i in {2..16..2}; do
--eval_iters 200 \
--eval_interval 100 \
--log_interval 10 \
--dataset "shakespeare_char" \
--dataset "$dataset" \
--use_rotary_embeddings \
--rope_variant "shortrope" \
--shortrope_length "${i}" \
--no-use_abs_pos_embeddings \
--no-use_softmax_variant \
--tensorboard_project "shkspr" \
--tensorboard_project "$tensorboard_project" \
--tensorboard_run_name "shortrope_${i}" \
--block_size 256 \
--out_dir "shkspr_rope_abs_pos_${i}" \
Expand Down

0 comments on commit 23416ae

Please sign in to comment.