From 7e867043e180096e7e9d8d81393b2dbe4af1536d Mon Sep 17 00:00:00 2001 From: Gregory Kielian Date: Thu, 16 Nov 2023 13:49:08 -0800 Subject: [PATCH] Implement feedback on RoPE equation implementation Adjusting equation for RoPE, as per discussion: https://github.com/ReaLLMASIC/nanoGPT/pull/35#discussion_r1394478985 --- model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/model.py b/model.py index 6e4006bac4..887ca56ab4 100644 --- a/model.py +++ b/model.py @@ -156,8 +156,8 @@ def __init__(self, config): self.dim = config.n_embd # Register frequencies directly as buffers - self.register_buffer('freq_left', 1.0 / (10000 ** (torch.arange(0, self.dim//2).float() / self.dim//2))) - self.register_buffer('freq_right', 1.0 / (10000 ** (torch.arange(0, self.dim//2).float() / self.dim//2))) + self.register_buffer('freq_left', (10000 ** (torch.arange(0, self.dim//2).float() / self.dim//2))) + self.register_buffer('freq_right',(10000 ** (torch.arange(0, self.dim//2).float() / self.dim//2))) def forward(self, x): seq_len = x.shape[-2] @@ -187,8 +187,8 @@ def __init__(self, config): self.dim = config.n_embd # Generate freqs of size n rather than full dim - self.register_buffer('freq_left', 1.0 / (10000 ** (torch.arange(0, self.n//2).float() / self.n//2))) - self.register_buffer('freq_right', 1.0 / (10000 ** (torch.arange(0, self.n//2).float() / self.n//2))) + self.register_buffer('freq_left', (10000 ** (torch.arange(0, self.n//2).float() / self.n//2))) + self.register_buffer('freq_right', (10000 ** (torch.arange(0, self.n//2).float() / self.n//2))) def forward(self, x): # Step 1: Get the input tensor shape