From 62fd3f5c49f4365d353fb22afd9277ffdf55913d Mon Sep 17 00:00:00 2001 From: Kranthi Kiran GV Date: Wed, 31 May 2023 13:27:02 -0400 Subject: [PATCH 1/3] Add citation to readme --- README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ed38678e..69ef9410 100644 --- a/README.md +++ b/README.md @@ -639,15 +639,11 @@ Green: MHA+Rotary+GeGLU+Token_shift. 17.2M params. Blue: MHA_pro (MHA with various tweaks & RWKV-type-FFN) - slow - needs more VRAM - good performance. 16.6M params. ``` -@software{peng_bo_2021_5196578, - author = {PENG Bo}, - title = {BlinkDL/RWKV-LM: 0.01}, - month = aug, - year = 2021, - publisher = {Zenodo}, - version = {0.01}, - doi = {10.5281/zenodo.5196577}, - url = {https://doi.org/10.5281/zenodo.5196577} +@article{peng2023rwkv, + title={RWKV: Reinventing RNNs for the Transformer Era}, + author={Peng, Bo and Alcaide, Eric and Anthony, Quentin and Albalak, Alon and Arcadinho, Samuel and Cao, Huanqi and Cheng, Xin and Chung, Michael and Grella, Matteo and GV, Kranthi Kiran and others}, + journal={arXiv preprint arXiv:2305.13048}, + year={2023} } ``` From ae2cca12e5cf65418b146037c84185de75220db3 Mon Sep 17 00:00:00 2001 From: Kranthi Kiran GV Date: Wed, 31 May 2023 13:34:19 -0400 Subject: [PATCH 2/3] Add link to publication --- README.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 69ef9410..ae53102a 100644 --- a/README.md +++ b/README.md @@ -638,15 +638,6 @@ Green: MHA+Rotary+GeGLU+Token_shift. 17.2M params. Blue: MHA_pro (MHA with various tweaks & RWKV-type-FFN) - slow - needs more VRAM - good performance. 16.6M params. -``` -@article{peng2023rwkv, - title={RWKV: Reinventing RNNs for the Transformer Era}, - author={Peng, Bo and Alcaide, Eric and Anthony, Quentin and Albalak, Alon and Arcadinho, Samuel and Cao, Huanqi and Cheng, Xin and Chung, Michael and Grella, Matteo and GV, Kranthi Kiran and others}, - journal={arXiv preprint arXiv:2305.13048}, - year={2023} -} -``` - # Initialization We use careful initialization for RWKV to get fast convergence - orthogonal matrices with proper scaling, and special time_w curves. Check model.py for details. @@ -654,3 +645,20 @@ We use careful initialization for RWKV to get fast convergence - orthogonal matr Some learned time_w examples: ![RWKV-time-w](RWKV-time-w.png) + +# Relevant papers + +If you use RWKV in your research, please consider citing our paper. + +Peng B, Alcaide E, Anthony Q, Albalak A, Arcadinho S, Cao H, Cheng X, Chung M, Grella M, GV KK, He X, et al. RWKV: Reinventing RNNs for the Transformer Era. arXiv preprint arXiv:2305.13048. 2023. + +[Link](https://arxiv.org/abs/2305.13048) to paper. + +``` +@article{peng2023rwkv, + title={RWKV: Reinventing RNNs for the Transformer Era}, + author={Peng, Bo and Alcaide, Eric and Anthony, Quentin and Albalak, Alon and Arcadinho, Samuel and Cao, Huanqi and Cheng, Xin and Chung, Michael and Grella, Matteo and GV, Kranthi Kiran and others}, + journal={arXiv preprint arXiv:2305.13048}, + year={2023} +} +``` \ No newline at end of file From f94ad5552b1b363dcbd9a374319e158f30c1dc0c Mon Sep 17 00:00:00 2001 From: Kranthi Kiran GV Date: Wed, 31 May 2023 13:49:04 -0400 Subject: [PATCH 3/3] Add publications title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae53102a..20177f04 100644 --- a/README.md +++ b/README.md @@ -646,7 +646,7 @@ Some learned time_w examples: ![RWKV-time-w](RWKV-time-w.png) -# Relevant papers +# Relevant publications If you use RWKV in your research, please consider citing our paper.