Removed: sections Introduction and Architecture; Added: short ref. to preprint for extended introduction
ricboer0 committed Dec 5, 2024
1 parent 2b60305 commit cf05d73
Showing 4 changed files with 19 additions and 98 deletions.
Binary file removed codebase-architecture.png
Binary file removed flowchart.png
70 changes: 17 additions & 53 deletions paper.bib
@@ -1,17 +1,3 @@
@misc{machlab_llm_2024,
title = {{LLM} {In}-{Context} {Recall} is {Prompt} {Dependent}},
url = {http://arxiv.org/abs/2404.08865},
abstract = {The proliferation of Large Language Models (LLMs) highlights the critical importance of conducting thorough evaluations to discern their comparative advantages, limitations, and optimal use cases. Particularly important is assessing their capacity to accurately retrieve information included in a given prompt. A model’s ability to do this significantly influences how effectively it can utilize contextual details, thus impacting its practical efficacy and dependability in real-world applications. Our research analyzes the in-context recall performance of various LLMs using the needle-in-a-haystack method. In this approach, a factoid (the “needle”) is embedded within a block of filler text (the “haystack”), which the model is asked to retrieve. We assess the recall performance of each model across various haystack lengths and with varying needle placements to identify performance patterns. This study demonstrates that an LLM’s recall capability is not only contingent upon the prompt’s content but also may be compromised by biases in its training data. Conversely, adjustments to model architecture, training strategy, or fine-tuning can improve performance. Our analysis provides insight into LLM behavior, offering direction for the development of more effective applications of LLMs.},
language = {en},
urldate = {2024-04-26},
publisher = {arXiv},
author = {Machlab, Daniel and Battle, Rick},
month = apr,
year = {2024},
note = {arXiv:2404.08865 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}

@article{page_prisma_2021,
title = {The {PRISMA} 2020 statement: an updated guideline for reporting systematic reviews},
volume = {10},
@@ -130,44 +116,6 @@ @article{dijk_artificial_2023
pages = {e072254},
}

@article{hagendorff_human-like_2023,
title = {Human-like intuitive behavior and reasoning biases emerged in large language models but disappeared in {ChatGPT}},
volume = {3},
issn = {2662-8457},
url = {https://www.nature.com/articles/s43588-023-00527-x},
doi = {10.1038/s43588-023-00527-x},
abstract = {We design a battery of semantic illusions and cognitive reflection tests, aimed to elicit intuitive yet erroneous responses. We administer these tasks, traditionally used to study reasoning and decision-making in humans, to OpenAI’s generative pre-trained transformer model family. The results show that as the models expand in size and linguistic proficiency they increasingly display human-like intuitive system 1 thinking and associated cognitive errors. This pattern shifts notably with the introduction of ChatGPT models, which tend to respond correctly, avoiding the traps embedded in the tasks. Both ChatGPT-3.5 and 4 utilize the input–output context window to engage in chain-of-thought reasoning, reminiscent of how people use notepads to support their system 2 thinking. Yet, they remain accurate even when prevented from engaging in chain-of-thought reasoning, indicating that their system-1-like next-word generation processes are more accurate than those of older models. Our findings highlight the value of applying psychological methodologies to study large language models, as this can uncover previously undetected emergent characteristics.},
language = {en},
number = {10},
urldate = {2024-05-28},
journal = {Nature Computational Science},
author = {Hagendorff, Thilo and Fabi, Sarah and Kosinski, Michal},
month = oct,
year = {2023},
pages = {833--838},
}

@misc{puigcerver_sparse_2024,
title = {From {Sparse} to {Soft} {Mixtures} of {Experts}},
url = {http://arxiv.org/abs/2308.00951},
doi = {10.48550/arXiv.2308.00951},
abstract = {Sparse mixture of expert architectures (MoEs) scale model capacity without significant increases in training or inference costs. Despite their success, MoEs suffer from a number of issues: training instability, token dropping, inability to scale the number of experts, or ineffective finetuning. In this work, we propose Soft MoE, a fully-differentiable sparse Transformer that addresses these challenges, while maintaining the benefits of MoEs. Soft MoE performs an implicit soft assignment by passing different weighted combinations of all input tokens to each expert. As in other MoEs, experts in Soft MoE only process a subset of the (combined) tokens, enabling larger model capacity (and performance) at lower inference cost. In the context of visual recognition, Soft MoE greatly outperforms dense Transformers (ViTs) and popular MoEs (Tokens Choice and Experts Choice). Furthermore, Soft MoE scales well: Soft MoE Huge/14 with 128 experts in 16 MoE layers has over 40x more parameters than ViT Huge/14, with only 2\% increased inference time, and substantially better quality.},
urldate = {2024-05-28},
publisher = {arXiv},
author = {Puigcerver, Joan and Riquelme, Carlos and Mustafa, Basil and Houlsby, Neil},
month = may,
year = {2024},
note = {arXiv:2308.00951 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning},
}

@article{radhakrishnan_question_nodate,
title = {Question {Decomposition} {Improves} the {Faithfulness} of {Model}-{Generated} {Reasoning}},
language = {en},
author = {Radhakrishnan, Ansh and Nguyen, Karina and Chen, Anna and Chen, Carol and Denison, Carson and Hernandez, Danny and Durmus, Esin and Hubinger, Evan and Kernion, Jackson and Lukošiūtė, Kamilė and Cheng, Newton and Joseph, Nicholas and Schiefer, Nicholas and Rausch, Oliver and McCandlish, Sam and Showk, Sheer El and Lanham, Tamera and Maxwell, Tim and Chandrasekaran, Venkatesa and Hatfield-Dodds, Zac and Kaplan, Jared and Brauner, Jan and Bowman, Samuel R and Perez, Ethan},
}

@article{schiavo_prospero_2019,
title = {{PROSPERO}: {An} {International} {Register} of {Systematic} {Review} {Protocols}},
volume = {38},
@@ -206,7 +154,7 @@ @misc{prismaid

@misc{prismaid-doc,
author = {Boero, Riccardo},
title = {{prismAId - Documentaiton website}},
title = {{prismAId - Documentation website}},
year = {2024},
publisher = {GitHub},
journal = {GitHub pages},
@@ -231,4 +179,20 @@ @article{boero_ai-enhanced_2024
year = {2024},
pages = {116},
file = {PDF:/var/home/ribo/Zotero/storage/EMFAJHKW/Boero - 2024 - An AI-Enhanced Systematic Review of Climate Adaptation Costs Approaches and Advancements, 2010–2021.pdf:application/pdf},
}

@misc{boero_extended_2024,
title = {An {Extended} {Introduction} to the `{prismAId}` {Tool}: {Open}-{Source} and {Open} {Science} {AI} for {Systematic} {Reviews}},
shorttitle = {An {Extended} {Introduction} to the `{prismAId}` {Tool}},
url = {https://osf.io/wh8qn},
doi = {10.31222/osf.io/wh8qn},
abstract = {`prismAId` is an open-source tool designed to streamline systematic literature reviews by leveraging generative AI models for information extraction. It offers an accessible, efficient, and replicable method for extracting and analyzing data from scientific literature, eliminating the need for coding expertise. Supporting various review protocols, including PRISMA 2020, `prismAId` is distributed across multiple platforms — Go, Python, Julia, R — and provides user-friendly binaries compatible with Windows, macOS, and Linux. The tool integrates with leading large language models (LLMs) such as OpenAI's GPT series, Google's Gemini, Cohere's Command, and Anthropic's Claude, ensuring comprehensive and up-to-date literature analysis. `prismAId` facilitates systematic reviews, enabling researchers to conduct thorough, fast, and reproducible analyses, thereby advancing open science initiatives.},
language = {en-us},
urldate = {2024-12-05},
publisher = {OSF},
author = {Boero, Riccardo},
month = nov,
year = {2024},
keywords = {Generative AI, Large Language Model, Open Source, Open Science, Systematic Literature Review},
file = {OSF Preprint:/var/home/ribo/Zotero/storage/52HDSZ27/Boero - 2024 - An Extended Introduction to the `prismAId` Tool Open-Source and Open Science AI for Systematic Revi.pdf:application/pdf},
}