blog setup

enzokro · Sep 23, 2023 · 0134d54 · 0134d54
1 parent e30f26b
commit 0134d54
Show file tree

Hide file tree

Showing 41 changed files with 9,695 additions and 70 deletions.
diff --git a/.gitignore b/.gitignore
@@ -149,3 +149,5 @@ checklink/cookies.txt
 
 # Quarto
 .quarto
+
+/.quarto/
diff --git a/.nojekyll b/.nojekyll
diff --git a/Fractal_LLM_Course/_modidx.py b/Fractal_LLM_Course/_modidx.py
@@ -5,4 +5,17 @@
                 'doc_host': 'https://enzokro.github.io',
                 'git_url': 'https://github.com/enzokro/Fractal-LLM-Course',
                 'lib_path': 'Fractal_LLM_Course'},
-  'syms': {}}
+  'syms': { 'Fractal_LLM_Course.lesson_2.mock_pipeline': { 'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline': ( 'nbdev.html#sentimentpipeline',
+                                                                                                                            'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.__call__': ( 'nbdev.html#sentimentpipeline.__call__',
+                                                                                                                                     'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.__init__': ( 'nbdev.html#sentimentpipeline.__init__',
+                                                                                                                                     'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.__repr__': ( 'nbdev.html#sentimentpipeline.__repr__',
+                                                                                                                                     'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.forward': ( 'nbdev.html#sentimentpipeline.forward',
+                                                                                                                                    'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.preprocess': ( 'nbdev.html#sentimentpipeline.preprocess',
+                                                                                                                                       'Fractal_LLM_Course/lesson_2/mock_pipeline.py'),
+                                                           'Fractal_LLM_Course.lesson_2.mock_pipeline.SentimentPipeline.process_outputs': ( 'nbdev.html#sentimentpipeline.process_outputs',
+                                                                                                                                            'Fractal_LLM_Course/lesson_2/mock_pipeline.py')}}}
diff --git a/Fractal_LLM_Course/lesson_1/summary.md b/Fractal_LLM_Course/lesson_1/summary.md
@@ -0,0 +1,64 @@
+**Intro Slides**:
+
+**Understanding LLMs**:
+- LLMs have revolutionized the NLP field.
+- Access these powerful models with HuggingFace's API.
+
+**Starting with Sentiment Analysis**:
+- Consider: How do sentiment analyses of user reviews shape product development?
+
+**Introduction to HuggingFace**:
+- Dive into a concise overview of the HuggingFace API.
+- Discover the pipeline's simplicity and efficiency – all in just "3 lines of code."
+
+**Using Jupyter Notebooks**:
+
+**Setting up the Notebook**:
+- Enhance your notebook with extensions like `autoreload`.
+- Grasp the importance of an editable python install (`pip install -e .`).
+
+**Developing a Sentiment Analysis pipeline**:
+- Kickstart your pipeline with the HuggingFace `transformers` library.
+  - For guidance: `from transformers import pipeline`
+- Learn which model the pipeline defaults to: `distilbert`.
+- Chart your course to set up the `classifier` for Sentiment Analysis.
+
+**Understanding the classifier**:
+- Navigate the classifier using Jupyter’s commands `(?, ??, help())`.
+  - Your toolkit: `classifier?`, `classifier??`, and `help(classifier)`.
+- Decode the classifier's design and function for better insights.
+  > **Note**: Adopt this exploration strategy for any python object.
+
+**Examining the pipeline**:
+- Dissect the heart of the pipeline: 
+  - `Config`
+  - `Tokenizer`
+  - `Model`
+- Unpack each component with the `DistilBert` model as your guide.
+- Construct a `simple_pipeline` and master its steps:
+  - Tokenization
+  - Channeling tokens to the model
+  - Drawing meaning from the results.
+
+**Using Auto* Classes**:
+- Step into the realm of HuggingFace's `Auto*` classes for Config, Tokenizer, and Model.
+- Expedite your model experiments leveraging these classes.
+
+**Insights on the pipeline**:
+- Unlock advanced LLM features with ease.
+- Harness your newfound knowledge to design and hone NLP pipelines.
+
+**Connecting Components**:
+- Unite Config, Tokenizer, Model, and their respective repository files.
+  - For clarity, check out the [distilbert repository](https://huggingface.co/distilbert-base-uncased/tree/main).
+- Appreciate the pivotal role these components play in numerous HuggingFace NLP models.
+  > **Point**: Traverse deeper, and you might find more classes tailored for specialized NLP models or other domains.
+
+**Comparing Manual vs. pipeline Approaches**:
+- Weigh the strengths and weaknesses of both avenues.
+- Choose wisely based on your needs.
+
+**Tasks to Consider**:
+- **Challenge**: Embark on sentiment analysis with a fresh dataset.
+  > **Tip**: Delve into the HuggingFace `datasets` module for a head start.
+- Test your skills with another pipeline task, be it Text Summarization or Text Generation.
diff --git a/Fractal_LLM_Course/lesson_2/__init__.py b/Fractal_LLM_Course/lesson_2/__init__.py
diff --git a/Fractal_LLM_Course/lesson_2/mock_pipeline.py b/Fractal_LLM_Course/lesson_2/mock_pipeline.py
@@ -0,0 +1,72 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/02_nbdev.ipynb.
+
+# %% auto 0
+__all__ = ['SentimentPipeline']
+
+# %% ../../nbs/02_nbdev.ipynb 4
+from transformers import AutoConfig
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+
+# %% ../../nbs/02_nbdev.ipynb 5
+class SentimentPipeline:
+    """Docstring for good practice. Hello, class!"""
+    def __init__(self, model_name):
+        """
+        Sentiment Analysis pipeline.
+        """
+        self.model_name = model_name
+        self.config = AutoConfig.from_pretrained(self.model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
+
+
+    def preprocess(self, text: str):
+        """
+        Sends `text` through the LLM's tokenizer.  
+        The tokenizer turns words and characters into special inputs for the LLM.
+        """
+        tokenized_inputs = self.tokenizer(text, return_tensors='pt')
+        return tokenized_inputs
+
+
+    def forward(self, text: str):
+        """
+        First we preprocess the `text` into tokens.
+        Then we send the `token_inputs` to the model.
+        """
+        token_inputs = self.preprocess(text)
+        outputs = self.model(**token_inputs)
+        return outputs
+
+
+    def process_outputs(self, outs):
+        """
+        Here is where HuggingFace does the most for us via `pipeline`.  
+        """
+        # grab the raw "scores" that come from the model for Positive and Negative labels
+        logits = outs.logits
+
+        # find the strongest label score, aka the model's decision
+        pred_idx = logits.argmax(1).item()
+
+        # use the `config` object to find the class label
+        pred_label = self.config.id2label[pred_idx]  
+
+        # calculate the human-readable number for the score
+        pred_score = logits.softmax(-1)[:, pred_idx].item()
+
+        return {
+            'label': pred_label,
+            'score': pred_score, 
+        }
+
+    def __call__(self, text: str):
+        model_outs = self.forward(text)
+        preds = self.process_outputs(model_outs)
+        return preds
+
+    def __repr__(self):
+        return f"SentimentAnalysis_{self.model_name}"
+
+
diff --git a/_quarto.yml b/_quarto.yml
@@ -0,0 +1,9 @@
+project:
+  type: website
+  output-dir: docs
+
+website:
+  navbar:
+    left:
+      - text: "Blog"
+        href: blog/index.qmd
diff --git a/blog/index.qmd b/blog/index.qmd
@@ -0,0 +1,12 @@
+---
+title: Fractal-U LLM Blog
+subtitle: Posts and notes for the Fractal-U LLM Course
+listing:
+  sort: "date desc"
+  contents: "posts"
+  sort-ui: false
+  filter-ui: false
+  categories: true
+  feed: true
+page-layout: full
+---
diff --git a/blog/posts/2023-09-24-my-first-post/index.ipynb b/blog/posts/2023-09-24-my-first-post/index.ipynb
@@ -0,0 +1,19 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/.nojekyll b/docs/.nojekyll