diff --git a/outlines/models/__init__.py b/outlines/models/__init__.py
index 7e9029587..53653f0e6 100644
--- a/outlines/models/__init__.py
+++ b/outlines/models/__init__.py
@@ -9,3 +9,4 @@ from .hf_diffusers import HuggingFaceDiffuser
 from .hf_transformers import HuggingFaceCompletion
 from .openai import OpenAICompletion, OpenAIEmbeddings, OpenAIImageGeneration
+from .transformers import transformers
diff --git a/outlines/models/tokenizer.py b/outlines/models/tokenizer.py
new file mode 100644
index 000000000..84c317dd7
--- /dev/null
+++ b/outlines/models/tokenizer.py
@@ -0,0 +1,23 @@
+from abc import abstractmethod
+from typing import List, Protocol, Tuple, Union
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+class Tokenizer(Protocol):
+    eos_token: str
+    eos_token_id: int
+    pad_token_id: int
+
+    @abstractmethod
+    def encode(
+        self, prompt: Union[str, List[str]]
+    ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]:
+        """Translate the input prompts into NumPy arrays of token ids and attention masks."""
+        ...
+
+    @abstractmethod
+    def decode(self, token_ids: NDArray[np.int64]) -> List[str]:
+        """Translate an array of token ids into a list of strings."""
+        ...
diff --git a/outlines/models/transformers.py b/outlines/models/transformers.py
new file mode 100644
index 000000000..d71272f79
--- /dev/null
+++ b/outlines/models/transformers.py
@@ -0,0 +1,92 @@
+import math
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+
+import numpy as np
+from numpy.typing import NDArray
+
+from outlines.models.tokenizer import Tokenizer
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+
+__all__ = ["transformers"]
+
+
+class Transformers:
+    """Represents a `transformers` model."""
+
+    def __init__(
+        self,
+        model: "PreTrainedModel",
+        tokenizer: "PreTrainedTokenizer",
+        device: Optional[str] = None,
+    ):
+        self.device = device if device is not None else "cpu"
+        self.model = model.to(self.device)
+        self.tokenizer = tokenizer
+
+    def __call__(
+        self, input_ids: NDArray[np.int64], attention_mask: NDArray[np.int64]
+    ) -> NDArray[np.float64]:
+        import torch
+
+        # `transformers` models only accept `input_ids` arrays with at most
+        # two dimensions. We thus flatten the batch dimensions of the input
+        # array, call the model, and reshape the output logits afterwards.
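+        # For example, an input of shape `(3, 2, num_tokens)` is flattened to
+        # `(6, num_tokens)` before the forward pass, and the resulting logits
+        # are reshaped back to `(3, 2, vocab_size)` at the end of this method.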
+        batch_shape = input_ids.shape[:-1]
+        num_tokens = input_ids.shape[-1]
+        input_ids = input_ids.reshape(math.prod(batch_shape), num_tokens)
+
+        with torch.no_grad():
+            input_ids = torch.from_numpy(input_ids).to(self.device)
+            attention_mask = torch.from_numpy(attention_mask).to(self.device)
+
+            output = self.model(input_ids, attention_mask=attention_mask)
+
+        next_token_logits = output.logits[:, -1, :]
+        probs = torch.nn.functional.softmax(next_token_logits, dim=-1).squeeze()
+        probs = torch.atleast_2d(probs)
+        numpy_probs = probs.cpu().detach().numpy()
+
+        return numpy_probs.reshape(batch_shape + (-1,))
+
+
+class TransformersTokenizer(Tokenizer):
+    """Represents a tokenizer for models in the `transformers` library."""
+
+    def __init__(self, model_name: str, **kwargs):
+        from transformers import AutoTokenizer
+
+        kwargs.setdefault("padding_side", "left")
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name, **kwargs)
+        self.eos_token_id = self.tokenizer.eos_token_id
+        self.eos_token = self.tokenizer.eos_token
+
+        if not self.tokenizer.pad_token_id:
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+            self.pad_token_id = self.eos_token_id
+        else:
+            self.pad_token_id = self.tokenizer.pad_token_id
+            self.pad_token = self.tokenizer.pad_token
+
+    def encode(
+        self, prompt: Union[str, List[str]], **kwargs
+    ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]:
+        kwargs["padding"] = True
+        kwargs["return_tensors"] = "np"
+        output = self.tokenizer(prompt, **kwargs)
+        return output["input_ids"], output["attention_mask"]
+
+    def decode(self, token_ids: NDArray[np.int64]) -> List[str]:
+        text = self.tokenizer.batch_decode(token_ids)
+        return text
+
+
+def transformers(model_name: str, device: Optional[str] = None, **model_kwargs):
+    from transformers import AutoModelForCausalLM
+
+    model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+    tokenizer = TransformersTokenizer(model_name)
+
+    return Transformers(model, tokenizer, device)
diff --git a/outlines/text/__init__.py b/outlines/text/__init__.py
index 4b187905e..8870c7a1f 100644
--- a/outlines/text/__init__.py
+++ b/outlines/text/__init__.py
@@ -1,2 +1,3 @@
 from .functions import function
+from .generate import continuation
 from .prompts import prompt, render
diff --git a/outlines/text/generate/__init__.py b/outlines/text/generate/__init__.py
new file mode 100644
index 000000000..3176b9b4a
--- /dev/null
+++ b/outlines/text/generate/__init__.py
@@ -0,0 +1 @@
+from .continuation import continuation
diff --git a/outlines/text/generate/continuation.py b/outlines/text/generate/continuation.py
new file mode 100644
index 000000000..e616d3f36
--- /dev/null
+++ b/outlines/text/generate/continuation.py
@@ -0,0 +1,52 @@
+from typing import List, Optional
+
+import numpy as np
+from numpy.typing import NDArray
+
+from outlines.text.generate.sequence import Sequence
+
+
+class Continuation(Sequence):
+    """Represents a continuation generation model.
+
+    `Continuation` instances are unconstrained generation models that stop when
+    an EOS token has been found or when the maximum number of tokens has been
+    reached.
+
+    >>> import outlines.text as text
+    >>> sequence = text.generate.continuation(model)("Say something")
+
+    """
+
+    def __init__(self, model, max_tokens: Optional[int]):
+        super().__init__(model, max_tokens)
+
+    def is_finished(self, token_ids: NDArray[np.int64]) -> NDArray[np.bool_]:
+        """Determine whether the sequences reached maximum length or end with
+        an EOS token.
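+
+        For example, with `eos_token_id == 0` the batch `[[3, 2, 0], [3, 2, 1]]`
+        yields `[True, False]`.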
+
+        In practice, `Sequence`'s `__call__` method only passes the `token_ids`
+        of the sequences that haven't been marked as finished already, which is
+        why we only need to look for the EOS token in the last element rather
+        than in the whole sequence.
+
+        Parameters
+        ----------
+        token_ids
+            The input sequences.
+
+        """
+        is_finished = np.zeros((token_ids.shape[0],), dtype=np.bool_)
+        is_finished[token_ids[:, -1] == self.model.tokenizer.eos_token_id] = True
+
+        return is_finished
+
+    def postprocess_completions(self, completions: List[str]) -> List[str]:
+        """Remove the EOS token from the completions."""
+        return [
+            completion.replace(self.model.tokenizer.eos_token, "")
+            for completion in completions
+        ]
+
+
+def continuation(model, max_tokens: Optional[int] = None):
+    return Continuation(model, max_tokens)
diff --git a/outlines/text/generate/sequence.py b/outlines/text/generate/sequence.py
new file mode 100644
index 000000000..614297edd
--- /dev/null
+++ b/outlines/text/generate/sequence.py
@@ -0,0 +1,254 @@
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+from numpy.random import Generator
+from numpy.typing import NDArray
+
+
+class Sequence:
+    """Represents a sequence generation method."""
+
+    def __init__(self, model, max_tokens: Optional[int] = None):
+        """Create a `Sequence` instance.
+
+        Parameters
+        ----------
+        model
+            The instance of the model used to generate next-token probabilities.
+        max_tokens
+            The maximum number of tokens that will be generated if no termination
+            condition is met.
+
+        """
+        self.model = model
+        self.max_tokens = max_tokens
+
+    def is_finished(self, token_ids: NDArray[np.int64]) -> NDArray[np.bool_]:
+        """Determine whether we should stop the generation."""
+        raise NotImplementedError(
+            "`Sequence.is_finished` must be implemented by subclasses."
+        )
+
+    def postprocess_completions(self, completions: List[str]) -> List[str]:
+        return completions
+
+    def step(
+        self,
+        rng: Generator,
+        token_ids: NDArray[np.int64],
+        attention_mask: NDArray[np.int64],
+        samples: int = 1,
+    ) -> Tuple[NDArray[np.int64], NDArray[np.float64]]:
+        """Generate one or several tokens that complete the input sequence.
+
+        The sampling step consists of using the model to generate next-token
+        logits and then sampling `samples`-many new tokens from a categorical
+        distribution parametrized by these logits.
+
+        Parameters
+        ----------
+        rng
+            NumPy random number Generator instance.
+        token_ids
+            The token ids passed as an input to the model, of shape `batch_shape
+            + (num_tokens,)`, where `num_tokens` is the sequences' length.
+        samples
+            The number of continuations to sample from the next-token probability
+            distribution.
+
+        Returns
+        -------
+        A tuple with an array of shape `new_batch_shape + (num_tokens+1,)` that
+        contains the completed sequences (input token ids and generated token
+        ids) and an array of shape `new_batch_shape + (vocab_size,)` that
+        contains the next-token probabilities.
+        `new_batch_shape` is computed by removing dimensions of size one in
+        `(samples,) + batch_shape`.
+
+        """
+        num_input_dims = token_ids.ndim
+        probs = self.model(token_ids, attention_mask)
+
+        # Sample `samples`-many new tokens
+        next_token_ids = vectorized_random_choice(rng, probs, samples)
+
+        # Add the missing `num_tokens` and `num_samples` dimensions
+        next_token_ids = np.expand_dims(next_token_ids, -1)
+        token_ids = np.expand_dims(token_ids, 0)
+
+        # Expand the input `token_ids` array to be able to concatenate several
+        # samples.
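+        # For instance, with `samples=3` and a batch of two sequences of four
+        # tokens, `token_ids` goes from shape `(1, 2, 4)` to `(3, 2, 4)`: one
+        # copy of the input sequences per sample.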
+        if samples > 1:
+            repetitions = (samples,) + (1,) * num_input_dims
+            token_ids = np.tile(token_ids, repetitions)
+            probs = np.tile(probs, repetitions)
+
+        token_ids = np.concatenate([token_ids, next_token_ids], axis=-1)
+
+        # Merge sample and batch dimensions by removing dimensions of length
+        # 1. The shape of the resulting arrays is `new_batch_shape + (num_tokens,)`
+        # and `new_batch_shape + (vocab_size,)` respectively.
+        token_ids = np.atleast_2d(token_ids.squeeze())
+        probs = np.atleast_2d(probs.squeeze())
+
+        return token_ids, probs
+
+    def expand_attention_mask(
+        self, attention_mask: NDArray[np.int64]
+    ) -> NDArray[np.int64]:
+        """Expand the attention mask after the last completion."""
+        batch_shape = attention_mask.shape[:-1]
+        attention_mask = np.concatenate(
+            [attention_mask, np.broadcast_to([1], batch_shape + (1,))], axis=-1
+        )
+        return attention_mask
+
+    def update_token_ids(
+        self,
+        is_finished: NDArray[np.bool_],
+        token_ids: NDArray[np.int64],
+        token_ids_unfinished: NDArray[np.int64],
+    ) -> NDArray[np.int64]:
+        """Update the array of token ids after the last completion.
+
+        We only generate new tokens for the sequences that are not finished. We thus
+        update the array with the new tokens, and append pad tokens to the finished
+        sequences.
+
+        Parameters
+        ----------
+        is_finished
+            Boolean array that indicates which sequences are finished.
+        token_ids
+            Array that contains the sequences before the generation's last step.
+        token_ids_unfinished
+            Array that contains the unfinished sequences after the generation's
+            last step.
+
+        Returns
+        -------
+        The updated array of sequences, with pad tokens appended to the
+        finished sequences.
+
+        """
+        batch_shape = token_ids.shape[:-1]
+        num_tokens = token_ids.shape[-1]
+        new_token_ids = np.empty(batch_shape + (num_tokens + 1,), dtype=np.int64)
+
+        token_ids_finished = token_ids[is_finished]
+        batch_shape_finished = token_ids_finished.shape[:-1]
+        token_ids_finished = np.concatenate(
+            [
+                token_ids_finished,
+                np.broadcast_to(
+                    [self.model.tokenizer.pad_token_id], batch_shape_finished + (1,)
+                ),
+            ],
+            axis=-1,
+        )
+
+        new_token_ids[~is_finished] = token_ids_unfinished
+        new_token_ids[is_finished] = token_ids_finished
+
+        return new_token_ids
+
+    def __call__(
+        self,
+        prompt: Union[str, List[str]],
+        samples: int = 1,
+        rng: Generator = np.random.default_rng(),
+    ) -> Union[str, List[str]]:
+        """Generate a new sequence given a prompt.
+
+        Parameters
+        ----------
+        prompt
+            The input prompt.
+        samples
+            The number of samples to generate for each prompt.
+        rng
+            NumPy random number Generator instance.
+
+        Returns
+        -------
+        The full sequences, i.e. the prompts followed by the generated strings.
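+
+        Examples
+        --------
+        A minimal sketch, assuming `model` was created with
+        `outlines.models.transformers` and wrapped in a concrete subclass
+        such as `Continuation`:
+
+        >>> import outlines.text as text
+        >>> sequence = text.generate.continuation(model, max_tokens=10)
+        >>> completion = sequence("Say something", samples=1)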
+ + """ + token_ids, attention_mask = self.model.tokenizer.encode(prompt) + num_prompt_tokens = token_ids.shape[-1] + + if samples > 1: + token_ids, _ = self.step(rng, token_ids, attention_mask, samples) + is_finished = self.is_finished(token_ids) + + num_batch_dims = token_ids.ndim - 1 + repetitions = (samples,) + (1,) * num_batch_dims + attention_mask = np.tile(attention_mask, repetitions) + attention_mask = self.expand_attention_mask(attention_mask) + else: + batch_shape = token_ids.shape[:-1] + is_finished = np.zeros(batch_shape, dtype=np.bool_) + + while True: + num_generated_tokens = token_ids.shape[-1] - num_prompt_tokens + if np.all(is_finished) or num_generated_tokens == self.max_tokens: + break + + token_ids_unfinished = token_ids[~is_finished] + attention_mask_unfinished = attention_mask[~is_finished] + token_ids_unfinished, _ = self.step( + rng, token_ids_unfinished, attention_mask_unfinished + ) + + token_ids = self.update_token_ids( + is_finished, token_ids, token_ids_unfinished + ) + attention_mask = self.expand_attention_mask(attention_mask) + is_finished[~is_finished] = self.is_finished(token_ids_unfinished).flatten() + + result = self.model.tokenizer.decode(token_ids) + result = self.postprocess_completions(result) + + if len(result) == 1: + return result[0] + + return result + + +vsearchsorted = np.vectorize(np.searchsorted, otypes=[int], signature="(n),()->()") + + +def vectorized_random_choice( + rng: Generator, + p: NDArray[np.float64], + samples: int = 1, +): + """Vectorized implementation of `np.random.choice`. + + `np.random.choice` does not support arrays of probability. This implements + the equivalent of this function where the `p` argument can be a matrix. + + Note + ---- + `searchsorted` might be more efficient here since the number of elements + can be quite large. + + Parameters + ---------- + rng + NumPy random number Generator instance + p + An array of probability of shape `(num_probability_vectors, num_items)` + that must sum to 1. + samples + The number of samples to take for each probability vector. 
+
+    Returns
+    -------
+    An array of shape `(num_samples, batch_size)`.
+
+    """
+    cumsum = np.expand_dims(p.cumsum(axis=-1), 0)
+    rand = rng.random((samples,) + p.shape[:-1])
+    idx = vsearchsorted(cumsum, rand)
+
+    return idx
diff --git a/outlines/text/masks.py b/outlines/text/masks.py
index a7e7b4f52..c57625736 100644
--- a/outlines/text/masks.py
+++ b/outlines/text/masks.py
@@ -42,7 +42,7 @@ def create_int_mask(vocabulary: Dict[str, int]) -> np.ndarray:
 
 def create_float_mask(vocabulary: Dict[str, int]) -> np.ndarray:
     """Create a mask to generate floating point numbers."""
-    mask = create_mask_from_regex(vocabulary, r"^([0-9]+([.][0-9]*)?|[.][0-9]+)$")
+    mask = create_mask_from_regex(vocabulary, r"^(([0-9]+)?([.]([0-9]*)?)?|[.][0-9]+)$")
 
     return mask
diff --git a/pyproject.toml b/pyproject.toml
index 8e3e8cd6b..62c7ae99a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,8 +75,8 @@ module = [
     "diffusers",
    "jinja2",
     "joblib",
-    "numpy.*",
     "openai",
+    "numpy.*",
     "perscache.*",
     "PIL",
     "PIL.Image",
@@ -86,7 +86,7 @@ module = [
     "tenacity.*",
     "tiktoken.*",
     "torch",
-    "transformers",
+    "transformers.*",
 ]
 ignore_missing_imports = true
@@ -104,6 +104,7 @@ omit = [
 ...
 ]
 exclude_lines = [
     "pragma: no cover",
     "if TYPE_CHECKING:",
+    "...",
 ]
 show_missing = true
diff --git a/tests/models/test_tokenizer.py b/tests/models/test_tokenizer.py
new file mode 100644
index 000000000..831f7fe3e
--- /dev/null
+++ b/tests/models/test_tokenizer.py
@@ -0,0 +1,8 @@
+import pytest
+
+from outlines.models.tokenizer import Tokenizer
+
+
+def test_tokenizer():
+    with pytest.raises(TypeError, match="instantiate abstract"):
+        Tokenizer()
diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py
new file mode 100644
index 000000000..1d7bcb40a
--- /dev/null
+++ b/tests/models/test_transformers.py
@@ -0,0 +1,67 @@
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+from transformers.models.gpt2 import GPT2TokenizerFast
+
+from outlines.models.transformers import TransformersTokenizer, transformers
+
+TEST_MODEL = "hf-internal-testing/tiny-random-GPTJForCausalLM"
+
+
+def test_tokenizer():
+    tokenizer = TransformersTokenizer(TEST_MODEL)
+    assert tokenizer.eos_token_id == 0
+    assert tokenizer.pad_token_id == 0
+    assert isinstance(tokenizer.tokenizer, GPT2TokenizerFast)
+
+    token_ids, attention_mask = tokenizer.encode("Test")
+    assert token_ids.ndim == 2
+    assert token_ids.shape[0] == 1
+    assert isinstance(token_ids, np.ndarray)
+    assert token_ids.shape == attention_mask.shape
+
+    token_ids, attention_mask = tokenizer.encode(["Test", "Test"])
+    assert token_ids.ndim == 2
+    assert token_ids.shape[0] == 2
+    assert isinstance(token_ids, np.ndarray)
+    assert token_ids.shape == attention_mask.shape
+
+    token_ids, attention_mask = tokenizer.encode(["Test", "A long sentence"])
+    assert token_ids.shape == attention_mask.shape
+    # The shorter prompt is left-padded, so its attention mask starts with 0
+    assert attention_mask[0][0] == 0
+
+    # `decode` returns a list of strings, even for a single sequence
+    text = tokenizer.decode(np.array([[0, 1, 2]]))
+    assert isinstance(text, list)
+    assert isinstance(text[0], str)
+
+    text = tokenizer.decode(np.array([[0, 1, 2], [3, 4, 5]]))
+    assert isinstance(text, list)
+    assert isinstance(text[0], str)
+    assert isinstance(text[1], str)
+
+
+def test_model():
+    with pytest.raises(RuntimeError, match="Expected one of cpu, cuda"):
+        transformers(TEST_MODEL, device="non_existent")
+
+    model = transformers(TEST_MODEL, device="cpu")
+    assert isinstance(model.tokenizer, TransformersTokenizer)
+    assert model.device == "cpu"
+
+    input_ids = np.array([[0, 1, 2]])
+    logits = model(input_ids, np.ones_like(input_ids))
+    assert isinstance(logits, np.ndarray)
+    assert logits.ndim == 2
+    assert logits.shape[0] == 1
+
+    input_ids = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
+    logits = model(input_ids, np.ones_like(input_ids))
+    assert isinstance(logits, np.ndarray)
+    assert logits.ndim == 2
+    assert logits.shape[0] == 3
+
+    input_ids = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [0, 1, 2]]])
+    logits = model(input_ids, np.ones_like(input_ids))
+    assert logits.ndim == 3
+    assert logits.shape[0] == 2
+    assert logits.shape[1] == 2
+    assert_array_equal(logits[0][0], logits[1][1])
diff --git a/tests/text/generate/test_continuation.py b/tests/text/generate/test_continuation.py
new file mode 100644
index 000000000..aaf017491
--- /dev/null
+++ b/tests/text/generate/test_continuation.py
@@ -0,0 +1,42 @@
+import numpy as np
+from numpy.testing import assert_array_equal
+
+from outlines.text.generate.continuation import Continuation, continuation
+
+
+class Tokenizer:
+    eos_token = "<EOS>"
+    eos_token_id = 0
+    pad_token_id = -1
+
+
+class Model:
+    tokenizer = Tokenizer()
+
+
+def test_continuation_is_finished():
+    model = continuation(Model(), 10)
+    assert isinstance(model, Continuation)
+
+    token_ids = np.array([[3, 2]])
+    result = model.is_finished(token_ids)
+    assert_array_equal(result, [False])
+
+    token_ids = np.array([[3, 2, 0]])
+    result = model.is_finished(token_ids)
+    assert_array_equal(result, [True])
+
+    token_ids = np.array([[3, 2, 1], [3, 2, 0]])
+    result = model.is_finished(token_ids)
+    assert_array_equal(result, [False, True])
+
+    token_ids = np.array([[3, 2, 1, 0], [3, 2, 0, -1]])
+    result = model.is_finished(token_ids)
+    assert_array_equal(result, [True, False])
+
+
+def test_continuation_postprocess():
+    model = continuation(Model())
+    result = model.postprocess_completions(["Here<EOS>"])
+    assert len(result) == 1
+    assert result[0] == "Here"
diff --git a/tests/text/generate/test_integration_transfomers.py b/tests/text/generate/test_integration_transfomers.py
new file mode 100644
index 000000000..55bbde966
--- /dev/null
+++ b/tests/text/generate/test_integration_transfomers.py
@@ -0,0 +1,24 @@
+import numpy as np
+
+import outlines.models as models
+from outlines.text.generate.continuation import continuation
+
+
+def test_transformers_integration_completion():
+    rng = np.random.default_rng(0)
+
+    model_name = "hf-internal-testing/tiny-random-GPTJForCausalLM"
+    model = models.transformers(model_name, device="cpu")
+    sequence = continuation(model)("prompt", rng=rng)
+    assert isinstance(sequence, str)
+    assert model.tokenizer.eos_token not in sequence
+
+    sequence = continuation(model, max_tokens=10)("prompt", rng=rng)
+    assert isinstance(sequence, str)
+
+
+def test_transformers_integration_with_pad_token():
+    model_name = "hf-internal-testing/tiny-random-XLMRobertaXLForCausalLM"
+    model = models.transformers(model_name, device="cpu")
+    assert model.tokenizer.pad_token_id == 1
+    assert model.tokenizer.pad_token == "<pad>"
diff --git a/tests/text/generate/test_sequence.py b/tests/text/generate/test_sequence.py
new file mode 100644
index 000000000..9659e8d6a
--- /dev/null
+++ b/tests/text/generate/test_sequence.py
@@ -0,0 +1,393 @@
+from typing import Dict, List, Union
+
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+
+from outlines.text.generate.sequence import Sequence, vectorized_random_choice
+
+
+def test_vectorized_random_choice():
+    rng = np.random.default_rng(0)
+
+    probs = np.array([[1, 0, 0, 0]])
+    sample = vectorized_random_choice(rng, probs)
+    assert sample.shape == (1, 1)
+    assert_array_equal(sample, np.zeros((1, 1)))
+
+    probs = np.array([[1, 0, 0, 0]])
+    sample = vectorized_random_choice(rng, probs, samples=3)
+    assert sample.shape == (3, 1)
+    assert_array_equal(sample, np.zeros((3, 1)))
+
+    probs = np.tile(np.array([[1, 0, 0, 0]]), (2, 1))
+    sample = vectorized_random_choice(rng, probs)
+    assert sample.shape == (1, 2)
+    assert_array_equal(sample, np.zeros((1, 2)))
+
+    probs = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])
+    sample = vectorized_random_choice(rng, probs, samples=3)
+    assert sample.shape == (3, 2)
+    assert_array_equal(sample, [[0, 1], [0, 1], [0, 1]])
+
+    probs = np.array([[[1, 0, 0, 0], [0, 1, 0, 0]], [[0, 0, 1, 0], [0, 0, 0, 1]]])
+    sample = vectorized_random_choice(rng, probs, samples=3)
+    assert sample.shape == (3, 2, 2)
+    assert_array_equal(sample, [[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]])
+
+
+def test_sequence_error():
+    with pytest.raises(NotImplementedError, match="must be implemented"):
+        sequence = Sequence(None)
+        sequence.is_finished(np.array([1]))
+
+
+def ModelStep(logits):
+    """Mock model to test `Sequence.step`."""
+    logits = np.array([logits])
+
+    def call(input_ids, *_):
+        """Call the model.
+
+        We first tile the logits once per sequence in the batch, and then
+        reshape the resulting array to match the batch shape.
+
+        """
+        import math
+
+        batch_shape = input_ids.shape[:-1]
+        vocab_shape = (logits.shape[-1],)
+        shaped_logits = np.tile(logits, (math.prod(batch_shape), 1))
+        return shaped_logits.reshape(batch_shape + vocab_shape)
+
+    return call
+
+
+def test_sequence_step():
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+
+    input_ids = np.array([[1, 2]])
+    token_ids, probs = sequence.step(rng, input_ids, np.ones((1, 2)))
+    assert_array_equal(token_ids, [[1, 2, 1]])
+    assert probs.shape == (1, 4)
+
+
+def test_sequence_step_batch():
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+
+    input_ids = np.array([[1, 2], [3, 4]])
+    token_ids, probs = sequence.step(rng, input_ids, np.ones((2, 2)))
+    assert_array_equal(token_ids, [[1, 2, 1], [3, 4, 1]])
+    assert probs.shape == (2, 4)
+
+
+def test_sequence_step_sample():
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+    input_ids = np.array([[1, 2]])
+    token_ids, probs = sequence.step(rng, input_ids, np.ones((1, 2)), samples=3)
+    assert_array_equal(token_ids, [[1, 2, 1], [1, 2, 1], [1, 2, 1]])
+    assert probs.shape == (3, 4)
+
+
+def test_sequence_sample_batch():
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+    input_ids = np.array([[1, 2, 1], [3, 4, 1]])
+    token_ids, probs = sequence.step(rng, input_ids, np.ones((2, 3)), samples=3)
+    assert_array_equal(
+        token_ids,
+        [
+            [[1, 2, 1, 1], [3, 4, 1, 1]],
+            [[1, 2, 1, 1], [3, 4, 1, 1]],
+            [[1, 2, 1, 1], [3, 4, 1, 1]],
+        ],
+    )
+    assert probs.shape == (3, 2, 4)
+
+
+def test_sequence_step_loop():
+    """Make sure that we can feed `step`'s output back as an input."""
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+    input_ids = np.array([[1, 2]])
+    token_ids, _ = sequence.step(rng, input_ids, np.ones((1, 2)))
+    token_ids, probs = sequence.step(rng, token_ids, np.ones((1, 3)))
+    assert_array_equal(token_ids, [[1, 2, 1, 1]])
+    assert probs.shape == (1, 4)
+
+    input_ids = np.array([[1, 2], [3, 4]])
+    token_ids, _ = sequence.step(rng, input_ids, np.ones((2, 2)))
+    token_ids, probs = sequence.step(rng, token_ids, np.ones((2, 3)))
+    assert_array_equal(token_ids, [[1, 2, 1, 1], [3, 4, 1, 1]])
+    assert probs.shape == (2, 4)
+
+    # The number of samples becomes the batch size at the next iteration.
+    input_ids = np.array([[1, 2]])
+    token_ids, _ = sequence.step(rng, input_ids, np.ones((1, 2)), samples=3)
+    token_ids, probs = sequence.step(rng, token_ids, np.ones((3, 3)))
+    assert_array_equal(token_ids, [[1, 2, 1, 1], [1, 2, 1, 1], [1, 2, 1, 1]])
+    assert probs.shape == (3, 4)
+
+
+def test_sequence_step_loop_general():
+    rng = np.random.default_rng(0)
+
+    logits = np.array([0, 1, 0, 0])
+    model = ModelStep(logits)
+
+    sequence = Sequence(model)
+    input_ids = np.array([[1, 2, 1], [3, 4, 1]])
+    token_ids, _ = sequence.step(rng, input_ids, np.ones((1, 3)), samples=3)
+    result, _ = sequence.step(rng, token_ids, np.ones((3, 4)))
+    assert result.shape == (3, 2, 5)
+    assert_array_equal(
+        result,
+        [
+            [[1, 2, 1, 1, 1], [3, 4, 1, 1, 1]],
+            [[1, 2, 1, 1, 1], [3, 4, 1, 1, 1]],
+            [[1, 2, 1, 1, 1], [3, 4, 1, 1, 1]],
+        ],
+    )
+
+
+class TokenizerUpdateTokens:
+    pad_token_id = -1
+
+
+class ModelUpdateTokens:
+    tokenizer = TokenizerUpdateTokens()
+
+
+def test_update_token_ids_all_unfinished():
+    sequence = Sequence(ModelUpdateTokens())
+
+    previous_token_ids = np.array([[1, 1], [1, 1]])
+    is_finished = np.array([False, False])
+    token_ids_unfinished = np.array([[1, 1, 1], [1, 1, 1]])
+
+    result = sequence.update_token_ids(
+        is_finished, previous_token_ids, token_ids_unfinished
+    )
+    assert_array_equal(result, [[1, 1, 1], [1, 1, 1]])
+
+
+def test_update_token_ids_some_unfinished():
+    """Make sure that the pad token is appended to finished sequences."""
+    sequence = Sequence(ModelUpdateTokens())
+
+    previous_token_ids = np.array([[1, 1], [1, 1]])
+    token_ids_unfinished = np.array([[1, 1, 1]])
+    is_finished = np.array([True, False])
+    result = sequence.update_token_ids(
+        is_finished, previous_token_ids, token_ids_unfinished
+    )
+    assert_array_equal(result, [[1, 1, -1], [1, 1, 1]])
+
+
+@pytest.mark.xfail
+def test_update_token_ids_larger_dimensions():
+    sequence = Sequence(ModelUpdateTokens())
+
+    previous_token_ids = np.array([[1, 1], [1, 1]])
+    is_finished = np.array([False, False])
+    token_ids_unfinished = np.array([[1, 1, 1], [1, 1, 1]])
+    result = sequence.update_token_ids(
+        is_finished, previous_token_ids, token_ids_unfinished
+    )
+    assert_array_equal(result, [[1, 1, -1], [1, 1, 1]])
+
+
+class MockModel:
+    def __init__(self, tokenizer, logits):
+        self.tokenizer = tokenizer
+        self.logits = np.array(logits)
+        self.iteration_idx = 0
+
+    def __call__(self, input_ids, *_):
+        import math
+
+        batch_shape = input_ids.shape[:-1]
+        vocab_shape = (self.logits.shape[-1],)
+        shaped_logits = np.tile(
+            self.logits[self.iteration_idx], (math.prod(batch_shape), 1)
+        )
+        self.iteration_idx += 1
+
+        return shaped_logits.reshape(batch_shape + vocab_shape)
+
+
+class MockTokenizer:
+    def __init__(self, vocabulary: Dict[str, int]):
+        self.vocabulary = vocabulary
+        self.pad_token_id = -1
+
+    def encode(self, prompts: Union[str, List[str]]):
+        if isinstance(prompts, str):
+            prompts = [prompts]
+
+        token_ids = np.array([[self.vocabulary[prompt]] for prompt in prompts])
+        attention_mask = np.ones_like(token_ids)
+
+        return token_ids, attention_mask
+
+    def decode(self, token_ids):
+        return token_ids
+
+
+def test_call_single_prompt():
+    class FinishAfterTwo(Sequence):
+        def __init__(self, model):
+            super().__init__(model)
+            self.iteration_idx = 0
+
+        def is_finished(self, token_ids):
+            """Finish generating the sequence after two iterations."""
+            if self.iteration_idx == 0:
+                self.iteration_idx += 1
+                return np.array([False])
+            else:
+                return np.array([True])
+
+    tokenizer = MockTokenizer({"Test": 0, "a": 1, "b": 2})
+    model = MockModel(tokenizer, [[1, 0, 0], [0, 1, 0]])
+    sequence = FinishAfterTwo(model)
+
+    result = sequence("Test")
+    assert_array_equal(result, [0, 0, 1])
+
+
+def test_call_prompt_list():
+    class Tokenizer:
+        def __init__(self, vocabulary: Dict[str, int]):
+            self.vocabulary = vocabulary
+            self.pad_token_id = -1
+
+        def __call__(self, prompts: List[str], **_):
+            return {
+                "input_ids": np.array(
+                    [[self.vocabulary[prompt]] for prompt in prompts]
+                )
+            }
+
+        def batch_decode(self, token_ids):
+            return token_ids
+
+    class FinishAfterThree(Sequence):
+        def __init__(self, model):
+            super().__init__(model)
+            self.iteration_idx = 0
+
+        def is_finished(self, token_ids):
+            """Finish generating the first and third sequences after two
+            iterations and the second one after three iterations.
+ + """ + if self.iteration_idx == 0: + self.iteration_idx += 1 + return np.array([False, False, False]) + elif self.iteration_idx == 1: + self.iteration_idx += 1 + return np.array([True, False, True]) + else: + return np.array([True]) # We only consider the unfinished sequences + + tokenizer = MockTokenizer( + {"Test1": 0, "Test2": 1, "a": 2, "b": 3, "c": 4, "Test3": 5} + ) + model = MockModel( + tokenizer, + [[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0]], + ) + sequence = FinishAfterThree(model) + + result = sequence(["Test1", "Test2", "Test3"]) + assert_array_equal(result, [[0, 2, 3, -1], [1, 2, 3, 4], [5, 2, 3, -1]]) + + +def test_call_single_prompt_samples(): + class FinishAfterTwo(Sequence): + def __init__(self, model): + super().__init__(model) + self.iteration_idx = 0 + + def is_finished(self, token_ids): + if self.iteration_idx == 0: + self.iteration_idx += 1 + return np.array([False, False, False]) + else: + return np.array([True, True, True]) + + tokenizer = MockTokenizer({"a": 0, "b": 1, "c": 2, "Test": 4}) + model = MockModel(tokenizer, [[1, 0, 0, 0], [0, 1, 0, 0]]) + sequence = FinishAfterTwo(model) + result = sequence("Test", samples=3) + assert_array_equal(result, [[4, 0, 1], [4, 0, 1], [4, 0, 1]]) + + class FinishAfterOne(Sequence): + def __init__(self, model): + super().__init__(model) + + def is_finished(self, token_ids): + return np.array([True, True, True]) + + tokenizer = MockTokenizer({"a": 0, "b": 1, "c": 3, "Test": 4}) + model = MockModel(tokenizer, [[1, 0, 0, 0], [0, 1, 0, 0]]) + sequence = FinishAfterOne(model) + result = sequence("Test", samples=3) + assert_array_equal(result, [[4, 0], [4, 0], [4, 0]]) + + +def test_call_prompt_list_samples(): + class FinishAfterThree(Sequence): + def __init__(self, model): + super().__init__(model) + self.iteration_idx = 0 + + def is_finished(self, token_ids): + if self.iteration_idx == 0: + self.iteration_idx += 1 + batch_shape = token_ids.shape[:-1] + return np.zeros(batch_shape, dtype=np.bool_) + elif self.iteration_idx == 1: + self.iteration_idx += 1 + return np.array( + [[True, False, True], [True, False, True], [True, False, True]] + ) + else: + return np.array([True, True, True]) + + tokenizer = MockTokenizer( + {"a": 0, "b": 1, "c": 2, "Test1": 3, "Test2": 4, "Test3": 5} + ) + model = MockModel( + tokenizer, [[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0]] + ) + sequence = FinishAfterThree(model) + + result = sequence(["Test1", "Test2", "Test3"], samples=3) + assert_array_equal( + result, np.tile([[3, 0, 1, -1], [4, 0, 1, 2], [5, 0, 1, -1]], (3, 1, 1)) + ) diff --git a/tests/text/test_masks.py b/tests/text/test_masks.py index c9d37353e..3c0dc782c 100644 --- a/tests/text/test_masks.py +++ b/tests/text/test_masks.py @@ -24,11 +24,12 @@ def test_float_mask(): "1.": 5, "0.": 6, "1.2.3": 7, + ".": 8, } mask = create_float_mask(vocabulary) assert_array_equal( - mask, np.array([True, True, False, False, True, True, True, False]) + mask, np.array([True, True, False, False, True, True, True, False, True]) )