From f63cf2c9373d9c8f2b93ff87ee187560810c0686 Mon Sep 17 00:00:00 2001
From: David Beaumont <dbeaumont@google.com>
Date: Wed, 15 Feb 2023 18:44:20 +0100
Subject: [PATCH] Project import generated by Copybara. (#2890)

PiperOrigin-RevId: 509849832
---
 .../phonenumbers/metadata/LengthsParser.java  |  71 +++
 .../i18n/phonenumbers/metadata/Types.java     |   2 +-
 .../DigitSequenceMatcher.java                 | 317 +++++++++
 .../metadata/finitestatematcher/OpCode.java   | 262 ++++++++
 .../compiler/MatcherBytes.java                | 247 +++++++
 .../compiler/MatcherCompiler.java             | 299 +++++++++
 .../compiler/Operation.java                   | 600 ++++++++++++++++++
 .../compiler/Statistics.java                  |  44 ++
 .../metadata/model/ExamplesTableSchema.java   |   4 +-
 .../metadata/model/FileBasedCsvLoader.java    |   2 +-
 .../metadata/model/FormatsTableSchema.java    |   2 +-
 .../metadata/model/RangesTableSchema.java     | 163 +++--
 .../phonenumbers/metadata/regex/AnyPath.java  | 181 ++++++
 .../phonenumbers/metadata/regex/Edge.java     | 351 ++++++++++
 .../metadata/regex/EdgeWriter.java            | 343 ++++++++++
 .../metadata/regex/NfaFlattener.java          | 195 ++++++
 .../phonenumbers/metadata/regex/Node.java     |  51 ++
 .../metadata/regex/RangeTreeConverter.java    | 123 ++++
 .../metadata/regex/RegexFormatter.java        | 118 ++++
 .../metadata/regex/RegexGenerator.java        | 171 +++++
 .../metadata/regex/SubgroupOptimizer.java     | 190 ++++++
 .../metadata/regex/TrailingPathOptimizer.java | 206 ++++++
 .../metadata/table/CsvParser.java             |  30 +-
 .../phonenumbers/metadata/table/CsvTable.java |   2 +-
 .../metadata/table/RangeTable.java            |   2 +-
 .../phonenumbers/metadata/table/Schema.java   |   2 +-
 .../metadata/LengthsParserTest.java           |  76 +++
 .../metadata/RangeSpecificationTest.java      |   2 +-
 .../DigitSequenceMatcherTest.java             | 210 ++++++
 .../compiler/CompilerRegressionTest.java      | 317 +++++++++
 .../compiler/MatcherCompilerTest.java         | 144 +++++
 .../compiler/OperationTest.java               |  60 ++
 .../compiler/regression_test_data.textpb      | 295 +++++++++
 .../metadata/regex/AnyPathTest.java           | 106 ++++
 .../phonenumbers/metadata/regex/EdgeTest.java | 224 +++++++
 .../metadata/regex/EdgeWriterTest.java        | 154 +++++
 .../metadata/regex/NfaBuilder.java            |  98 +++
 .../metadata/regex/NfaFlattenerTest.java      | 229 +++++++
 .../phonenumbers/metadata/regex/NodeTest.java |  68 ++
 .../regex/RangeTreeConverterTest.java         | 154 +++++
 .../metadata/regex/RegexFormatterTest.java    | 107 ++++
 .../metadata/regex/RegexGeneratorTest.java    | 197 ++++++
 .../metadata/regex/SubgraphOptimizerTest.java |  80 +++
 .../regex/TrailingPathOptimizerTest.java      | 122 ++++
 metadata/src/test/proto/regression_test.proto |  49 ++
 45 files changed, 6559 insertions(+), 111 deletions(-)
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java
 create mode 100644 metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java
 create mode 100644 metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java
 create mode 100644 metadata/src/test/proto/regression_test.proto

diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java
new file mode 100644
index 0000000000..d68cf9bac3
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2022 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata;
+
+import static com.google.common.base.CharMatcher.whitespace;
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.lang.Integer.parseUnsignedInt;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Splitter;
+import com.google.common.collect.ContiguousSet;
+import com.google.common.collect.ImmutableSortedSet;
+import java.util.List;
+import java.util.NavigableSet;
+import java.util.TreeSet;
+
+/** Parses strings of the form "4,7-9,11" which are used as length specifiers in LPN metadata. */
+public final class LengthsParser {
+
+  private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults(whitespace());
+  private static final Splitter RANGE_SPLITTER =
+      Splitter.on('-').trimResults(whitespace()).limit(2); // at most a "lo" and a "hi" part
+  private static final CharMatcher ALLOWED_CHARACTERS =
+      CharMatcher.inRange('0', '9').or(CharMatcher.anyOf("-,")).or(whitespace());
+
+  /** Returns the sorted lengths given by {@code s}; values must be strictly ascending. */
+  public static ImmutableSortedSet<Integer> parseLengths(String s) {
+    checkArgument(
+        ALLOWED_CHARACTERS.matchesAllOf(s),
+        "Length specifier contains forbidden characters: %s",
+        s);
+    NavigableSet<Integer> lengths = new TreeSet<>();
+    for (String lengthOrRange : COMMA_SPLITTER.split(s)) {
+      if (lengthOrRange.contains("-")) {
+        List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange);
+        int lo = parseUnsignedInt(lohi.get(0));
+        int hi = parseUnsignedInt(lohi.get(1));
+        checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi); // "5-5" and "9-7" are rejected
+        checkArgument(
+            lengths.isEmpty() || lo > lengths.last(),
+            "Numbers in length specifier are out of order: %s",
+            s);
+        lengths.addAll(ContiguousSet.closed(lo, hi)); // both bounds are included
+      } else {
+        int length = parseUnsignedInt(lengthOrRange);
+        checkArgument(
+            lengths.isEmpty() || length > lengths.last(),
+            "Numbers in length specifier are out of order: %s",
+            s);
+        lengths.add(length);
+      }
+    }
+    return ImmutableSortedSet.copyOf(lengths);
+  }
+
+  private LengthsParser() {}
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
index 392e62db4c..24c81db8e2 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
@@ -71,7 +71,7 @@ public final class Types {
           .put(XML_VOIP, VOIP)
           .put(XML_UAN, UAN)
           .put(XML_VOICEMAIL, VOICEMAIL)
-          .build();
+          .buildOrThrow();
 
   /** Returns the set of valid XML type names. */
   public static ImmutableSet<String> getXmlNames() {
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java
new file mode 100644
index 0000000000..589ffab924
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java
@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher;
+
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode.State;
+
+/**
+ * Matches phone number regular expressions based on compact compiled data generated by
+ * {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler
+ * MatcherCompiler}. Typically the phone number regular expression will be compiled at build time
+ * and the resulting matcher data will be packaged into the binary which needs it, or downloaded at
+ * run time.
+ * <p>
+ * This class is designed to be lightweight and fast, and should be simple to implement in many
+ * different languages (C++, Python, JS, etc.).
+ *
+ * TODO: Consider UnsignedBytes.toInt(x) to avoid lots of (x & 0xFF).
+ */
+public abstract class DigitSequenceMatcher {
+
+  /** Possible result types returned by a match operation. */
+  public enum Result {
+    /** The match operation was a success and the input was matched. */
+    MATCHED,
+    /** The match operation failed because unexpected input was encountered. */
+    INVALID,
+    /**
+     * The match operation failed because the input terminated too soon (ie, the input was a
+     * valid prefix for the matcher).
+     */
+    TOO_SHORT,
+    /**
+     * The match operation failed due to the existence of additional input after matching had
+     * completed (ie, the input would have matched if it were shorter).
+     */
+    TOO_LONG;
+  }
+
+  /** An iterator of {@code int}, used to supply the matcher with a sequence of input digits. */
+  public interface DigitSequence {
+    /** Returns true if there are more digits available. */
+    boolean hasNext();
+
+    /**
+     * Return the next digit value (from 0 to 9 inclusive, not a char value). The matcher does not
+     * test for invalid digits, so returning values outside this range will have undefined results,
+     * including false positive results.
+     */
+    int next();
+  }
+
+  /** Internal abstraction to allow matching over either byte arrays or strings. */
+  interface DataView {
+    /** Return the unsigned byte value at the given offset from the current position. */
+    int peekByte(int offset);
+
+    /** Return the unsigned byte value at the current position and move ahead 1 byte. */
+    int readByte();
+
+    /** Return the unsigned short value at the current position and move ahead 2 bytes. */
+    int readShort();
+
+    /** Return the unsigned int value at the current position and move ahead 4 bytes. */
+    int readInt();
+
+    /** Adjust the current position by the given (non-negative) offset. */
+    State branch(int offset);
+
+    /**
+     * Adjust the current position by the unsigned byte offset value read from the current
+     * position plus the given index. This is used to implement maps and branching ranges.
+     */
+    State jumpTable(int index);
+  }
+
+  /**
+   * Creates a new matcher which reads instructions directly from the given byte array. Typically
+   * it is expected that this method will consume byte arrays packaged into a binary at build time
+   * (the MatcherCompiler is not suitable for direct parsing of regular expressions at run time).
+   * <p>
+   * See {@code MatcherCompiler.compile(...)}.
+   */
+  public static DigitSequenceMatcher create(byte[] data) {
+    if (data.length == 0) {
+      throw new IllegalArgumentException("matcher data cannot be empty");
+    }
+    return new ByteArrayMatcher(data);
+  }
+
+  /**
+   * Creates a new matcher which reads instructions from the given string. Typically it is expected
+   * that this method will be used when matcher data is packaged as literal Java string constants
+   * in (auto-generated) source files.
+   * <p>
+   * See {@code MatcherCompiler.compileToUnquotedJavaSourceString(...)}.
+   */
+  public static DigitSequenceMatcher create(String data) {
+    if (data.isEmpty()) {
+      throw new IllegalArgumentException("matcher data cannot be empty");
+    }
+    return new StringMatcher(data);
+  }
+
+  abstract DataView newDataView();
+
+  abstract int size();
+
+  /** Matches the input against this matcher, returning a result code. */
+  public Result match(DigitSequence in) {
+    State state = runMatcher(in);
+    switch (state) {
+      case TERMINAL:
+        return !in.hasNext() ? Result.MATCHED : Result.TOO_LONG; // leftover digits => TOO_LONG
+      case TRUNCATED:
+        return Result.TOO_SHORT;
+      case INVALID:
+        return Result.INVALID;
+      default:
+        throw new AssertionError("unexpected state: " + state);
+    }
+  }
+
+  private State runMatcher(DigitSequence in) {
+    DataView data = newDataView();
+    State state;
+    do {
+      state = OpCode.decode(data.peekByte(0)).execute(data, in); // peek: execute() re-reads it
+    } while (state == State.CONTINUE);
+    return state;
+  }
+
+  @Override
+  public String toString() {
+    int size = size();
+    StringBuilder out = new StringBuilder(size + " :: [ ");
+    DataView data = newDataView();
+    while (size-- > 0) {
+      out.append(Integer.toHexString(data.readByte())).append(", ");
+    }
+    out.setLength(out.length() - 2); // strip the trailing ", "
+    out.append(" ]");
+    return out.toString();
+  }
+
+  /** A matcher for reading instructions from a byte array. */
+  private static final class ByteArrayMatcher extends DigitSequenceMatcher {
+
+    private class ByteArrayData implements DataView {
+      int position = 0;
+
+      @Override public int peekByte(int offset) {
+        return bytes[position + offset] & 0xFF; // widen to an unsigned value
+      }
+
+      @Override public int readByte() {
+        return bytes[position++] & 0xFF;
+      }
+
+      @Override public int readShort() {
+        return (readByte() << 8) | readByte(); // high byte comes first
+      }
+
+      @Override public int readInt() {
+        return (readShort() << 16) | readShort();
+      }
+
+      @Override public State branch(int offset) {
+        position += offset;
+        return offset != 0 ? State.CONTINUE : State.TERMINAL; // zero offset => terminal
+      }
+
+      @Override public State jumpTable(int index) {
+        return branch(peekByte(index));
+      }
+    }
+
+    private final byte[] bytes;
+
+    private ByteArrayMatcher(byte[] data) {
+      this.bytes = data;
+    }
+
+    @Override
+    DataView newDataView() {
+      return new ByteArrayData();
+    }
+
+    @Override
+    int size() {
+      return bytes.length;
+    }
+  }
+
+  /** A matcher for reading instructions from a String. */
+  private static final class StringMatcher extends DigitSequenceMatcher {
+
+    /*
+     * Note: Using unsigned shift "x >>> 1" is more likely to be free as part of a data load
+     * instruction than "x / 2".
+     */
+
+    private class StringData implements DataView {
+      int position = 0;
+
+      @Override public int peekByte(int offset) {
+        offset += position;
+        int data = bytes.charAt(offset >>> 1);
+        // char := hi [ even-byte | odd-byte  ] lo
+        return (offset & 1) != 0 ? data & 0xFF : data >>> 8;
+      }
+
+      @Override public int readByte() {
+        int data = bytes.charAt(position >>> 1);
+        // char := hi [ even-byte | odd-byte  ] lo
+        data = (position & 1) != 0 ? data & 0xFF : data >>> 8;
+        position += 1;
+        return data;
+      }
+
+      @Override public int readShort() {
+        int data = bytes.charAt(position >>> 1);
+        // Adding 2 early does not affect odd/even (but does reference next char).
+        position += 2;
+        if ((position & 1) != 0) {
+          data = ((data & 0xFF) << 8) | (bytes.charAt(position >>> 1) >>> 8);
+        }
+        return data;
+      }
+
+      @Override public int readInt() {
+        return (readShort() << 16) | readShort();
+      }
+
+      @Override public State branch(int offset) {
+        position += offset;
+        return offset != 0 ? State.CONTINUE : State.TERMINAL; // zero offset => terminal
+      }
+
+      @Override public State jumpTable(int index) {
+        return branch(peekByte(index));
+      }
+    }
+
+    private final String bytes;
+
+    private StringMatcher(String bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    DataView newDataView() {
+      return new StringData();
+    }
+
+    @Override
+    int size() {
+      int size = 2 * bytes.length();
+      if ((bytes.charAt(bytes.length() - 1) & 0xFF) == 0xFF) { // presumably a padding byte
+        size -= 1;
+      }
+      return size;
+    }
+  }
+
+  /** An iterator of {@code int} that yields a sequence of input digits from a string. */
+  private static final class StringDigits implements DigitSequence {
+    private final CharSequence number;
+    private int n = 0;
+
+    private StringDigits(CharSequence number) {
+      this.number = number;
+    }
+
+    @Override public int next() {
+      if (n < 0 || n >= number.length()) {
+        throw new IndexOutOfBoundsException(
+            "index '" + n + "' out of bounds for input: " + number);
+      }
+      char c = number.charAt(n);
+      if (c < '0' || c > '9') {
+        throw new IllegalArgumentException(
+            "non-digit character '" + c + "' [" + ((int) c) + "] at index " + n + " in: " + number);
+      }
+      n++;
+      return c - '0';
+    }
+
+    @Override public boolean hasNext() {
+      return n < number.length();
+    }
+  }
+
+  /**
+   * Returns an instance of DigitSequence based on the input string. The input string may only
+   * contain digits.
+   */
+  public static DigitSequence digitsFromString(CharSequence number) {
+    return new StringDigits(number);
+  }
+
+  /** A matcher has no internal state and is just a factory for data specific implementations. */
+  private DigitSequenceMatcher() { }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java
new file mode 100644
index 0000000000..6d59a0fb9d
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher;
+
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DataView;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DigitSequence;
+
+/**
+ * Implementation of instructions for the phone number matcher state machine.
+ * <p>
+ * <h3>Jump Tables</h3>
+ *
+ * Several instructions use a "jump table" concept which is simply a contiguous region of bytes
+ * containing offsets from which a new position is calculated. The new position is the current
+ * position (at the start of the jump table) plus the value of the chosen jump offset.
+ *
+ * <pre>{@code
+ * [    ...    | JUMP_0 | JUMP_1 | ... | JUMP_N |    ...    |  DEST  |  ...
+ *  position --^            ^                               ^
+ *             `---index ---'                               |
+ *  offset     `----------------  [ position + index ] -----'
+ *
+ *  position = position + unsignedByteValueAt(position + index)
+ * }</pre>
+ *
+ * A jump offset of zero signifies that the state jumped to is terminal (this avoids having to jump
+ * to a termination byte). A jump table will always occur immediately after an associated
+ * instruction and the instruction's stated size includes the number of bytes in the jump table.
+ */
+public enum OpCode {
+  /**
+   * Jumps ahead by between 1 and 4095 bytes from the end of this opcode. This opcode does not
+   * consume any input.
+   * <p>
+   * This is a variable length instruction, taking one byte for offsets up to 15 and (if EXT is set)
+   * two bytes for larger offsets up to 4095. The jump offset signifies how many bytes to skip after
+   * this instruction.
+   * <p>
+   * As a special case, a single byte branch with a jump offset of zero (represented by a single
+   * zero byte) can be used to signify that the current state is terminal and the state machine
+   * should exit (a zero jump offset never makes sense in any instruction).
+   *
+   * <pre>{@code
+   * [ 0 | 0 |  JUMP   ]
+   * [ 0 | 1 |  JUMP   |  EXT_JUMP   ]
+   *  <3>.<1>.<-- 4 -->.<---- 8 ---->
+   * }</pre>
+   */
+  BRANCH(0) {
+    @Override
+    State execute(DataView data, DigitSequence ignored) {
+      int op = data.readByte();
+      int offset = op & 0xF;
+      if ((op & (1 << 4)) != 0) { // EXT set: two byte form
+        offset = (offset << 8) + data.readByte();
+      }
+      return data.branch(offset);
+    }
+  },
+  /**
+   * Accepts a single input (and transitions to a single state). Inputs not matching "VAL" are
+   * invalid from the current state. If "TRM" is set then the state being transitioned from may
+   * terminate.
+   *
+   * <pre>{@code
+   * [ 1 |TRM|  VAL  ]
+   *  <3>.<1>.<- 4 ->
+   * }</pre>
+   */
+  SINGLE(1) {
+    @Override
+    State execute(DataView data, DigitSequence in) {
+      int op = data.readByte();
+      if (!in.hasNext()) {
+        return ((op & (1 << 4)) != 0) ? State.TERMINAL : State.TRUNCATED;
+      }
+      int n = in.next();
+      return ((op & 0xF) == n) ? State.CONTINUE : State.INVALID;
+    }
+  },
+  /**
+   * Accept any input to transition to a single state one or more times.
+   * <p>
+   * If "TRM" is set then every state that is transitioned from may terminate.
+   *
+   * <pre>{@code
+   * [ 2 |TRM| NUM-1 ]
+   *  <3>.<1>.<- 4 ->
+   * }</pre>
+   */
+  ANY(2) {
+    @Override
+    State execute(DataView data, DigitSequence in) {
+      int op = data.readByte();
+      int num = (op & 0xF) + 1; // encoded as NUM-1
+      boolean isTerminating = (op & (1 << 4)) != 0;
+      while (num-- > 0) {
+        if (!in.hasNext()) {
+          return isTerminating ? State.TERMINAL : State.TRUNCATED;
+        }
+        in.next();
+      }
+      return State.CONTINUE;
+    }
+  },
+  /**
+   * Accepts multiple inputs to transition to one or two states. The bit-set has the Nth bit set if
+   * we should accept digit N (bit-0 is the lowest bit of the 2 byte form of the instruction).
+   * <p>
+   * This is a variable length instruction which either treats non-matched inputs as invalid
+   * (2 byte form) or branches to one of two states via a 2-entry jump table (4 byte form).
+   * <p>
+   * If "TRM" is set then the state being transitioned from may terminate.
+   *
+   * <pre>{@code
+   * [ 3 |TRM| 0 |---|   BIT SET  ]
+   * [ 3 |TRM| 1 |---|   BIT SET  |  JUMP_IN  | JUMP_OUT  ]
+   *  <3>.<1>.<1>.<1>.<--- 10 --->.<--- 8 --->.<--- 8 --->
+   * }</pre>
+   */
+  RANGE(3) {
+    @Override
+    State execute(DataView data, DigitSequence in) {
+      int op = data.readShort();
+      if (!in.hasNext()) {
+        return ((op & (1 << 12)) != 0) ? State.TERMINAL : State.TRUNCATED; // bit 12 is TRM
+      }
+      int n = in.next();
+      if ((op & (1 << 11)) == 0) {
+        // 2 byte form, non-matched input is invalid.
+        return ((op & (1 << n)) != 0) ? State.CONTINUE : State.INVALID;
+      }
+      // 4 byte form uses jump table (use bitwise negation so a set bit becomes a 0 index).
+      return data.jumpTable((~op >>> n) & 1);
+    }
+  },
+  /**
+   * Accept multiple inputs to transition to between one and ten states via jump offsets. Inputs
+   * not encoded in "CODED MAP" are invalid from the current state.
+   * <p>
+   * Because there is no room for a termination bit in this instruction, there is an alternate
+   * version, {@code TMAP}, which should be used when transitioning from a terminating state.
+   * <p>
+   * TODO: Figure out if we can save one bit here and merge MAP and TMAP.
+   *
+   * <pre>{@code
+   * [ 4 |      CODED MAP       |  JUMP_1   |  ... |  JUMP_N   ]
+   *  <3>.<-------- 29 -------->.<--- 8 --->.  ... .<--- 8 --->
+   * }</pre>
+   */
+  MAP(4) {
+    @Override
+    State execute(DataView data, DigitSequence in) {
+      return map(data, in, State.TRUNCATED);
+    }
+  },
+  /**
+   * Like {@code MAP} but transitions from a terminating state.
+   */
+  TMAP(5) {
+    @Override
+    State execute(DataView data, DigitSequence in) {
+      return map(data, in, State.TERMINAL);
+    }
+  };
+
+  /** The types of states that the state-machine can be in. */
+  public enum State {
+    CONTINUE, TERMINAL, INVALID, TRUNCATED;
+  }
+
+  private static final OpCode[] VALUES = values();
+
+  /**
+   * Encode maps as 29 bits where each digit takes a different number of bits to encode its offset.
+   * Specifically:
+   * <ul>
+   * <li>The first entry (matching 0) has only two possible values (it is either not present or maps
+   * to the first entry in the jump table), so takes only 1 bit.
+   * <li>The second entry (matching 1) has three possible values (not present or maps to either the
+   * first or second entry in the jump table), so it takes 2 bits.
+   * <li>In general digit N has (N+2) possible values and needs ceil(log2(N+2)) bits.
+   * </ul>
+   */
+  private static final long MAP_SHIFT_BITS = 0L << 0 | // 1 bit  (1x, mask=1)
+      1L << 5 | 3L << 10 |                             // 2 bits (2x, mask=3)
+      5L << 15 | 8L << 20 | 11L << 25 | 14L << 30 |    // 3 bits (4x, mask=7)
+      17L << 35 | 21L << 40 | 25L << 45;               // 4 bits (3x, mask=F)
+
+  /**
+   * A table of values with which to mask the coded jump table map, after shifting it. Each nibble
+   * is a mask of up to 4 bits to extract the encoded index from a map instruction after it has
+   * been shifted.
+   */
+  private static final long MAP_MASK_BITS = 0xFFF7777331L;
+
+  /**
+   * Returns the number of bits we must shift the coded jump table map for a digit with value
+   * {@code n} such that the jump index is in the lowest bits.
+   */
+  public static int getMapShift(int n) {
+    return (int) (MAP_SHIFT_BITS >>> (5 * n)) & 0x1F; // 5 bits per digit
+  }
+
+  /**
+   * Returns a mask we must apply to the shifted jump table map to extract only the jump index from
+   * the lowest bits.
+   */
+  public static int getMapMask(int n) {
+    return (int) (MAP_MASK_BITS >>> (4 * n)) & 0xF; // 4 bits (one nibble) per digit
+  }
+
+  /**
+   * Executes a map instruction by decoding the map data and selecting a jump offset to apply.
+   */
+  private static State map(DataView data, DigitSequence in, State noInputState) {
+    int op = data.readInt();
+    if (!in.hasNext()) {
+      return noInputState;
+    }
+    int n = in.next();
+    // Coded indices are 1-to-10 (0 is the "invalid" state).
+    int index = ((op >>> getMapShift(n)) & getMapMask(n));
+    if (index == 0) {
+      return State.INVALID;
+    }
+    // Jump offsets are zero based.
+    return data.jumpTable(index - 1);
+  }
+
+  /**
+   * Returns the opcode associated with the given unsigned byte value (the first byte of any
+   * instruction).
+   */
+  static OpCode decode(int unsignedByte) {
+    return VALUES[unsignedByte >>> 5]; // opcode lives in the top 3 bits
+  }
+
+  private OpCode(int code) {
+    // Assertion checks during enum creation. Opcodes must be 3 bits and match the ordinal of the
+    // enum (this prevents issues if reordering enums occurs).
+    if ((code & ~0x7) != 0 || code != ordinal()) {
+      throw new AssertionError("bad opcode value: " + code);
+    }
+  }
+
+  abstract State execute(DataView data, DigitSequence in);
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java
new file mode 100644
index 0000000000..cd35b394fe
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.common.io.ByteStreams;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler.Sequence;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Renders the final bytecode representation for the matcher by connecting sequences of operations
+ * together and fixing-up offsets and branch instructions. This is essentially the higher-level
+ * aspect of matcher bytecode compilation.
+ * <p>
+ * Unlike {@link MatcherCompiler} in which a lot of the data is immutable (because sequences can
+ * be defined in isolation), there's a lot of mutable state in this class due to the need to build
+ * and manage offsets between the sequences, which relies on the order in which other sequences
+ * have been rendered.
+ */
+class MatcherBytes {
+  /**
+   * A partial order on byte sequences based on their size. This is not "equivalent to equals" and
+   * must not be used to construct an ordered set.
+   */
+  private static final Comparator<SequenceBytes> DECREASING_BY_SIZE =
+      new Comparator<SequenceBytes>() {
+        @Override public int compare(SequenceBytes lhs, SequenceBytes rhs) {
+          return Integer.compare(rhs.size(), lhs.size());
+        }
+      };
+
+  /**
+   * Sequences we have not considered for rendering yet.
+   */
+  private final List<Sequence> remainingSequences;
+  /**
+   * Candidate sequences whose dependent sequences have all been rendered, and which may themselves
+   * now be rendered.
+   */
+  private final Set<Sequence> candidateSequences = new LinkedHashSet<>();
+  /**
+   * Sequences which have been rendered (used to determine when other sequences become renderable).
+   */
+  private final Set<Sequence> compiledSequences = new HashSet<>();
+  /**
+   * A map from the initial node of each compiled sequence to that sequence's bytes. The key set
+   * of this map is a subset of all nodes.
+   */
+  private final Map<DfaNode, SequenceBytes> sequenceMap = new HashMap<>();
+  /**
+   * A list of compiled byte sequences in reverse order (ie, the sequence with the terminal node
+   * in it is first in this list and the sequence with the initial node is last). Compilation
+   * occurs in reverse order to allow offsets between sequences to be calculated as we go.
+   */
+  private final List<SequenceBytes> reverseOrder = new ArrayList<>();
+  /** Statistics instance for collecting information about the compilation. */
+  private final Statistics stats;
+
+  MatcherBytes(Iterable<Sequence> allSequences, Statistics stats) {
+    // Our set of remaining sequences just starts out as all the sequences.
+    // Sequences are processed in reverse order, so reverse the sorted sequences before beginning.
+    remainingSequences = Lists.reverse(Lists.newArrayList(allSequences));
+    this.stats = Preconditions.checkNotNull(stats);
+  }
+
+  /**
+   * Compiles all sequences into a single byte buffer suitable for use by a
+   * {@code DigitSequenceMatcher}.
+   */
+  byte[] compile() {
+    int totalSequenceCount = remainingSequences.size();
+    // Sequences with no dependent sequences are compiled first.
+    compileFinalSequences();
+    // Determine new candidate sequences.
+    while (compiledSequences.size() < totalSequenceCount) {
+      // We won't always add a new candidate sequence each time around the loop, but the set
+      // should never be emptied until the final sequence is processed.
+      for (Iterator<Sequence> it = remainingSequences.iterator(); it.hasNext();) {
+        Sequence s = it.next();
+        if (compiledSequences.containsAll(s.unorderedOutSequences())) {
+          candidateSequences.add(s);
+          it.remove();
+        }
+      }
+      // Compile the next candidate sequence.
+      Sequence toCompile = Iterables.get(candidateSequences, 0);
+      reverseOrder.add(compile(toCompile));
+      compiledSequences.add(toCompile);
+      candidateSequences.remove(toCompile);
+    }
+    // We should have always exhausted the candidate sequences when we've finished rendering.
+    Preconditions.checkState(remainingSequences.isEmpty());
+    Preconditions.checkState(candidateSequences.isEmpty());
+    return concatSequenceBytesInForwardOrder();
+  }
+
+  /**
+   * Compiles any sequences which have no dependencies and orders them by size to heuristically
+   * reduce the size of branch offsets needed to reach them.
+   */
+  private void compileFinalSequences() {
+    for (Iterator<Sequence> it = remainingSequences.iterator(); it.hasNext();) {
+      Sequence s = it.next();
+      if (s.isFinal()) {
+        reverseOrder.add(compile(s));
+        compiledSequences.add(s);
+        it.remove();
+      }
+    }
+    // In the final output they are ordered shortest first (ie, largest first in this reversed
+    // list) because this will tend to reduce the number of 2-byte branches needed to reach them.
+    Collections.sort(reverseOrder, DECREASING_BY_SIZE);
+  }
+
+  /** Compiles a sequence for which all dependent sequences have already been compiled. */
+  private SequenceBytes compile(Sequence sequence) {
+    // Note: Even non branching sequences will have an out node here.
+    Map<DfaNode, Integer> offsetMap = new HashMap<>();
+    for (DfaNode out : sequence.getOutStates()) {
+      SequenceBytes targetSequence = sequenceMap.get(out);
+      int offsetToStartOfSequence = 0;
+      for (int n = reverseOrder.size() - 1; n >= 0 && reverseOrder.get(n) != targetSequence; n--) {
+        offsetToStartOfSequence += reverseOrder.get(n).size();
+      }
+      if (offsetToStartOfSequence > 0 && targetSequence.isTerminator()) {
+        // If we would explicitly jump to a terminator sequence, we can just exit
+        // unconditionally at this point.
+        offsetToStartOfSequence = Operation.TERMINATION_OFFSET;
+      }
+      offsetMap.put(out, offsetToStartOfSequence);
+    }
+    SequenceBytes compiled = new SequenceBytes(sequence, offsetMap, stats);
+    sequenceMap.put(sequence.getInitialState(), compiled);
+    return compiled;
+  }
+
+  /** Creates the final, single buffer of bytecode instructions for the matcher. */
+  private byte[] concatSequenceBytesInForwardOrder() {
+    try {
+      ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+      for (int n = reverseOrder.size() - 1; n >= 0; n--) {
+        outBuffer.write(reverseOrder.get(n).getBytes());
+      }
+      return outBuffer.toByteArray();
+    } catch (IOException e) {
+      throw new AssertionError("ByteArrayOutputStream cannot throw IOException", e);
+    }
+  }
+
+  /** Renders a sequence (along with a map of branch offsets) to its bytecode form. */
+  private static byte[] renderSequence(
+      Sequence sequence, Map<DfaNode, Integer> offsetMap, Statistics stats) {
+    // Because our operations come from a sequence, we can assert that only the last operation
+    // could possibly be branching.
+    List<Operation> ops = sequence.createOps();
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    ByteArrayDataOutput outBytes = ByteStreams.newDataOutput(baos);
+    // Write all but the last operation (there are no branches to worry about).
+    for (int n = 0; n < ops.size() - 1; n++) {
+      ops.get(n).writeTo(outBytes, null, stats);
+    }
+    Operation lastOp = Iterables.getLast(ops);
+    // Note: Type.TERMINATING must not be recorded here because Operation.writeTo() (called below
+    // in both cases) already records it for terminating operations; recording it here as well
+    // would count the last operation of every sequence twice.
+    if (lastOp.isBranching()) {
+      // A branching operation uses the offset map directly to fill in its jump table information.
+      lastOp.writeTo(outBytes, offsetMap, stats);
+    } else {
+      // A non-branching operation does not use offsets, but we may need to add an explicit branch
+      // instruction after it.
+      lastOp.writeTo(outBytes, null, stats);
+      if (!offsetMap.isEmpty()) {
+        // When adding a branch instruction, there should only be a single offset to use.
+        int offset = Iterables.getOnlyElement(offsetMap.values());
+        if (offset >= 0) {
+          // The offset could still be zero, but this is handled correctly by writeBranch().
+          Operation.writeBranch(outBytes, offset, stats);
+        } else {
+          // This is a terminal instruction and the matcher should exit.
+          Preconditions.checkArgument(offset == Operation.TERMINATION_OFFSET);
+          Operation.writeTerminator(outBytes, stats);
+        }
+      }
+    }
+    return baos.toByteArray();
+  }
+
+  /**
+   * A single compiled sequence of operations. This is just a holder for a {@link Sequence} and the
+   * compiled bytes it produces.
+   */
+  static class SequenceBytes {
+    private final Sequence sequence;
+    private final byte[] bytes;
+
+    SequenceBytes(Sequence sequence, Map<DfaNode, Integer> offsetMap, Statistics stats) {
+      this.sequence = sequence;
+      this.bytes = renderSequence(sequence, offsetMap, stats);
+    }
+
+    Sequence getSequence() {
+      return sequence;
+    }
+
+    boolean isTerminator() {
+      return sequence.isFinal() && sequence.size() == 1;
+    }
+
+    int size() {
+      return bytes.length;
+    }
+
+    byte[] getBytes() {
+      return bytes;
+    }
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java
new file mode 100644
index 0000000000..621226301f
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
+import static java.lang.Integer.numberOfTrailingZeros;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import com.google.common.graph.MutableValueGraph;
+import com.google.common.graph.ValueGraph;
+import com.google.common.graph.ValueGraphBuilder;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * Compiles non-capturing phone number regular expressions into sequences of bytes suitable for
+ * creating {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher
+ * DigitSequenceMatcher} instances.
+ */
+public final class MatcherCompiler {
+  /**
+   * Compiles the given {@code RangeTree} into a sequence of bytes suitable for creating a
+   * {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher
+   * DigitSequenceMatcher}.
+   */
+  public static byte[] compile(RangeTree dfa) {
+    return compile(dfa, Statistics.NO_OP);
+  }
+
+  /**
+   * As {@link #compile(RangeTree)} but additionally accepts a {@link Statistics} instance
+   * to record metrics about the compilation.
+   */
+  public static byte[] compile(RangeTree dfa, Statistics stats) {
+    return new MatcherCompiler(dfa).compile(stats);
+  }
+
+  /** The DFA from which the matcher data is to be compiled. */
+  private final ValueGraph<DfaNode, DfaEdge> dfa;
+  /** The unique initial node of the DFA. */
+  private final DfaNode init;
+  /**
+   * A map from nodes which are at the beginning of a sequence to that sequence. Not all nodes
+   * will be present in the key set of this map.
+   */
+  private final ImmutableMap<DfaNode, Sequence> seqStart;
+
+  /**
+   * Builds a graph directly from the DFA in a RangeTree.
+   *
+   * <p>Rather than deal with the DFA tree directly (which is deliberately opaque as a data
+   * structure) we serialize it into a more malleable ValueGraph. This allows simpler graph
+   * traversal while maintaining a simple-as-possible node/edge structure. It's okay to reuse the
+   * RangeTree types {@code DfaNode} and {@code DfaEdge} here because they have the expected
+   * semantics (e.g. conforming to equals/hashcode etc...) but care must be taken not to keep the
+   * instances around for a long time, since this will keep larger parts of the original DFA alive
+   * in the garbage collector (but this is fine since only bytes are returned from this class).
+   */
+  private static ValueGraph<DfaNode, DfaEdge> buildGraph(RangeTree dfa) {
+    Preconditions.checkArgument(!dfa.isEmpty());
+    MutableValueGraph<DfaNode, DfaEdge> graph =
+        ValueGraphBuilder.directed().allowsSelfLoops(false).build();
+    graph.addNode(dfa.getInitial());
+    DfaVisitor visitor = new DfaVisitor() {
+      @Override
+      public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
+        boolean isFirstVisit = graph.addNode(target);
+        graph.putEdgeValue(source, target, edge);
+        if (isFirstVisit) {
+          target.accept(this);
+        }
+      }
+    };
+    dfa.accept(visitor);
+    return graph;
+  }
+
+  /**
+   * Creates a {@code MatcherCompiler} from the given automaton by generating all the
+   * {@code Sequence}'s of operations necessary to represent it.
+   */
+  MatcherCompiler(RangeTree ranges) {
+    this.dfa = buildGraph(ranges);
+    this.init = ranges.getInitial();
+    LinkedHashMap<DfaNode, Sequence> start = new LinkedHashMap<>();
+    buildSequencesFrom(init, start);
+    this.seqStart = ImmutableMap.copyOf(start);
+  }
+
+  /**
+   * Returns the output targets of the given node sorted according to the lowest "accepting" digit
+   * on the corresponding edge. This ordering is necessary for stability, but also correctness when
+   * building mapping operations. Apart from special cases (e.g. only one output) this is the only
+   * method which should be used to obtain output nodes.
+   */
+  private ImmutableSet<DfaNode> sortedOutputs(DfaNode source) {
+    Comparator<DfaNode> ordering = Comparator.comparingInt(
+        target -> numberOfTrailingZeros(dfa.edgeValue(source, target).get().getDigitMask()));
+    return dfa.successors(source).stream().sorted(ordering).collect(toImmutableSet());
+  }
+
+  /** Returns the single output target of the given node (or throws an exception). */
+  private DfaNode singleOutput(DfaNode source) {
+    return Iterables.getOnlyElement(dfa.successors(source));
+  }
+
+  /**
+   * Builds the output map from a given node in the DFA in the correct order. Note that because
+   * ImmutableMap preserves the order in which entries are collected, and nodes are collected in
+   * the order given by {@link #sortedOutputs}, the keys of the returned map are ordered by the
+   * lowest digit in their mask of accepted inputs. This is necessary for correct behaviour in the
+   * "Mapping" operation.
+   */
+  private ImmutableMap<DfaNode, Integer> getOutMap(DfaNode source) {
+    Function<DfaNode, Integer> getMask =
+        target -> dfa.edgeValue(source, target).get().getDigitMask();
+    return sortedOutputs(source).stream().collect(toImmutableMap(Function.identity(), getMask));
+  }
+
+  /**
+   * Recursively builds sequences by traversing the DFA and grouping successive sub-sequences of
+   * nodes which neither branch, nor are branched to. Each such sub-sequence is represented by a
+   * {@code Sequence} instance (a list of non-branching operations, optionally terminated with a
+   * branching operation).
+   */
+  private void buildSequencesFrom(DfaNode start, LinkedHashMap<DfaNode, Sequence> map) {
+    if (map.containsKey(start)) {
+      return;
+    }
+    DfaNode current = start;
+    ImmutableList.Builder<DfaNode> nodes = ImmutableList.builder();
+    while (true) {
+      nodes.add(current);
+      if (dfa.outDegree(current) != 1) {
+        break;
+      }
+      DfaNode next = singleOutput(current);
+      if (dfa.inDegree(next) > 1) {
+        break;
+      }
+      current = next;
+    }
+    Sequence seq = new Sequence(nodes.build());
+    map.put(start, seq);
+    // Recurse from the outputs at the end of the sequence according to their edge values.
+    // IMPORTANT: We must not use "dfa.successors(current)" here as the order of insertion must
+    // be well defined and ValueGraph does not make good enough promises about node ordering.
+    for (DfaNode out : sortedOutputs(current)) {
+      buildSequencesFrom(out, map);
+    }
+  }
+
+  /** Creates and compiles a {@code MatcherBytes} instance to render the output bytes. */
+  byte[] compile(Statistics stats) {
+    return createMatcherBytes(stats).compile();
+  }
+
+  /** Creates a mutable {@code MatcherBytes} instance which will render the output bytes. */
+  MatcherBytes createMatcherBytes(Statistics stats) {
+    return new MatcherBytes(seqStart.values(), stats);
+  }
+
+  /**
+   * A contiguous sub-sequence of nodes in the DFA which neither branch, nor are branched to.
+   * <p>
+   * The important property of a {@code Sequence} is that branching may only occur at the end of a
+   * {@code Sequence} and branches may only jump to the start of another {@code Sequence}. This
+   * makes it easier to separate the compilation of operations (inside sequences) from the
+   * management of branches and offsets (between sequences).
+   */
+  class Sequence {
+    private final ImmutableList<DfaNode> nodes;
+
+    Sequence(ImmutableList<DfaNode> nodes) {
+      checkArgument(!nodes.isEmpty());
+      this.nodes = nodes;
+    }
+
+    private Operation getOp(DfaNode node) {
+      return Operation.from(node.canTerminate(), getOutMap(node));
+    }
+
+    /**
+     * Returns the operations representing this sequence, merging successive operations where
+     * possible. The final list of operations is guaranteed to have at most one branching operation
+     * which (if present) will always be the last element in the list.
+     */
+    List<Operation> createOps() {
+      List<Operation> ops = new ArrayList<>();
+      Operation current = getOp(nodes.get(0));
+      for (int n = 1; n < nodes.size(); n++) {
+        Operation next = getOp(nodes.get(n));
+        Operation merged = current.mergeWith(next);
+        if (merged != null) {
+          current = merged;
+        } else {
+          ops.add(current);
+          current = next;
+        }
+      }
+      ops.add(current);
+      return ops;
+    }
+
+    DfaNode getInitialState() {
+      return Iterables.get(nodes, 0);
+    }
+
+    DfaNode getFinalState() {
+      return Iterables.getLast(nodes);
+    }
+
+    Set<DfaNode> getOutStates() {
+      return sortedOutputs(getFinalState());
+    }
+
+    /**
+     * Not the same as "terminating" for an operation. A sequence is "final" if no other sequences
+     * follow it. Normally there is only one final sequence in a normalized DFA, even if that
+     * sequence contains only a single terminating node. However not all terminating nodes are
+     * in final sequences.
+     */
+    boolean isFinal() {
+      return getOutStates().isEmpty();
+    }
+
+    /** Returns the number of nodes that this sequence represents. */
+    int size() {
+      return nodes.size();
+    }
+
+    ImmutableSet<Sequence> unorderedOutSequences() {
+      return getOutStates().stream().map(seqStart::get).collect(toImmutableSet());
+    }
+
+    @Override
+    public String toString() {
+      return toString(new StringBuilder(), 0).toString();
+    }
+
+    private StringBuilder toString(StringBuilder buf, int indent) {
+      List<Operation> ops = createOps();
+      appendIndent(buf, indent).append(
+          String.format("{%s} %s", nodes.get(0), Joiner.on(" >> ").join(ops)));
+      ImmutableList<DfaNode> outs = Iterables.getLast(ops).getOuts();
+      if (!outs.isEmpty()) {
+        buf.append(" {\n");
+        for (DfaNode out : outs) {
+          seqStart.get(out).toString(buf, indent + 1);
+        }
+        appendIndent(buf, indent).append("}\n");
+      } else {
+        buf.append('\n');
+      }
+      return buf;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return seqStart.get(init).toString();
+  }
+
+  private static StringBuilder appendIndent(StringBuilder out, int indent) {
+    for (int n = 0; n < indent; n++) {
+      out.append("  ");
+    }
+    return out;
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java
new file mode 100644
index 0000000000..b324e675df
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java
@@ -0,0 +1,600 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableSetMultimap.flatteningToImmutableSetMultimap;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+import static java.lang.Integer.numberOfTrailingZeros;
+import static java.util.stream.Collectors.joining;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableSetMultimap;
+import com.google.common.collect.Iterables;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.Statistics.Type;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * A specific instance of a number matching operation derived from a DFA. Operations are created by
+ * analyzing a sequence in a DFA and knowing how to write the corresponding instruction(s) as bytes
+ * (to be processed by DigitSequenceMatcher or similar).
+ */
+abstract class Operation {
+  /** The decimal digits which can be accepted during matching operations. */
+  private enum Digit {
+    // The declaration order must match the digit values (verified by the constructor).
+    ZERO(0), ONE(1), TWO(2), THREE(3), FOUR(4), FIVE(5), SIX(6), SEVEN(7), EIGHT(8), NINE(9);
+
+    private static final Digit[] VALUES = values();
+
+    // Iterates in declaration order, which is therefore also increasing digit value order.
+    public static final ImmutableSet<Digit> ALL = ImmutableSet.copyOf(VALUES);
+
+    Digit(int value) {
+      // The value is never stored since it is required to equal the ordinal of the constant.
+      Preconditions.checkArgument(ordinal() == value);
+    }
+
+    /** Returns the digit corresponding to the integral value in the range {@code 0...9}. */
+    public static Digit of(int n) {
+      return VALUES[n];
+    }
+
+    /**
+     * Converts a bit-mask, in which bits 0 to 9 stand for the corresponding digits, into the set
+     * of digits whose bits are set.
+     */
+    public static ImmutableSet<Digit> fromMask(int mask) {
+      Preconditions.checkArgument(mask >= 1 && mask <= ALL_DIGITS_MASK);
+      if (mask == ALL_DIGITS_MASK) {
+        return ALL;
+      }
+      ImmutableSet.Builder<Digit> selected = ImmutableSet.builder();
+      for (Digit digit : VALUES) {
+        if (((mask >>> digit.ordinal()) & 1) == 1) {
+          selected.add(digit);
+        }
+      }
+      return selected.build();
+    }
+
+    /** Returns the integer value of this digit instance. */
+    public int value() {
+      return ordinal();
+    }
+  }
+
+  /**
+   * An invalid jump offset indicating that instead of jumping to a new instruction, the state
+   * machine can just terminate (used to avoid jumping directly to the termination instruction).
+   */
+  static final int TERMINATION_OFFSET = -1;
+
+  /** The number of bytes required by a "long" branch instruction. */
+  private static final int LONG_BRANCH_SIZE = 2;
+
+  private final boolean isTerminating;
+  private final boolean isBranching;
+
+  private Operation(boolean isTerminating, boolean isBranching) {
+    this.isTerminating = isTerminating;
+    this.isBranching = isBranching;
+  }
+
+  /** Returns whether this operation can terminate the state machine when it has been reached. */
+  boolean isTerminating() {
+    return isTerminating;
+  }
+
+  /**
+   * Returns whether this operation was created as branching (typically because it can reach more
+   * than one output node, although {@code Terminal} is also branching despite having no outputs).
+   */
+  boolean isBranching() {
+    return isBranching;
+  }
+
+  /**
+   * Returns the output nodes of this operation. For branching operations the order of multiple
+   * output nodes is defined by the operation itself (most operations are not branching and have
+   * only one output state anyway).
+   */
+  abstract ImmutableList<DfaNode> getOuts();
+
+  /** Returns the op-code for this operation, used when writing out instruction bytes. */
+  abstract OpCode getOpCode();
+
+  /** Writes this operation out as a series of instruction bytes. */
+  abstract void writeImpl(
+      ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats);
+
+  void writeTo(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
+    if (isTerminating()) {
+      stats.record(Type.TERMINATING);
+    }
+    writeImpl(out, offsetMap, stats);
+  }
+
+  /**
+   * Merges two adjacent operations (a poor man's compiler optimization). Useful for collapsing
+   * sequences of "ANY" operations. If this instruction cannot be merged with the given "next"
+   * instruction then it should return {@code null}, which is the default behavior.
+   *
+   * @param next the operation following this operation which we will try and merge with.
+   */
+  Operation mergeWith(Operation next) {
+    return null;
+  }
+
+  /** Writes a branch instruction into the output byte sequence (nothing for a zero jump). */
+  static void writeBranch(ByteArrayDataOutput out, int jump, Statistics stats) {
+    Preconditions.checkArgument(jump >= 0 && jump < 0x1000, "invalid jump: " + jump);
+    if (jump >= 16) {
+      stats.record(jump >= 0x100 ? Type.LONG_BRANCH : Type.MEDIUM_BRANCH);
+      out.writeShort((1 << 12) | (OpCode.BRANCH.ordinal() << 13) | jump);
+    } else if (jump > 0) {
+      stats.record(Type.SHORT_BRANCH);
+      out.writeByte((OpCode.BRANCH.ordinal() << 5) | jump);
+    } else {
+      stats.record(Type.CONTINUATION);
+    }
+  }
+
+  /** Writes a termination byte (zero) into the output byte sequence, recorded as FINAL. */
+  static void writeTerminator(ByteArrayDataOutput out, Statistics stats) {
+    stats.record(Type.FINAL);
+    out.writeByte(0);
+  }
+
+  /**
+   * Creates a new operation to represent the output state transition given by {@code outMasks}.
+   * Where {@code outMasks} holds several nodes, their iteration order must be consistent with
+   * the {@code Mapping} operation (in which nodes are ordered by the lowest bit set in the
+   * corresponding mask).
+   */
+  static Operation from(boolean isTerminating, ImmutableMap<DfaNode, Integer> outMasks) {
+    // Dispatch on the number of distinct output nodes, simplest cases first.
+    int outCount = outMasks.size();
+    if (outCount == 0) {
+      // A state with no out nodes must be terminating and becomes a "Terminal" operation.
+      Preconditions.checkState(isTerminating);
+      return new Operation.Terminal();
+    }
+    ImmutableList<DfaNode> outStates = outMasks.keySet().asList();
+    // The masks are listed in the same order as the out states they belong to (both views share
+    // the iteration order of the map).
+    ImmutableList<Integer> masks = outMasks.values().asList();
+    int firstMask = masks.get(0);
+    if (outCount == 1) {
+      if (Integer.bitCount(firstMask) == 1) {
+        // One output state reached by a single input; then it's a "Single" operation.
+        return new Operation.Single(isTerminating, numberOfTrailingZeros(firstMask), outStates);
+      }
+      // One output state reached by any input is an "Any" operation, while one reached by
+      // some other general set of inputs is a "Range" operation.
+      return (firstMask == ALL_DIGITS_MASK)
+          ? new Operation.Any(isTerminating, 1, outStates)
+          : new Operation.Range(isTerminating, firstMask, outStates);
+    }
+    // With exactly two out states whose disjoint masks cover all inputs, a shorter branching
+    // "Range" operation (one of two output nodes reached by any input) can be used.
+    if (outCount == 2 && (firstMask | masks.get(1)) == ALL_DIGITS_MASK) {
+      return new Operation.Range(isTerminating, firstMask, outStates);
+    }
+    // Any other combination of nodes or inputs; then it's a "Mapping" operation. This code relies
+    // on the ordering of entries in the output map to correspond to edge order.
+    return new Operation.Mapping(isTerminating, outMasks);
+  }
+
+  /** Represents a state with no legal outputs, which must be a terminal state in the matcher. */
+  private static final class Terminal extends Operation {
+    Terminal() {
+      // Always terminating, and treated as branching despite having no output nodes.
+      super(true, true);
+    }
+
+    @Override ImmutableList<DfaNode> getOuts() {
+      return ImmutableList.of();
+    }
+
+    @Override OpCode getOpCode() {
+      return OpCode.BRANCH;
+    }
+
+    @Override
+    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> ignored, Statistics stats) {
+      // A terminal operation is written as nothing more than the termination byte.
+      writeTerminator(out, stats);
+    }
+
+    @Override
+    public String toString() {
+      return "TERMINAL";
+    }
+  }
+
+  /**
+   * Represents a state which can be transitioned from to a single output state via a single input
+   * (eg, "0" or "9").
+   */
+  private static final class Single extends Operation {
+    private final Digit digit;
+    private final ImmutableList<DfaNode> outs;
+
+    Single(boolean isTerminating, int digit, ImmutableList<DfaNode> outs) {
+      super(isTerminating, false);
+      Preconditions.checkArgument(outs.size() == 1);
+      this.digit = Digit.of(digit);
+      this.outs = outs;
+    }
+
+    @Override
+    OpCode getOpCode() {
+      return OpCode.SINGLE;
+    }
+
+    @Override ImmutableList<DfaNode> getOuts() {
+      return outs;
+    }
+
+    @Override
+    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> unused, Statistics stats) {
+      //  <--------- 1 byte --------->
+      // [ OPCODE | TRM |    VALUE    ]
+      out.writeByte((getOpCode().ordinal() << 5)
+          | (isTerminating() ? (1 << 4) : 0)
+          | digit.value());
+    }
+
+    @Override
+    public String toString() {
+      return format(digit.value());
+    }
+  }
+
+  /**
+   * Represents a state which can be transitioned from to a single output state via any input
+   * (ie, "\d"). Successive "Any" operations can be merged to represent a repeated sequence
+   * (eg, "\d{5}").
+   */
+  private static final class Any extends Operation {
+    private final int count;
+    private final ImmutableList<DfaNode> outs;
+
+    Any(boolean isTerminating, int count, ImmutableList<DfaNode> outs) {
+      super(isTerminating, false);
+      Preconditions.checkArgument(outs.size() == 1);
+      Preconditions.checkArgument(count > 0);
+      this.count = count;
+      this.outs = outs;
+    }
+
+    @Override
+    OpCode getOpCode() {
+      return OpCode.ANY;
+    }
+
+    @Override ImmutableList<DfaNode> getOuts() {
+      return outs;
+    }
+
+    @Override
+    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> unused, Statistics stats) {
+      int remainingCount = count;
+      //  <--------- 1 byte --------->
+      // [ OPCODE | TRM |   COUNT-1   ]
+      int anyN = (getOpCode().ordinal() << 5) | (isTerminating() ? (1 << 4) : 0);
+      while (remainingCount > 16) {
+        out.writeByte(anyN | 15);
+        remainingCount -= 16;
+      }
+      out.writeByte(anyN | remainingCount - 1);
+    }
+
+    @Override
+    public Operation mergeWith(Operation next) {
+      if (next.getOpCode() == OpCode.ANY && isTerminating() == next.isTerminating()) {
+        return new Any(isTerminating(), this.count + ((Any) next).count, ((Any) next).outs);
+      }
+      return null;
+    }
+
+    @Override
+    public String toString() {
+      return format(count);
+    }
+  }
+
+  /**
+   * Represents a state which can be transitioned from via an arbitrary set of inputs to either
+   * one or two output nodes (eg, "[23-69]" or "[0-4]X|[5-9]Y"). In the case where there are two
+   * output nodes, any input must reach one of the two possible nodes (ie, there is no invalid
+   * input).
+   */
+  private static final class Range extends Operation {
+    private final ImmutableSet<Digit> digits;
+    private final ImmutableList<DfaNode> outs;
+
+    Range(boolean isTerminating, int digitMask, ImmutableList<DfaNode> outs) {
+      super(isTerminating, outs.size() == 2);
+      Preconditions.checkArgument(outs.size() <= 2);
+      this.digits = Digit.fromMask(digitMask);
+      this.outs = outs;
+    }
+
+    @Override
+    OpCode getOpCode() {
+      return OpCode.RANGE;
+    }
+
+    /**
+     * When this operation branches (ie, has 2 output nodes) the first node is the one reached by
+     * an input in {@code digits} and the second is the one reached by every other input.
+     */
+    @Override ImmutableList<DfaNode> getOuts() {
+      return outs;
+    }
+
+    @Override
+    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
+      //  <-------------- 2 bytes --------------> <-------- 2 bytes --------->
+      // [ OPCODE | TRM |  0  |     BIT SET      ]
+      // [ OPCODE | TRM |  1  |     BIT SET      |   JUMP_IN   |   JUMP_OUT   ]
+      int flags = (isTerminating() ? (1 << 12) : 0) | (isBranching() ? (1 << 11) : 0);
+      out.writeShort((getOpCode().ordinal() << 13) | flags | asBitMask(digits));
+      if (isBranching()) {
+        // The jump table lists the node reached via the range digits first, then the other node.
+        Integer jumpIn = offsetMap.get(outs.get(0));
+        Integer jumpOut = offsetMap.get(outs.get(1));
+        writeJumpTable(out, ImmutableList.of(jumpIn, jumpOut), stats);
+      }
+    }
+
+    @Override
+    public String toString() {
+      return format(asRangeString(digits));
+    }
+  }
+
+  /**
+   * Represents a state in the matcher which can be transitioned from via an arbitrary set of
+   * inputs, to an arbitrary set of nodes. This is the most general form of operation and (apart
+   * from branches) provides the only truly necessary instruction in the matcher; everything else
+   * is just some specialization of this operation.
+   */
+  private static final class Mapping extends Operation {
+    private final ImmutableSetMultimap<DfaNode, Digit> nodeMap;
+
+    Mapping(boolean isTerminating, ImmutableMap<DfaNode, Integer> outMasks) {
+      super(isTerminating, true);
+      ImmutableSetMultimap.Builder<DfaNode, Digit> map = ImmutableSetMultimap.builder();
+      outMasks.forEach((node, mask) -> map.putAll(node, Digit.fromMask(mask)));
+      this.nodeMap = map.build();
+    }
+
+    @Override
+    OpCode getOpCode() {
+      return isTerminating() ? OpCode.TMAP : OpCode.MAP;
+    }
+
+    /**
+     * The output nodes of a Mapping operation are ordered by the lowest digit via which each
+     * node can be reached. For example, if a map operation can reach three nodes {@code A},
+     * {@code B} and {@code C} via inputs in the ranges {@code [1-38]}, {@code [4-6]} and
+     * {@code [09]} respectively, then they will be ordered {@code (C, A, B)}.
+     */
+    @Override ImmutableList<DfaNode> getOuts() {
+      return nodeMap.keySet().asList();
+    }
+
+    @Override
+    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
+      //  <------------ 4 bytes ------------> <-- 1 byte per offset --->
+      // [ OPCODE |        CODED MAP         |  JUMP_1  | ... | JUMP_N  ]
+      int opcodeBits = getOpCode().ordinal() << 29;
+      out.writeInt(opcodeBits | asCodedMap(nodeMap));
+      writeJumpTable(
+          out, getOuts().stream().map(offsetMap::get).collect(toImmutableList()), stats);
+    }
+
+    @Override
+    public String toString() {
+      return format(
+          nodeMap.asMap().values().stream().map(Operation::asRangeString).collect(joining(", ")));
+    }
+  }
+
+  String format(Object extra) {  // Yields "<opcode>[*] : <extra>", where '*' marks terminating.
+    return String.format("%s%s : %s", getOpCode(), isTerminating() ? "*" : "", extra);
+  }
+
+  /**
+   * Converts a set of digits to an integer bit mask, where bit {@code N} is set if (and only if)
+   * the digit with value {@code N} is present in the given set. Digit values are expected to fit
+   * in the lowest 10 bits of the result.
+   */
+  private static int asBitMask(ImmutableSet<Digit> digits) {
+    return digits.stream()
+        .mapToInt(digit -> 1 << digit.value())
+        .reduce(0, (mask, bit) -> mask | bit);
+  }
+
+  /**
+   * Encodes an arbitrary mapping from input digit to output index in the lowest 29 bits of an
+   * integer. The 29 bits are partitioned such that lower inputs require fewer bits to encode
+   * (output indices are assigned as they are encountered, starting at the first input). Each
+   * digit can then be quickly mapped to either its 1-indexed output node, or 0 if the input was
+   * invalid.
+   */
+  private static int asCodedMap(ImmutableSetMultimap<DfaNode, Digit> nodeMap) {
+    int encoded = 0;
+    int index = 1;  // Coded indices are 1-to-10 (0 is the "invalid" node).
+    for (DfaNode node : nodeMap.keySet()) {
+      for (Digit digit : nodeMap.get(node)) {
+        encoded |= (index << OpCode.getMapShift(digit.value()));
+      }
+      index++;
+    }
+    return encoded;
+  }
+
+  /**
+   * Writes a sequence of offsets representing an unsigned byte-based jump table after either a
+   * Mapping or Range instruction. This accounts correctly for the need to introduce a new
+   * "trampoline" branch instruction after the jump table (when the desired offset is too large
+   * to fit in a single unsigned byte).
+   * <p>
+   * Offsets are either:
+   * <ul>
+   * <li>The number of bytes to jump from the end of the current {@code Sequence} bytes to the
+   *     start of the destination {@code Sequence} bytes.
+   * <li>{@code -1} to indicate that a terminal node has been reached.
+   * </ul>
+   * <p>
+   * Note that the offset written into the jump table itself must be relative to the beginning of
+   * the jump table and so must be adjusted by the number of bytes in the jump table and any other
+   * branch instructions that follow it. This is probably the most awkward logic in the entire
+   * compiler.
+   */
+  static void writeJumpTable(ByteArrayDataOutput out, List<Integer> offsets,
+      Statistics stats) {
+    int jumpTableSize = offsets.size();
+    boolean needsExtraBranches = false;
+    for (int n = 0; n < jumpTableSize && !needsExtraBranches; n++) {
+      // Check whether the adjusted offset (ie, the one we would write) will fit in a byte.
+      // It's no issue to have offsets of -1 as it can never trigger "needsExtraBranches".
+      needsExtraBranches = (offsets.get(n) + jumpTableSize >= 0x100);
+    }
+    if (needsExtraBranches) {
+      // We only get here if at least one offset (after adjustment by the original jump table size)
+      // would not fit into a byte. Now we must calculate exactly how many extra branches we are
+      // going to need. For this we must assume the worst case adjustment of "3 x jumpTableSize"
+      // which is 1 byte for the jump table offset and 2 bytes for the extra branch for every entry.
+      // This is pessimistic because there will now be cases where we write a trampoline jump for
+      // an offset that could have fitted had we not assumed that we might need the extra space for
+      // the branch. However these cases are rare enough that we choose to ignore them.
+      int maxOffsetAdjust = ((1 + LONG_BRANCH_SIZE) * jumpTableSize);
+      int extraBranchCount = 0;
+      for (int n = 0; n < jumpTableSize; n++) {
+        if (offsets.get(n) + maxOffsetAdjust >= 0x100) {
+          extraBranchCount += 1;
+        }
+      }
+      // Now we know a reasonable upper bound for how many extra branches are needed, use this to
+      // adjust the actual offsets and write them. When a "trampoline" branch instruction is needed
+      // we split the offset so the jump table jumps to the branch instruction and that jumps the
+      // rest. Branch instructions are positioned, in order, immediately after the jump table.
+      List<Integer> extraBranchOffsets = new ArrayList<>();
+      int totalOffsetAdjust = jumpTableSize + (LONG_BRANCH_SIZE * extraBranchCount);
+      for (int n = 0; n < jumpTableSize; n++) {
+        int offset = offsets.get(n);
+        if (offset >= 0) {
+          int worstCaseOffset = offset + maxOffsetAdjust;
+          // Get the actual total offset we want to jump by.
+          offset += totalOffsetAdjust;
+          // Use the worst case offset here so we repeat exactly the same decision as the loop
+          // above (otherwise we might add fewer branches which would screw up our offsets).
+          if (worstCaseOffset >= 0x100) {
+            // Split the original offset, recording the jump to the trampoline branch as well as
+            // the branch offset itself. Note that the offset adjustment changes as more trampoline
+            // branches are encountered (but the overall offset jumped remains the same).
+            int extraBranchIndex = extraBranchOffsets.size();
+            // This offset will always be small (max jump table is 10 entries, so offset to the
+            // last possible branch will be at most 28 bytes).
+            int branchInstructionOffset = jumpTableSize + (LONG_BRANCH_SIZE * extraBranchIndex);
+            // Subtract one additional branch instruction here because when we trampoline jump, we
+            // jump to the start of the branch instruction, but jump away from the end of it.
+            extraBranchOffsets.add((offset - branchInstructionOffset) - LONG_BRANCH_SIZE);
+            offset = branchInstructionOffset;
+          }
+          // Write the total offset (offset must be < 0x100 here as worstCaseOffset was < 0x100).
+          Preconditions.checkState(offset < 0x100, "jump too long: %s", offset);
+          out.writeByte(offset);
+        } else {
+          // If the destination of this jump would just be a termination instruction, just write
+          // the termination byte here directly (no point jumping to the termination byte).
+          Preconditions.checkArgument(offset == TERMINATION_OFFSET, "bad offset: %s", offset);
+          writeTerminator(out, stats);
+        }
+      }
+      // Write out the trampoline jumps in the order they were found.
+      for (int offset : extraBranchOffsets) {
+        stats.record(Type.DOUBLE_JUMP);
+        Operation.writeBranch(out, offset, stats);
+      }
+    } else {
+      // In the simple case, there are no extra branches, so we just write the offsets we have.
+      // This has the same effect as running the code above with (extraBranchCount == 0) but can be
+      // reached more optimistically because we don't need to account for the worst case offset
+      // adjustment when deciding if it's safe to just use the offsets we were given. It's a form
+      // of hysteresis between the no-branch and extra-branch cases.
+      for (int n = 0; n < jumpTableSize; n++) {
+        int offset = offsets.get(n);
+        if (offset >= 0) {
+          offset += jumpTableSize;
+          Preconditions.checkState(offset < 0x100, "jump too long: %s", offset);
+          out.writeByte(offset);
+        } else {
+          writeTerminator(out, stats);
+        }
+      }
+    }
+  }
+
+  // Formats digits as a range string (eg, "[014-7]") for use by the toString() methods.
+  private static String asRangeString(Collection<Digit> digits) {
+    StringBuilder out = new StringBuilder("[");
+    Digit first = null;
+    Digit last = null;
+    for (Digit digit : digits) {
+      if (first != null && digit.value() == last.value() + 1) {
+        // This digit extends the current run of consecutive digits.
+        last = digit;
+      } else {
+        // Finish the current run (if there is one with several digits) and start a new run.
+        if (last != first) {
+          // Only runs of three or more digits are hyphenated (eg, "4-7" but "45").
+          if (last.value() > first.value() + 1) {
+            out.append("-");
+          }
+          out.append(last.value());
+        }
+        first = digit;
+        last = digit;
+        out.append(first.value());
+      }
+    }
+    // Finish the final run in the same way.
+    if (last != first) {
+      if (last.value() > first.value() + 1) {
+        out.append("-");
+      }
+      out.append(last.value());
+    }
+    return out.append("]").toString();
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java
new file mode 100644
index 0000000000..e175425b35
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+
+/**
+ * A simple class for capturing statistics produced during regular expression compilation. This can
+ * be used to quantify how proposed changes to the byte-code definition will affect the size of any
+ * compiled matcher bytes.
+ */
+public interface Statistics {
+  /** A shared instance which discards everything that is recorded to it. */
+  Statistics NO_OP = type -> {
+    // Deliberately empty: nothing is counted.
+  };
+
+  /** The kinds of event that can be counted. */
+  enum Type {
+    SHORT_BRANCH,
+    MEDIUM_BRANCH,
+    LONG_BRANCH,
+    DOUBLE_JUMP,
+    CONTINUATION,
+    TERMINATING,
+    FINAL
+  }
+
+  /** Records an operation of the specified type during bytecode compilation. */
+  void record(Type type);
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
index 5c3e312c40..96ab4dcf9c 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
@@ -96,7 +96,7 @@ public static CsvTable<ExampleNumberKey> toCsv(
     for (Cell<PhoneRegion, ValidNumberType, DigitSequence> c : table.cellSet()) {
       out.put(ExampleNumberKey.of(c.getRowKey(), c.getColumnKey()), NUMBER, c.getValue());
     }
-    return CsvTable.from(SCHEMA, out.build());
+    return CsvTable.from(SCHEMA, out.buildOrThrow());
   }
 
   /**
@@ -110,7 +110,7 @@ public static CsvTable<ExampleNumberKey> toCsv(
     for (ExampleNumberKey k : csv.getKeys()) {
       out.put(k.getRegion(), k.getType(), csv.getOrDefault(k, NUMBER));
     }
-    return out.build();
+    return out.buildOrThrow();
   }
 
   private static Stream<String> write(ExampleNumberKey key) {
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
index 396f735e4a..795414188f 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
@@ -25,7 +25,7 @@
 
 /**
  * A CSV provider which reads files rooted in a given directory. The file layout should match that
- * in the CSV metadata directory ({@code googledata/third_party/i18n/phonenumbers/metadata}).
+ * in the CSV metadata directory ({@code third_party/libphonenumber_metadata/metadata}).
  */
 public final class FileBasedCsvLoader implements CsvDataProvider {
   /** Returns a CSV loader which reads files from the given base directory. */
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
index a9cbca6642..e977096ce6 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
@@ -81,7 +81,7 @@ public static ImmutableMap<String, FormatSpec> toFormatSpecs(CsvTable<String> fo
               formats.getOrDefault(id, NATIONAL_PREFIX_OPTIONAL),
               toComment(formats.getOrDefault(id, COMMENT))));
     }
-    return specs.build();
+    return specs.buildOrThrow();
   }
 
   private static Optional<String> toOptional(String s) {
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
index 8fb662e376..82282eab69 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
@@ -23,13 +23,13 @@
 import static java.util.function.Function.identity;
 import static java.util.stream.Collectors.joining;
 
-import com.google.common.base.Splitter;
 import com.google.common.collect.ContiguousSet;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableRangeSet;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.Range;
+import com.google.i18n.phonenumbers.metadata.LengthsParser;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
 import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
@@ -49,18 +49,18 @@
 import com.google.i18n.phonenumbers.metadata.table.Schema;
 import java.time.ZoneId;
 import java.util.List;
-import java.util.NavigableSet;
 import java.util.Optional;
 import java.util.TreeSet;
 import java.util.stream.Stream;
 
 /**
  * The schema of the standard "Ranges" table with rows keyed by {@link RangeKey} and columns:
+ *
  * <ol>
- *   <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not
- *       the same a XmlNumberType or ValidNumberType). All ranges should be assigned a type.
- *   <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF
- *       can yield the internal ValidNumberType). All ranges should be assigned a tariff.
+ *   <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not the same
+ *       as XmlNumberType or ValidNumberType). All ranges should be assigned a type.
+ *   <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF can
+ *       yield the internal ValidNumberType). All ranges should be assigned a tariff.
  *   <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from
  *       numbers in a range for local dialling. Local only lengths are derived using this column.
  *   <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its
@@ -72,8 +72,8 @@
  *       applied).
  *   <li>{@link #TIMEZONE}: The timezone names for a range (or empty to imply the default
  *       timezones). Multiple timezones can be specific if separated by {@code '&'}.
- *   <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where ranges are
- *       set {@code true} that range is valid within the region {@code XX}.
+ *   <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where a range is
+ *       set {@code true} if it is valid within the region {@code XX}.
  *   <li>{@link #GEOCODES}: A group of String columns in the form "Geocode:XXX" containing the
  *       geocode string for a range, where {@code XXX} is the language code of the string.
  *   <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid.
@@ -81,6 +81,7 @@
  * </ol>
  *
  * <p>Rows keys are serialized via the marshaller and produce leading columns:
+ *
  * <ol>
  *   <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]").
  *   <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9").
@@ -88,16 +89,16 @@
  */
 public final class RangesTableSchema {
   /**
-   * External number type enum. This is technically much better than ValidNumberType since it
-   * splits type and cost properly. Unfortunately the internal logic of the phonenumber library
-   * doesn't really cope with this, which is why we convert to {@code XmlRangesSchema} before
-   * creating legacy data structures.
+   * External number type enum. This is technically much better than ValidNumberType since it splits
+   * type and cost properly. Unfortunately the internal logic of the phonenumber library doesn't
+   * really cope with this, which is why we convert to {@code XmlRangesSchema} before creating
+   * legacy data structures.
    *
-   * <p>This enum can be modified as new types are requested from data providers, providing the
-   * type mapping to ValidNumberType is updated appropriately. Note that until it's clear that
-   * mapping types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we
-   * should be very careful about using the additional types. Additional types need to be removed
-   * before the generated table can be turned into a {@link NumberingScheme}.
+   * <p>This enum can be modified as new types are requested from data providers, providing the type
+   * mapping to ValidNumberType is updated appropriately. Note that until it's clear that mapping
+   * types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we should be very
+   * careful about using the additional types. Additional types need to be removed before the
+   * generated table can be turned into a {@link NumberingScheme}.
    */
   public enum ExtType {
     /** Default value not permitted in real data. */
@@ -125,14 +126,14 @@ public enum ExtType {
 
     private static final ImmutableMap<ExtType, ValidNumberType> TYPE_MAP =
         Stream.of(
-            ExtType.FIXED_LINE,
-            ExtType.MOBILE,
-            ExtType.FIXED_LINE_OR_MOBILE,
-            ExtType.PAGER,
-            ExtType.PERSONAL_NUMBER,
-            ExtType.UAN,
-            ExtType.VOICEMAIL,
-            ExtType.VOIP)
+                ExtType.FIXED_LINE,
+                ExtType.MOBILE,
+                ExtType.FIXED_LINE_OR_MOBILE,
+                ExtType.PAGER,
+                ExtType.PERSONAL_NUMBER,
+                ExtType.UAN,
+                ExtType.VOICEMAIL,
+                ExtType.VOIP)
             .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name())));
 
     public Optional<ValidNumberType> toValidNumberType() {
@@ -185,9 +186,9 @@ public Timezones(String s) {
       Column.of(ExtTariff.class, "Tariff", ExtTariff.STANDARD_RATE);
 
   /**
-   * The "Area Code Length" column in the range table, denoting the length of a prefix which can
-   * be removed from all numbers in a range to obtain locally diallable numbers. If an
-   * "area code" is not optional for dialling, then no value should be set here.
+   * The "Area Code Length" column in the range table, denoting the length of a prefix which can be
+   * removed from all numbers in a range to obtain locally diallable numbers. If an "area code" is
+   * not optional for dialling, then no value should be set here.
    */
   public static final Column<Integer> AREA_CODE_LENGTH =
       Column.ofUnsignedInteger("Area Code Length");
@@ -226,12 +227,13 @@ public Timezones(String s) {
   public static final Column<String> COMMENT = Column.ofString("Comment");
 
   /** Marshaller for constructing CsvTable from RangeTable. */
-  private static final CsvKeyMarshaller<RangeKey> MARSHALLER = new CsvKeyMarshaller<>(
-      RangesTableSchema::write,
-      RangesTableSchema::read,
-      Optional.of(RangeKey.ORDERING),
-      "Prefix",
-      "Length");
+  private static final CsvKeyMarshaller<RangeKey> MARSHALLER =
+      new CsvKeyMarshaller<>(
+          RangesTableSchema::write,
+          RangesTableSchema::read,
+          Optional.of(RangeKey.ORDERING),
+          "Prefix",
+          "Length");
 
   /** The non-key columns of a range table. */
   public static final Schema TABLE_COLUMNS =
@@ -251,10 +253,10 @@ public Timezones(String s) {
           .build();
 
   /**
-   * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced
-   * by the CSV regions multi-value. This allows region codes to be serialize in a single column
-   * (which is far nicer when looking at data in a spreadsheet). In the range table, this is
-   * normalized into the boolean column group (because that's far nicer to work with).
+   * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced by
+   * the CSV regions multi-value. This allows region codes to be serialized in a single column
+   * (which is far nicer when looking at data in a spreadsheet). In the range table, this is
+   * normalized into the boolean column group (because that's far nicer to work with).
    */
   private static final Schema CSV_COLUMNS =
       Schema.builder()
@@ -289,17 +291,21 @@ public static CsvTable<RangeKey> toCsv(RangeTable table) {
     for (Change c : table.toChanges()) {
       for (RangeKey k : RangeKey.decompose(c.getRanges())) {
         regions.clear();
-        c.getAssignments().forEach(a -> {
-          // We special case the regions column, converting a group of boolean columns into a
-          // multi-value of region codes. If the column is in the group, it must hold Booleans.
-          if (regionColumns.contains(a.column())) {
-            if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) {
-              regions.add(REGIONS.getKey(a.column()));
-            }
-          } else {
-            csv.put(k, a);
-          }
-        });
+        c.getAssignments()
+            .forEach(
+                a -> {
+                  // We special case the regions column, converting a group of boolean columns
+                  // into a multi-value of region codes. If the column is in the group, it must
+                  // hold Booleans, which is why the column can be treated as a Column<Boolean>
+                  // in the cast below.
+                  if (regionColumns.contains(a.column())) {
+                    if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) {
+                      regions.add(REGIONS.getKey(a.column()));
+                    }
+                  } else {
+                    csv.put(k, a);
+                  }
+                });
         // We can do this out-of-sequence because the table will order its columns.
         if (!regions.isEmpty()) {
           csv.put(k, CSV_REGIONS, Regions.of(regions));
@@ -311,22 +317,28 @@ public static CsvTable<RangeKey> toCsv(RangeTable table) {
 
   /**
    * Converts a {@link RangeKey} based {@link CsvTable} to a {@link RangeTable}, preserving the
-   * original table columns. The {@link CsvSchema} of the returned table is not guaranteed to be
-   * the {@link #SCHEMA} instance if the given table had different columns.
+   * original table columns. The {@link CsvSchema} of the returned table is not guaranteed to be the
+   * {@link #SCHEMA} instance if the given table had different columns.
    */
   public static RangeTable toRangeTable(CsvTable<RangeKey> csv) {
     RangeTable.Builder out = RangeTable.builder(TABLE_COLUMNS);
     for (RangeKey k : csv.getKeys()) {
       Change.Builder change = Change.builder(k.asRangeTree());
-      csv.getRow(k).forEach((c, v) -> {
-        // We special case the regions column, converting a comma separated list of region codes
-        // into a series of boolean column assignments.
-        if (c.equals(CSV_REGIONS)) {
-          CSV_REGIONS.cast(v).getValues().forEach(r -> change.assign(REGIONS.getColumn(r), true));
-        } else {
-          change.assign(c, v);
-        }
-      });
+      csv.getRow(k)
+          .forEach(
+              (c, v) -> {
+                // We special case the regions column, converting a comma separated
+                // list of region codes into a series of boolean column assignments
+                // (one assignment per region code).
+                if (c.equals(CSV_REGIONS)) {
+                  CSV_REGIONS
+                      .cast(v)
+                      .getValues()
+                      .forEach(r -> change.assign(REGIONS.getColumn(r), true));
+                } else {
+                  change.assign(c, v);
+                }
+              });
       out.apply(change.build(), OverwriteMode.NEVER);
     }
     return out.build();
@@ -339,7 +351,8 @@ public static Stream<String> write(RangeKey key) {
 
   // Shared by ShortcodeTableSchema
   public static RangeKey read(List<String> parts) {
-    return RangeKey.create(RangeSpecification.parse(parts.get(0)), parseLengths(parts.get(1)));
+    return RangeKey.create(
+        RangeSpecification.parse(parts.get(0)), LengthsParser.parseLengths(parts.get(1)));
   }
 
   private static String formatLength(ImmutableSortedSet<Integer> lengthSet) {
@@ -364,33 +377,5 @@ private static String formatRange(Range<Integer> r) {
     }
   }
 
-  private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults();
-  private static final Splitter RANGE_SPLITTER = Splitter.on('-').trimResults().limit(2);
-
-  private static NavigableSet<Integer> parseLengths(String s) {
-    NavigableSet<Integer> lengths = new TreeSet<>();
-    for (String lengthOrRange : COMMA_SPLITTER.split(s)) {
-      if (lengthOrRange.contains("-")) {
-        List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange);
-        int lo = parseInt(lohi.get(0));
-        int hi = parseInt(lohi.get(1));
-        checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi);
-        checkArgument(lengths.isEmpty() || lo > lengths.last(), "Overlapping ranges: %s", s);
-        lengths.addAll(ContiguousSet.closed(lo, hi));
-      } else {
-        int length = parseInt(lengthOrRange);
-        checkArgument(lengths.isEmpty() || length > lengths.last(), "Overlapping ranges: %s", s);
-        lengths.add(length);
-      }
-    }
-    return lengths;
-  }
-
-  private static int parseInt(String s) {
-    return Integer.parseUnsignedInt(s, 10);
-  }
-
   private RangesTableSchema() {}
 }
-
-
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java
new file mode 100644
index 0000000000..8571f5b6f9
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.util.Optional;
+
+/**
+ * Represents an NFA graph which accepts sequences of inputs of any digit (also known as "any-digit
+ * sequences"), possibly of variable length. For example, an {@code AnyPath} instance might accept
+ * a single input of any digit (i.e. equivalent to the regular expression {@code "\d"}), or it might
+ * accept sequences of any digits of length 4 or 6 (i.e. equivalent to the regular expression
+ * {@code "\d{4}(\d{2})?"}).
+ *
+ * <p>As {@code AnyPath} instances are all restricted to only accepting any-digits sequences, the
+ * only interesting thing about them is the set of sequence lengths they accept.
+ */
+@AutoValue
+abstract class AnyPath implements Comparable<AnyPath> {
+  /**
+   * The special empty path which matches zero length input. This is useful as an identity value
+   * when constructing other paths but should never be a path in the graph.
+   */
+  public static final AnyPath EMPTY = new AutoValue_AnyPath(0x1);
+
+  /** The path matching exactly one input of any digit. */
+  public static final AnyPath SINGLE = of(0x2);
+
+  /** The path matching one or zero inputs of any digit. */
+  public static final AnyPath OPTIONAL = of(0x3);
+
+  @VisibleForTesting
+  static AnyPath of(int mask) {
+    Preconditions.checkArgument(mask > 1, "invalid path mask: %s", mask);
+    return new AutoValue_AnyPath(mask);
+  }
+
+  /**
+   * Returns a bit-mask representing the lengths of any-digit sequences accepted by this path.
+   * If bit-N is set, then this path accepts an N-length sequence of any digits.
+   */
+  abstract int mask();
+
+  /** Returns whether this path accepts an any-digit sequence of length {@code n}.*/
+  public boolean acceptsLength(int n) {
+    Preconditions.checkArgument(n >= 0 && n < 32, "invalid path length: %s", n);
+    return (mask() & (1 << n)) != 0;
+  }
+
+  /** Returns the maximum length any-sequence that this path will accept. */
+  public int maxLength() {
+    return (31 - Integer.numberOfLeadingZeros(mask()));
+  }
+
+  /**
+   * Returns whether this path is empty (i.e. accepts only zero length sequences). This is only
+   * useful when constructing paths and empty paths should never appear in an NFA graph.
+   */
+  public boolean isEmpty() {
+    return mask() == 0x1;
+  }
+
+  /**
+   * Extends this path by one input, potentially setting all input as optional. For example (using
+   * 'x' to represent a single "any digit" input):
+   * <ul>
+   *   <li>{@code "xx".extend(false) == "xxx"}
+   *   <li>{@code "xx".extend(true) == "(xxx)?"}
+   *   <li>{@code "xx(x)?".extend(false) == "xxx(x)?"}
+   *   <li>{@code "xx(x)?".extend(true) == "(xxx(x)?)?"}
+   * </ul>
+   */
+  public AnyPath extend(boolean allOptional) {
+    return of((mask() << 1) | (allOptional ? 0x1 : 0x0));
+  }
+
+  /**
+   * Joins the given path to this one, results in a new path which is equivalent to the
+   * concatenation of the regular expressions they represent. For example (using
+   * 'x' to represent a single "any digit" input):
+   * <ul>
+   *   <li>{@code "xx".join("xx") == "xxxx"}
+   *   <li>{@code "xx".join("x?") == "xx(x)?"}
+   * </ul>
+   */
+  public AnyPath join(AnyPath other) {
+    int newMask = 0;
+    // Include the length itself (which is always accepted).
+    for (int n = 0; n <= other.maxLength(); n++) {
+      if (other.acceptsLength(n)) {
+        newMask |= mask() << n;
+      }
+    }
+    return of(newMask);
+  }
+
+  /**
+   * Returns a new path which is equal to this path, except that it also accepts zero length
+   * sequences.
+   */
+  public AnyPath makeOptional() {
+    return of(mask() | 0x1);
+  }
+
+  /**
+   * Attempts to "factor" this path by the given path to produce a path such that
+   * {@code p.factor(q).join(q)} is equivalent to {@code p}. This is useful when trying to
+   * determine longest common paths. Factorizing may not succeed in cases where no common path
+   * exists (e.g. {@code "xx(xx)?".factor("x?")} fails because there is no way to join anything
+   * to the path {@code "x?"} to make it accept exactly 2 or 4 length any-digit sequences).
+   */
+  public Optional<AnyPath> factor(AnyPath other) {
+    // Integer division only proposes a candidate. Multiplication carries (e.g. 3 * 3 == 9) differ
+    // from join(), which ORs shifted masks, so "(xxx)?".factor("x?") must not yield "x?". The
+    // candidate is therefore confirmed by actually joining it back onto the other path.
+    int factor = mask() / other.mask();
+    boolean isFactor = factor > 1 && of(factor).join(other).mask() == mask();
+    return isFactor ? Optional.of(of(factor)) : Optional.empty();
+  }
+
+  @Override
+  public int compareTo(AnyPath other) {
+    return Integer.compare(mask(), other.mask());
+  }
+
+  @Override
+  public final String toString() {
+    // A non-obvious algorithm for getting a reasonable toString() using x's.
+    // Best understood via examples:
+    //
+    // 0001 is invalid as we cannot represent an optional zero-length sequence.
+    //
+    // Hi-bit-1 ==> 1 x
+    // 0010 -> x, 0011 -> (x)?
+    //
+    // Hi-bit-2 ==> 2 x's
+    // 0100 -> xx, 0101 -> (xx)?, 0110 -> x(x)?, 0111 -> (x(x)?)?
+    //
+    // Hi-bit-3 ==> 3 x's
+    // 1000 -> xxx,    1001 -> (xxx)?,    1010 -> x(xx)?,    1011 -> (x(xx)?)?
+    // 1100 -> xx(x)?, 1101 -> (xx(x)?)?, 1110 -> x(x(x)?)?, 1111 -> (x(x(x)?)?)?
+    //
+    // Rules:
+    // * For hi-bit M, there are M x's in the string.
+    // * For N < M; if bit-N is set, then a group starts after the Nth-x.
+    if (mask() == 0x1) {
+      return "<EMPTY>";
+    }
+    StringBuilder out = new StringBuilder();
+    for (int n = 0; n < maxLength(); n++) {
+      out.append('x');
+    }
+    // Loop high-to-low to prevent earlier insertions messing with the index.
+    for (int n = maxLength() - 1; n >= 0; n--) {
+      if (acceptsLength(n)) {
+        out.insert(n, '(');
+      }
+    }
+    // The number of opened groups was the number of set bits - 1.
+    for (int n = Integer.bitCount(mask()) - 1; n > 0; n--) {
+      out.append(")?");
+    }
+    return out.toString();
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java
new file mode 100644
index 0000000000..34e984018a
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Value type for edges in NFA graphs of phone number regular expressions. Outside this package,
+ * this type is mainly used for examining NFA graphs which represent a regular expression,
+ * generated via {@link RangeTreeConverter#toNfaGraph}.
+ *
+ * <p>Note that the ordering of edges is carefully designed to attempt to replicate as much of the
+ * existing intuition about ordering in regular expressions as possible. This should result in any
+ * generated expressions being as close to existing hand edited expressions as possible.
+ */
+public abstract class Edge implements Comparable<Edge> {
+  /** API for visiting composite edges; see also {@link #accept(Visitor)}. */
+  public interface Visitor {
+    /** Visits a leaf node simple edge. */
+    void visit(SimpleEdge edge);
+    /**
+     * Visits a composited sequence of edges. Note that sequences only ever contain disjunctions or
+     * simple edges, but never other sequences. For edges "a", "b", "c", this represents the
+     * concatenated edge "abc".
+     */
+    void visitSequence(List<Edge> edges);
+    /**
+     * Visits a disjunction of parallel edges. Note that disjunctions only ever contain sequences
+     * or simple edges, but never other disjunctions. For edges "a", "b", "c", this represents the
+     * disjunctive group "(a|b|c)".
+     */
+    void visitGroup(Set<Edge> edges, boolean isOptional);
+  }
+
+  // The singleton epsilon edge.
+  private static final SimpleEdge EPSILON = new SimpleEdge();
+  // The singleton edge matching any digit (i.e. 'x' or '\d').
+  private static final SimpleEdge ANY = new SimpleEdge(ALL_DIGITS_MASK, false);
+  // The singleton edge optionally matching any digit (i.e. 'x?' or '\d?').
+  private static final SimpleEdge OPTIONAL_ANY = ANY.optional();
+
+  /** Returns an edge which accepts digits 0 to 9 according to the bits set in the given mask. */
+  public static SimpleEdge fromMask(int digitMask) {
+    return digitMask == ALL_DIGITS_MASK ? ANY : new SimpleEdge(digitMask, false);
+  }
+
+  /**
+   * Returns the epsilon edge which accepts zero length input and transitions immediately. This
+   * edge should only ever appear parallel to other edges, and not as the only transition between
+   * two nodes.
+   */
+  public static SimpleEdge epsilon() {
+    return EPSILON;
+  }
+
+  /** Returns the edge which accepts any digit {@code [0-9]}. */
+  public static SimpleEdge any() {
+    return ANY;
+  }
+
+  /** Returns the edge which optionally accepts any digit {@code [0-9]}. */
+  public static SimpleEdge optionalAny() {
+    return OPTIONAL_ANY;
+  }
+
+  /**
+   * Returns the ordered concatenation of the given edges. If either edge is a concatenation, it
+   * is first expanded, so that the resulting edge contains only simple edges or disjunctions.
+   */
+  public static Edge concatenation(Edge lhs, Edge rhs) {
+    checkArgument(!lhs.equals(EPSILON) && !rhs.equals(EPSILON), "cannot concatenate epsilon edges");
+    // Don't make concatenations of concatenations; flatten them out so you only have singletons
+    // or disjunctions. This is equivalent to writing "xyz" instead of "x(yz)".
+    List<Edge> edges = Stream.of(lhs, rhs)
+        .flatMap(
+            e -> (e instanceof Concatenation) ? ((Concatenation) e).edges.stream() : Stream.of(e))
+        .collect(Collectors.toList());
+    return new Concatenation(edges);
+  }
+
+  /**
+   * Returns the disjunction of the given edges. If any edge is already a disjunction, it is
+   * first expanded, so that the resulting edge contains only simple edges or concatenations.
+   */
+  public static Edge disjunction(Collection<Edge> edges) {
+    // Don't make disjunctions of disjunctions; flatten them out so you only have singletons,
+    // concatenations or epsilon. This is equivalent to writing "(x|y|z)" instead of "(x|(y|z))".
+    List<Edge> allEdges = edges.stream()
+        .flatMap(
+            e -> (e instanceof Disjunction) ? ((Disjunction) e).edges.stream() : Stream.of(e))
+        .sorted()
+        .distinct()
+        .collect(Collectors.toList());
+    // There should only ever be one epsilon when we make a disjunction (disjunctions are made when
+    // subgraphs collapse and each subgraph should only have one epsilon to make it optional).
+    // Epsilons sort to-the-left of everything, so if there is an epsilon it must be the first edge.
+    boolean isOptional = allEdges.get(0) == EPSILON;
+    if (isOptional) {
+      allEdges = allEdges.subList(1, allEdges.size());
+    }
+    Preconditions.checkState(!allEdges.contains(EPSILON));
+    return new Disjunction(allEdges, isOptional);
+  }
+
+  /** An edge optionally matching a single input token, or the epsilon transition. */
+  public static final class SimpleEdge extends Edge {
+    private final int digitMask;
+    private final boolean isOptional;
+
+    // Constructor for singleton epsilon edge.
+    private SimpleEdge() {
+      this.digitMask = 0;
+      // An optional epsilon makes no real sense.
+      this.isOptional = false;
+    }
+
+    private SimpleEdge(int digitMask, boolean isOptional) {
+      checkArgument(digitMask > 0 && digitMask < (1 << 10), "invalid bit mask %s", digitMask);
+      this.digitMask = digitMask;
+      this.isOptional = isOptional;
+    }
+
+    /** Returns the mask of digits accepted by this edge. */
+    public int getDigitMask() {
+      return digitMask;
+    }
+
+    /** Returns whether this edge is optional. */
+    public boolean isOptional() {
+      return isOptional;
+    }
+
+    /** Returns an optional version of this, non-optional edge. */
+    public SimpleEdge optional() {
+      Preconditions.checkState(digitMask != 0, "cannot make epsilon optional");
+      Preconditions.checkState(!isOptional, "edge already optional");
+      return new SimpleEdge(digitMask, true);
+    }
+
+    @Override
+    public void accept(Visitor visitor) {
+      visitor.visit(this);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return (obj instanceof SimpleEdge) && digitMask == ((SimpleEdge) obj).digitMask;
+    }
+
+    @Override
+    public int hashCode() {
+      return digitMask;
+    }
+
+    @Override
+    public int compareTo(Edge rhs) {
+      if (rhs instanceof SimpleEdge) {
+        return compare((SimpleEdge) rhs);
+      } else {
+        // Composite types know how to compare themselves to SimpleEdges, so delegate to them but
+        // remember to invert the result since we are reversing the comparison order.
+        return -rhs.compareTo(this);
+      }
+    }
+
+    private int compare(SimpleEdge rhs) {
+      if (isOptional != rhs.isOptional) {
+        // Optional edges sort to-the-right of non-optional things.
+        return isOptional ? 1 : -1;
+      }
+      if (digitMask == rhs.digitMask) {
+        return 0;
+      }
+      if (digitMask == 0 || rhs.digitMask == 0) {
+        // Epsilon sorts to-the-left of everything.
+        return digitMask == 0 ? -1 : 1;
+      }
+      // Unlike many other places where range specifications are used, we cannot guarantee the
+      // ranges are disjoint here, so we sort on the reversed bitmask to favour the lowest set bit.
+      // This sorts 'x' ([0-9]) to the left of all other digit edges (epsilon was handled above).
+      // I.e. "x" < "0", "0" < "1", "[0-3]" < "[0-2]", "[0-2]" < "9".
+      //
+      // Remember to logical-shift back down to avoid negative values.
+      int reverseLhsMask = (Integer.reverse(digitMask) >>> 22);
+      int reverseRhsMask = (Integer.reverse(rhs.digitMask) >>> 22);
+      // Compare in the opposite order, so the largest reversed value is ordered "to the left".
+      return Integer.compare(reverseRhsMask, reverseLhsMask);
+    }
+  }
+
+  // A sequence of edges (disjunctions or simple edges).
+  private static final class Concatenation extends Edge {
+    private final ImmutableList<Edge> edges;
+
+    private Concatenation(Collection<Edge> edges) {
+      this.edges = ImmutableList.copyOf(edges);
+    }
+
+    @Override
+    public void accept(Visitor visitor) {
+      visitor.visitSequence(edges);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return (obj instanceof Concatenation) && edges.equals(((Concatenation) obj).edges);
+    }
+
+    @Override
+    public int hashCode() {
+      return edges.hashCode();
+    }
+
+    @Override
+    public int compareTo(Edge rhs) {
+      if (rhs instanceof Concatenation) {
+        return compareEdges(edges, ((Concatenation) rhs).edges);
+      } else {
+        // Compare our first edge to the non-concatenation. If this compares as equal, order the
+        // concatenation between simple edges and disjunctions to break the tie and avoid implying
+        // that a concatenation and a non-concatenation are equal.
+        int comparison = -rhs.compareTo(edges.get(0));
+        return comparison != 0 ? comparison : (rhs instanceof SimpleEdge ? 1 : -1);
+      }
+    }
+  }
+
+  // A disjunctive group of edges (sequences or simple edges).
+  private static final class Disjunction extends Edge {
+    private final ImmutableSortedSet<Edge> edges;
+    private final boolean isOptional;
+
+    private Disjunction(Collection<Edge> edges, boolean isOptional) {
+      checkArgument(!edges.isEmpty());
+      this.edges = ImmutableSortedSet.copyOf(edges);
+      this.isOptional = isOptional;
+    }
+
+    @Override
+    public void accept(Visitor visitor) {
+      visitor.visitGroup(edges, isOptional);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return (obj instanceof Disjunction) && edges.equals(((Disjunction) obj).edges);
+    }
+
+    @Override
+    public int hashCode() {
+      // Negate bits here to be different from Concatenation.
+      return ~edges.hashCode();
+    }
+
+    @Override
+    public int compareTo(Edge rhs) {
+      if (rhs instanceof Disjunction) {
+        return compareEdges(edges.asList(), ((Disjunction) rhs).edges.asList());
+      } else {
+        // Compare our first edge to the non-disjunction. If this compares as equal, order the
+        // disjunction to the right of the other edge to break the tie and avoid implying that
+        // a disjunction and a non-disjunction are equal.
+        int comparison = -rhs.compareTo(edges.asList().get(0));
+        return comparison == 0 ? 1 : comparison;
+      }
+    }
+  }
+
+  /**
+   * Accepts a visitor on this edge, visiting any sub-edges from which it is composed. This is a
+   * double-dispatch visitor to avoid anyone processing edges needing to know about specific types.
+   * Only the immediate edge is visited and the visitor is then responsible for visiting child
+   * edges.
+   */
+  public abstract void accept(Visitor visitor);
+
+  // Compare lists according to elements, and tie break on length if different. This is effectively
+  // a lexicographical ordering.
+  private static int compareEdges(ImmutableList<Edge> lhs, ImmutableList<Edge> rhs) {
+    int minSize = Math.min(lhs.size(), rhs.size());
+    for (int n = 0; n < minSize; n++) {
+      int compared = lhs.get(n).compareTo(rhs.get(n));
+      if (compared != 0) {
+        return compared;
+      }
+    }
+    return Integer.compare(lhs.size(), rhs.size());
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder out = new StringBuilder();
+    accept(new Visitor() {
+      @Override
+      public void visit(SimpleEdge e) {
+        if (e.equals(Edge.epsilon())) {
+          // Epsilon cannot be optional.
+          out.append("e");
+        } else {
+          int m = e.getDigitMask();
+          out.append(m == ALL_DIGITS_MASK ? "x" : RangeSpecification.toString(m));
+          if (e.isOptional()) {
+            out.append('?');
+          }
+        }
+      }
+
+      @Override
+      public void visitSequence(List<Edge> edges) {
+        edges.forEach(e -> e.accept(this));
+      }
+
+      @Override
+      public void visitGroup(Set<Edge> edges, boolean isOptional) {
+        out.append("(");
+        edges.forEach(e -> {
+          e.accept(this);
+          out.append("|");
+        });
+        out.setLength(out.length() - 1);
+        out.append(isOptional ? ")?" : ")");
+      }
+    });
+    return out.toString();
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java
new file mode 100644
index 0000000000..1543a81a19
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+
+import com.google.common.collect.Iterables;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.Visitor;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import javax.annotation.Nullable;
+
+/** Writes an NFA graph edge instance as a regular expression. */
+final class EdgeWriter implements Visitor {
+  // Regex constant strings pulled out for some degree of readability.
+  private static final String DOT_MATCH = ".";
+  private static final String DIGIT_MATCH = "\\d";
+  private static final String OPTIONAL_MARKER = "?";
+  private static final String GROUP_START = "(?:";
+  private static final String GROUP_DISJUNCTION = "|";
+  private static final String GROUP_END = ")";
+  private static final String OPTIONAL_GROUP_END = GROUP_END + OPTIONAL_MARKER;
+
+  /**
+   * Returns a regular expression corresponding to the structure of the given edge. This method
+   * does not apply any specific optimizations to the edge it is given and any optimizations which
+   * affect the output must have already been applied to the graph from which the input edge was
+   * derived.
+   *
+   * @param edge A collapsed edge typically derived from serializing an NFA graph.
+   * @param useDotMatch true if {@code '.'} should be used to "match any digit" (instead of
+   *     {@code '\\d'}) which results in shorter output.
+   */
+  public static String toRegex(Edge edge, boolean useDotMatch) {
+    EdgeWriter writer = new EdgeWriter(useDotMatch);
+    edge.accept(writer);
+    return writer.out.toString();
+  }
+
+  // The token to match any input digit (e.g. "\\d" or ".").
+  private final String anyToken;
+  // Accumulated regular expression appended to during visitation.
+  private final StringBuilder out = new StringBuilder();
+  // Flag to determine when the top-level edge visited is a group, because if it is we can often
+  // omit the explicit grouping tokens and save some space.
+  private boolean isTopLevelGroup = true;
+
+  private EdgeWriter(boolean useDotMatch) {
+    this.anyToken = useDotMatch ? DOT_MATCH : DIGIT_MATCH;
+  }
+
+  @Override
+  public void visit(SimpleEdge e) {
+    checkArgument(!e.equals(Edge.epsilon()), "unexpected bare epsilon");
+    isTopLevelGroup = false;
+    // It's easier to just attempt to extract an "any" edge as that code already has to work for
+    // simple edges when they are inside other composite edges. Optionality is encoded into the
+    // resulting AnyPath and handled by appendRegex(), so we don't need to handle it again here.
+    Optional<AnyPath> any = AnyPathVisitor.extractAnyPath(e);
+    if (any.isPresent()) {
+      appendRegex(out, any.get().mask());
+    } else {
+      // Not an "any" edge so append the usual range representation (e.g. "6" or "[014-9]").
+      out.append(RangeSpecification.toString(e.getDigitMask()));
+      if (e.isOptional()) {
+        out.append(OPTIONAL_MARKER);
+      }
+    }
+  }
+
+  @Override
+  public void visitSequence(List<Edge> edges) {
+    checkArgument(!edges.isEmpty(), "sequences must have at least one edge");
+    isTopLevelGroup = false;
+    // At this level a sequence might be a mix of normal and "any" edges (e.g. "123xxxx"). To
+    // cope with this, track and accumulate the un-written "any" edge, and emit it just before
+    // any other output (or at the end).
+    AnyPath any = AnyPath.EMPTY;
+    for (Edge e : edges) {
+      Optional<AnyPath> next = AnyPathVisitor.extractAnyPath(e);
+      if (next.isPresent()) {
+        any = any.join(next.get());
+        continue;
+      }
+      // Here we have a "normal" edge, but we still might need to emit a collected "any" edge.
+      if (!any.isEmpty()) {
+        appendRegex(out, any.mask());
+        any = AnyPath.EMPTY;
+      }
+      // This recursion only happens when this was not an "any" edge (though it may still be a
+      // composite that contains other "any" edges).
+      e.accept(this);
+    }
+    // If the last thing we saw in this sequence was an "any" edge, don't forget to emit it.
+    if (!any.isEmpty()) {
+      appendRegex(out, any.mask());
+    }
+  }
+
+  @Override
+  public void visitGroup(Set<Edge> edges, boolean isOptional) {
+    checkArgument(!edges.isEmpty(), "groups must have at least one edge");
+    // The very top-level group is almost always non-optional and can be omitted for length
+    // (ie. "(?:a|b|c)" can just be "a|b|c").
+    boolean canSkipParens = isTopLevelGroup && !isOptional;
+    // Unset this before recursing.
+    isTopLevelGroup = false;
+
+    // We have exactly one case where an "any" edge needs to be handled for groups, and that's
+    // when there's an optional any group that's not part of an enclosing sequence (e.g. "(xx)?").
+    if (edges.size() == 1 && isOptional) {
+      Optional<AnyPath> any = AnyPathVisitor.extractAnyPath(Iterables.getOnlyElement(edges));
+      if (any.isPresent()) {
+        // Remember to account for the optionality of the outer group.
+        appendRegex(out, any.get().makeOptional().mask());
+        return;
+      }
+    }
+
+    if (!canSkipParens) {
+      out.append(GROUP_START);
+    }
+    for (Edge e : edges) {
+      e.accept(this);
+      out.append(GROUP_DISJUNCTION);
+    }
+    // Easier to just remove the disjunction we know was added last than track state in the loop.
+    out.setLength(out.length() - GROUP_DISJUNCTION.length());
+    if (!canSkipParens) {
+      out.append(isOptional ? OPTIONAL_GROUP_END : GROUP_END);
+    }
+  }
+
+  /**
+   * Recursive visitor to extract "any" sequences from edges (simple or composite). A sequence of
+   * edges is an "any path" if all edges accept any digit. Composite edges already enforce the
+   * requirement that epsilon edges don't exist directly (they are represented via optionality).
+   */
+  private static final class AnyPathVisitor implements Visitor {
+    /**
+     * Returns the longest "any" sequence represented by the given edge (if the edge represents an
+     * any sequence). If present, the result is non-empty. The returned {@code Optional} itself is
+     * never null; it is empty when the edge is not an any sequence.
+     */
+    public static Optional<AnyPath> extractAnyPath(Edge e) {
+      AnyPathVisitor visitor = new AnyPathVisitor();
+      e.accept(visitor);
+      return Optional.ofNullable(visitor.path);
+    }
+
+    // Accumulate value during visitation and set to null to abort.
+    @Nullable
+    private AnyPath path = AnyPath.EMPTY;
+
+    @Override
+    public void visit(SimpleEdge edge) {
+      checkState(path != null, "path should never be null at start of recursion");
+      if (edge.getDigitMask() == ALL_DIGITS_MASK) {
+        path = path.join(edge.isOptional() ? AnyPath.OPTIONAL : AnyPath.SINGLE);
+      } else {
+        path = null;
+      }
+    }
+
+    @Override
+    public void visitSequence(List<Edge> edges) {
+      checkState(path != null, "path should never be null at start of recursion");
+      // Looking for a complete sequence of "any edges" (partial sequences in a concatenation are
+      // taken care of by the caller).
+      for (Edge e : edges) {
+        Optional<AnyPath> next = AnyPathVisitor.extractAnyPath(e);
+        if (next.isPresent()) {
+          path = path.join(next.get());
+        } else {
+          path = null;
+          break;
+        }
+      }
+    }
+
+    @Override
+    public void visitGroup(Set<Edge> edges, boolean isOptional) {
+      checkState(path != null, "path should never be null at start of recursion");
+      // Looking for a group like (xxx(xx)?)? which contains one edge only. We just recurse into
+      // that edge and then make the result optional (a disjunction with only one edge must be
+      // optional or else it should have been a concatenation).
+      if (edges.size() > 1) {
+        path = null;
+        return;
+      }
+      checkState(isOptional, "single edge disjunctions should be optional");
+      Edge e = Iterables.getOnlyElement(edges);
+      e.accept(this);
+      if (path != null) {
+        path = path.makeOptional();
+      }
+    }
+  }
+
+  // The code below here is really a bit squiffy and relies on a whole bunch of bit fiddling to
+  // do what it does. The good news is that it's easy to unit-test the heck out of, so that's
+  // what I've done. Don't look too hard at what's going on unless you're a bit of a masochist.
+
+  /**
+   * Appends the regular expression corresponding to the given AnyPath mask value. This is a
+   * bit-mask where the Nth bit corresponds to accepting an any digit sequence of length N.
+   *
+   * <p>For example:
+   * <ul>
+   *   <li> {@code 00000010} accepts only length 1 (e.g. "\d")
+   *   <li> {@code 00000011} accepts lengths 0 or 1 (e.g. "\d?")
+   *   <li> {@code 00001000} accepts only length 3 (e.g. "\d{3}")
+   *   <li> {@code 00011100} accepts lengths 2-4 (e.g. "\d{2,4}")
+   *   <li> {@code 11101101} accepts lengths 0,2,3,5,6,7 (e.g. "(?:\d\d(?:\d(?:\d{2,4})?)?)?")
+   * </ul>
+   */
+  private void appendRegex(StringBuilder out, int mask) {
+    checkArgument(mask > 1, "unexpected mask value %s", mask);
+    // Deal with optionality separately.
+    boolean allOptional = (mask & 0x1) != 0;
+    mask &= ~0x1;
+    // We are looking for bit patterns like '1111000' for contiguous ranges (e.g. {3,7}).
+    // Find the lo/hi size of the next contiguous range (inclusive).
+    int lo = Integer.numberOfTrailingZeros(mask);
+    int hi = Integer.numberOfTrailingZeros(~(mask >>> lo)) + (lo - 1);
+
+    // If all the bits are accounted for (nothing above the "hi" bit) then this was the last
+    // contiguous range and we don't need to recurse (so no more groups need to be opened). The
+    // mask is shifted down because comparing with (1 << (hi + 1)) overflows when hi is 30.
+    if ((mask >>> (hi + 1)) == 0) {
+      // Writes a contiguous range as a single token with optionality (e.g. "\d", "(?:\d{2,4})?").
+      appendAnyRange(out, lo, hi, allOptional);
+      return;
+    }
+    // This is about the entire group, not the subgroup we are about to recurse into.
+    if (allOptional) {
+      out.append(GROUP_START);
+    }
+    // IMPORTANT: If we are recursing, we must not attempt to emit the entire group here, only the
+    // shortest matching length.
+    // Mask "11101100" does NOT represent "\d{2,3}(?:\d{2,4})?" as that can match 4-digits too.
+    // Instead it should generate "\d\d(?:\d(?:\d{2,4})?)?", where the 3 digit match is part of an
+    // optional group.
+    appendRequiredAnyRange(out, lo);
+    // Recurse using the mask that's had the match we just emitted "factored out". This is always
+    // optional because bit-0 is what was the lowest set bit in our mask.
+    appendRegex(out, mask >>> lo);
+    if (allOptional) {
+      out.append(OPTIONAL_GROUP_END);
+    }
+  }
+
+  /**
+   * Writes the token sequence matching exactly {@code n} arbitrary digits (never optional).
+   *
+   * <p>The output depends on {@code n}:
+   * <ol>
+   *   <li>{@code n=1}: the any-digit token once, e.g. {@code "\d"}
+   *   <li>{@code n=2}: the token twice, e.g. {@code "\d\d"} (rather than {@code "\d{2}"})
+   *   <li>{@code n>2}: the token plus an explicit count, e.g. {@code "\d{n}"}
+   * </ol>
+   */
+  private void appendRequiredAnyRange(StringBuilder out, int n) {
+    checkArgument(n >= 1, "bad any length %s", n);
+    out.append(anyToken);
+    if (n > 2) {
+      out.append('{').append(n).append('}');
+    } else if (n == 2) {
+      // Doubling the token is only valid for a required match ("\d\d?" != "(?:\d{2})?").
+      out.append(anyToken);
+    }
+  }
+
+  /**
+   * Writes the tokens matching any digit sequence whose length lies in {@code [lo, hi]}.
+   *
+   * <p>Expected output (with the any-digit token written as {@code "\d"}):
+   * <ol>
+   *   <li>{@code lo=1, hi=1, optional=false}: {@code "\d"}
+   *   <li>{@code lo=1, hi=1, optional=true}: {@code "\d?"}
+   *   <li>{@code lo=2, hi=2, optional=false}: {@code "\d\d"} (special case for size)
+   *   <li>{@code lo=2, hi=2, optional=true}: {@code "(?:\d{2})?"}
+   *   <li>{@code lo=1, hi=2, optional=false}: {@code "\d\d?"} (special case for size)
+   *   <li>{@code lo=1, hi=4, optional=true}: {@code "\d{0,4}"} (not {@code (?:\d{1,4})?})
+   *   <li>{@code lo=3, hi=6, optional=false}: {@code "\d{3,6}"}
+   *   <li>{@code lo=3, hi=6, optional=true}: {@code "(?:\d{3,6})?"}
+   * </ol>
+   *
+   * @param out the buffer the tokens are appended to
+   * @param lo the shortest accepted length (at least 1)
+   * @param hi the longest accepted length (at least {@code lo})
+   * @param optional whether the empty sequence is accepted as well
+   */
+  private void appendAnyRange(StringBuilder out, int lo, int hi, boolean optional) {
+    checkArgument(lo >= 1 && hi >= lo, "bad range arguments %s, %s", lo, hi);
+    boolean singleLength = (lo == hi);
+    if (singleLength && !optional) {
+      appendRequiredAnyRange(out, lo);
+    } else if (singleLength && lo == 1) {
+      // A single optional digit needs no group: "\d?".
+      out.append(anyToken).append(OPTIONAL_MARKER);
+    } else if (singleLength) {
+      // Optional fixed length above one: "(?:\d{N})?".
+      out.append(GROUP_START).append(anyToken);
+      out.append('{').append(lo).append('}');
+      out.append(OPTIONAL_GROUP_END);
+    } else if (lo == 1 && optional) {
+      // "\d{0,N}" is shorter than the equivalent "(?:\d{1,N})?".
+      out.append(anyToken).append("{0,").append(hi).append('}');
+    } else if (lo == 1 && hi == 2) {
+      // "\d\d?" is shorter than "\d{1,2}" (and shorter still with '.'). The '?' written here
+      // makes only the second digit optional; the range as a whole is still required.
+      out.append(anyToken).append(anyToken).append(OPTIONAL_MARKER);
+    } else if (optional) {
+      // General optional case: wrap the counted range in an optional group.
+      out.append(GROUP_START);
+      out.append(anyToken).append('{').append(lo).append(',').append(hi).append('}');
+      out.append(OPTIONAL_GROUP_END);
+    } else {
+      // General required case.
+      out.append(anyToken).append('{').append(lo).append(',').append(hi).append('}');
+    }
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java
new file mode 100644
index 0000000000..1f5a4de5c2
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.graph.ValueGraph;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.function.Function;
+
+/**
+ * Flattens an NFA graph of simple edges into a composite edge which represents all the same
+ * transitions in a strict tree structure (i.e. nestable sub-groups). This can entail some
+ * duplication of edges, but this should be kept to a minimum and favours duplicating trailing
+ * paths to avoid introducing additional non-determinism.
+ */
+final class NfaFlattener {
+  /**
+   * Flattens the given NFA graph into a single composite edge composed of concatenation and
+   * disjunction. The resulting edge can be visited using the {@code Edge.Visitor} class.
+   */
+  public static Edge flatten(ValueGraph<Node, SimpleEdge> graph) {
+    return new NfaFlattener(graph).flatten();
+  }
+
+  /**
+   * A simple pair of edge value and target node which represents the current state along any path
+   * in the NFA graph. Path followers may be joined (if they point at the same node) but can only
+   * be split by recursion into the new subgraph.
+   */
+  @AutoValue
+  abstract static class PathFollower {
+    private static PathFollower of(Node node, Edge edge) {
+      return new AutoValue_NfaFlattener_PathFollower(node, edge);
+    }
+
+    /** The target node that this follower points to. */
+    abstract Node node();
+    /** A composite edge representing everything up to the target node in the current sub-graph. */
+    abstract Edge edge();
+  }
+
+  // The graph being flattened.
+  private final ValueGraph<Node, SimpleEdge> graph;
+  // An ordering for the work queue which ensures that followers with the same node are adjacent.
+  private final Comparator<PathFollower> queueOrder;
+
+  private NfaFlattener(ValueGraph<Node, SimpleEdge> graph) {
+    this.graph = graph;
+    this.queueOrder = Comparator
+        .comparing(PathFollower::node, nodeOrdering(graph))
+        .thenComparing(PathFollower::edge);
+  }
+
+  private Edge flatten() {
+    // Sub-graph visitation only works for graphs which branch from and collapse to a single node.
+    // An NFA graph could be multiple sequential edges or a sequence of edges and sub-graphs.
+    // Handle that in this outer loop rather than complicate the visitor (already quite complex).
+    PathFollower out = visitSubgraph(Node.INITIAL);
+    while (!out.node().equals(Node.TERMINAL)) {
+      PathFollower subgraph = visitSubgraph(out.node());
+      out = PathFollower.of(subgraph.node(), Edge.concatenation(out.edge(), subgraph.edge()));
+    }
+    return out.edge();
+  }
+
+  /**
+   * Visits the sub-graph rooted at the given node, following all out-edges until they eventually
+   * re-join. Because the given graph has only one terminal node and no cycles, all sub-graphs must
+   * eventually rejoin at some point. If during visitation of a sub-graph, a node with multiple
+   * out-edges is reached, then the sub-graph it starts is recursively visited. Note that as "inner"
+   * sub-graphs must terminate at or before their parent graph, nesting is assured.
+   *
+   * <p>The key to the implementation of this algorithm is that visitation occurs in breadth-first
+   * order defined according to the reachability of the nodes in the graph. This ensures that when
+   * an edge follower which reaches a node at which other edges join together is processed (i.e.
+   * when it gets to the head of the queue) all the other followers that can also reach that node
+   * must also be present in a contiguous sequence at the front of the queue.
+   */
+  private PathFollower visitSubgraph(Node node) {
+    Preconditions.checkArgument(graph.outDegree(node) > 0, "cannot recurse from the terminal node");
+    if (graph.outDegree(node) == 1) {
+      // Visit the trivial "subgraph" that's really just a single edge. Note that this code could
+      // loop and concatenate all sequential single edges, but it also works fine to rely on the
+      // recursion of the caller (the advantage of doing it this, simpler, way means that this code
+      // doesn't have to know about termination due to reaching the terminal node).
+      Node target = Iterables.getOnlyElement(graph.successors(node));
+      return PathFollower.of(target, graph.edgeValue(node, target).get());
+    }
+    // A work-queue of the path followers, ordered primarily by the node they target. This results
+    // in the followers at any "point of collapse" being adjacent in the queue.
+    PriorityQueue<PathFollower> followerQueue = new PriorityQueue<>(queueOrder);
+    for (Node t : graph.successors(node)) {
+      followerQueue.add(PathFollower.of(t, graph.edgeValue(node, t).get()));
+    }
+    while (true) {
+      // Get the set of followers that share the same target node at the head of the queue. The
+      // ordering in the queue ensures that followers for the same target are always adjacent.
+      PathFollower follower = followerQueue.remove();
+      Node target = follower.node();
+      List<Edge> joiningEdges = collectJoiningEdges(followerQueue, target);
+      if (joiningEdges != null) {
+        // Replace any joined followers with their disjunction (they all have the same target).
+        joiningEdges.add(follower.edge());
+        follower = PathFollower.of(target, Edge.disjunction(joiningEdges));
+      }
+      if (followerQueue.isEmpty()) {
+        // If we just processed the last "joining" paths then this sub-graph has been collapsed
+        // into a single edge and we just return the current follower. Note that we can join edges
+        // without ending recursion (when 3 followers become 2) but we can only end recursion after
+        // joining at least 2 edges at the terminal sub-graph node.
+        return follower;
+      }
+      // Recurse into the next sub-graph (possibly just a single edge) which is just concatenated
+      // onto the current follower.
+      PathFollower subgraph = visitSubgraph(target);
+      followerQueue.add(
+          PathFollower.of(subgraph.node(), Edge.concatenation(follower.edge(), subgraph.edge())));
+    }
+  }
+
+  // Collects the edges of any followers at the front of the queue which share the given target
+  // node, removing them from the queue. Returns null if no queued follower targets that node.
+  private static List<Edge> collectJoiningEdges(PriorityQueue<PathFollower> queue, Node target) {
+    // It's really common for edges not to join, so avoid making the list unless necessary.
+    if (!nextFollowerJoinsTarget(queue, target)) {
+      return null;
+    }
+    List<Edge> joiningEdges = new ArrayList<>();
+    do {
+      joiningEdges.add(queue.remove().edge());
+    } while (nextFollowerJoinsTarget(queue, target));
+    return joiningEdges;
+  }
+
+  // Checks if the head of the queue is a follower with the same target node.
+  private static boolean nextFollowerJoinsTarget(PriorityQueue<PathFollower> queue, Node target) {
+    return !queue.isEmpty() && queue.peek().node().equals(target);
+  }
+
+  /**
+   * Returns a total ordering of nodes in this graph based on the maximum path length from the
+   * initial node. If path lengths are equal for two nodes, then the node ID is used to tie break.
+   *
+   * <p>The property of this ordering that is critical to the node flattening algorithm is that if
+   * {@code a < b}, then no path exists in the graph where {@code b} precedes {@code a}. This
+   * ensures that path followers are processed consistently with the "node reachability" and if
+   * several path followers target the same node, then they are adjacent in the follower queue.
+   *
+   * <p>Using the node ID as a tie-break is safe, because while node IDs are assigned arbitrarily,
+   * they only apply between nodes in the same path length "bucket", so it cannot violate the total
+   * ordering requirement, since any order within a "bucket" is equally good.
+   */
+  // Note: If there are graph cycles this will not terminate, but that implies bad bugs elsewhere.
+  @VisibleForTesting
+  static Comparator<Node> nodeOrdering(ValueGraph<Node, ?> graph) {
+    Map<Node, Integer> map = new HashMap<>();
+    recursivelySetMaxPathLength(Node.INITIAL, 0, graph, map);
+    // We have to cast the "get" method since it accepts "Object", not "Node" on a map.
+    return Comparator.comparing((Function<Node, Integer>) map::get).thenComparing(Node::id);
+  }
+
+  private static void recursivelySetMaxPathLength(
+      Node node, int length, ValueGraph<Node, ?> graph, Map<Node, Integer> map) {
+    // Only continue if at least some paths can be lengthened from here onwards.
+    if (length > map.getOrDefault(node, -1)) {
+      map.put(node, length);
+      for (Node target : graph.successors(node)) {
+        recursivelySetMaxPathLength(target, length + 1, graph, map);
+      }
+    }
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java
new file mode 100644
index 0000000000..d54d2bc1a1
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import com.google.auto.value.AutoValue;
+
+/**
+ * Value type for nodes in NFA graphs of phone number regular expressions. This is basically a
+ * trivial wrapper for an {@code int}, but it makes a lot of other pieces of code type safe.
+ * Outside this package, this type is mainly used for examining NFA graphs which represent a
+ * regular expression, generated via {@link RangeTreeConverter#toNfaGraph}.
+ */
+@AutoValue
+public abstract class Node implements Comparable<Node> {
+  /** The unique initial node in an NFA graph with in-order zero. */
+  public static final Node INITIAL = new AutoValue_Node(0);
+  /** The unique terminal node in an NFA graph with out-order zero. */
+  public static final Node TERMINAL = new AutoValue_Node(1);
+
+  /** Returns a new node whose ID is one greater than this node. */
+  public Node createNext() {
+    return (id() == 0) ? TERMINAL : new AutoValue_Node(id() + 1);
+  }
+
+  /** Returns the numeric ID of this node, which must be unique within an NFA graph. */
+  abstract int id();
+
+  @Override
+  public int compareTo(Node o) {
+    return Integer.compare(id(), o.id());
+  }
+
+  @Override
+  public final String toString() {
+    return Integer.toString(id());
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java
new file mode 100644
index 0000000000..07816dece0
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkState;
+
+import com.google.common.graph.ElementOrder;
+import com.google.common.graph.MutableValueGraph;
+import com.google.common.graph.ValueGraph;
+import com.google.common.graph.ValueGraphBuilder;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Builds NFA {@link ValueGraph}s from DFA {@link RangeTree}s. Nodes and edges of the result
+ * mirror those of the source DFA almost one-to-one; the differences are:
+ * <ol>
+ *   <li>A node at which the DFA may terminate gains an 'epsilon' edge leading to the terminal
+ *   node.
+ *   <li>Where such a node already has a normal edge directly to the terminal node, that edge
+ *   is instead marked as an "optional edge". A {@link ValueGraph} can hold only one value per
+ *   source/target pair, so an epsilon edge cannot sit alongside another edge between the same
+ *   two nodes.
+ * </ol>
+ */
+public final class RangeTreeConverter {
+  /**
+   * Converts a {@link RangeTree} into its directed NFA graph form. The result is not a DFA and
+   * can include epsilon transitions. The initial and terminal nodes are always present; all
+   * other nodes are numbered in the order in which visitation first reaches them.
+   */
+  public static ValueGraph<Node, SimpleEdge> toNfaGraph(RangeTree ranges) {
+    NfaVisitor nfaBuilder = new NfaVisitor(ranges.getInitial());
+    ranges.accept(nfaBuilder);
+    return nfaBuilder.graph;
+  }
+
+  private static class NfaVisitor implements DfaVisitor {
+    private final MutableValueGraph<Node, SimpleEdge> graph = ValueGraphBuilder
+        .directed()
+        .allowsSelfLoops(false)
+        // A stable node order should keep generated output (regex, graph files) stable too.
+        .nodeOrder(ElementOrder.<Node>natural())
+        .build();
+    // The graph node created for each DFA node seen so far.
+    private final Map<DfaNode, Node> nodeMap = new HashMap<>();
+    // The most recently created node; the next new node is derived from it.
+    private Node lastAdded;
+
+    private NfaVisitor(DfaNode initial) {
+      // The initial and terminal nodes always exist exactly once, so register them up front.
+      graph.addNode(Node.INITIAL);
+      graph.addNode(Node.TERMINAL);
+      nodeMap.put(initial, Node.INITIAL);
+      nodeMap.put(RangeTree.getTerminal(), Node.TERMINAL);
+      lastAdded = Node.TERMINAL;
+      // Visitation only adds epsilon edges for target nodes, so a DFA that matches the empty
+      // input needs its epsilon from the initial node added here instead.
+      if (initial.canTerminate()) {
+        graph.putEdgeValue(Node.INITIAL, Node.TERMINAL, Edge.epsilon());
+      }
+    }
+
+    @Override
+    public void visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget) {
+      SimpleEdge digitEdge = Edge.fromMask(dfaEdge.getDigitMask());
+      Node from = nodeMap.get(dfaSource);
+      Node to = getTarget(dfaTarget);
+      boolean firstVisit = graph.addNode(to);
+      // An edge can only be present already if it is the epsilon added just before this edge
+      // was visited, and that in turn can only happen when the target is the terminal node.
+      SimpleEdge replaced = graph.putEdgeValue(from, to, digitEdge);
+      if (replaced != null) {
+        checkState(to.equals(Node.TERMINAL) && replaced.equals(Edge.epsilon()),
+            "unexpected edge during visitation: %s -- %s --> %s", from, replaced, to);
+        // Epsilon means "optional", so store the edge again in its optional form.
+        graph.putEdgeValue(from, to, digitEdge.optional());
+      }
+      if (!firstVisit) {
+        // Targets that were already in the graph are not recursed into again.
+        return;
+      }
+      // TERMINAL was added up front, so a new node is never TERMINAL and no loop can arise.
+      // The epsilon may later be swapped for an optional edge when dfaTarget is visited.
+      if (dfaTarget.canTerminate()) {
+        graph.putEdgeValue(to, Node.TERMINAL, Edge.epsilon());
+      }
+      dfaTarget.accept(this);
+    }
+
+    // Returns the node mapped to a DFA node, creating it (in the node map only) if necessary.
+    private Node getTarget(DfaNode gnode) {
+      Node known = nodeMap.get(gnode);
+      if (known == null) {
+        known = lastAdded.createNext();
+        lastAdded = known;
+        nodeMap.put(gnode, known);
+      }
+      return known;
+    }
+  }
+
+  private RangeTreeConverter() {}
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java
new file mode 100644
index 0000000000..25f1f20c94
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.base.Preconditions;
+
+/**
+ * Simple indenting formatter for regular expressions and other similar nested syntax. Input
+ * whitespace is stripped and the output contains new whitespace, so it will not match the same.
+ */
+public final class RegexFormatter {
+  /** Option for how to handle formatting of groups. */
+  public enum FormatOption {
+    /** Writes each group opener as found: "(?:" stays "(?:" and "(" stays "(". */
+    PRESERVE_CAPTURING_GROUPS,
+    /** Writes every group opener as "(?:", adding "?:" where the input lacks it. */
+    FORCE_NON_CAPTURING_GROUPS,
+    /** Writes every group opener as "(", dropping "?:" where the input has it. */
+    FORCE_CAPTURING_GROUPS,
+  }
+
+  // We only care about 3 specific tokens, so this code can be used to print strings which look
+  // similar (nested, disjunctive groups) such as the toString() of the Edge class.
+  private static final CharMatcher TOKENS = CharMatcher.anyOf("()|");
+
+  /**
+   * Formats a regular expression (or similar nested group syntax) using the following rules:
+   * <ol>
+   * <li>Newline and extra indent after a group opener (rewritten per {@code formatOption}).
+   * <li>Newline after '|'
+   * <li>Decrease indent and add newline before closing ')'
+   * </ol>
+   */
+  public static String format(String regex, FormatOption formatOption) {
+    return new RegexFormatter(regex, formatOption).format();
+  }
+
+  private final StringBuilder out = new StringBuilder();
+  private final String regex;
+  private final FormatOption formatOption;
+
+  private RegexFormatter(String regex, FormatOption formatOption) {
+    this.regex = CharMatcher.whitespace().removeFrom(regex);
+    this.formatOption = Preconditions.checkNotNull(formatOption);
+  }
+
+  private String format() {
+    recurse(0, 0);
+    return out.toString();
+  }
+
+  // Formats from pos, assuming output is at a line start; returns the position it stopped at.
+  private int recurse(int pos, int level) {
+    while (pos < regex.length()) {
+      indent(level);
+      // Optionally printing closing group from previous recursion.
+      if (regex.charAt(pos) == ')') {
+        out.append(')');
+        pos++;
+      }
+      int nextToken = TOKENS.indexIn(regex, pos);
+      if (nextToken == -1) {
+        out.append(regex, pos, regex.length());
+        return regex.length();
+      }
+      out.append(regex, pos, nextToken);
+      pos = nextToken;
+      switch (regex.charAt(pos)) {
+        case '(':
+          out.append("(");
+          pos++;
+          if (regex.startsWith("?:", pos)) {
+            if (formatOption != FormatOption.FORCE_CAPTURING_GROUPS) {
+              out.append("?:");
+            }
+            pos += 2;
+          } else if (formatOption == FormatOption.FORCE_NON_CAPTURING_GROUPS) {
+            out.append("?:");
+          }
+          out.append('\n');
+          pos = recurse(pos, level + 1);
+          break;
+        case '|':
+          out.append("|\n");
+          pos++;
+          break;
+        case ')':
+          // Just exit recursion and let the parent write the ')', so don't update our position.
+          out.append("\n");
+          return pos;
+        default:
+          throw new AssertionError();
+      }
+    }
+    return pos;
+  }
+
+  private void indent(int level) {
+    while (level-- > 0) {
+      out.append("  ");
+    }
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java
new file mode 100644
index 0000000000..5bd93f8974
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.ALLOW_EDGE_SPLITTING;
+import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES;
+import static java.util.stream.Collectors.joining;
+
+import com.google.common.base.Preconditions;
+import com.google.common.graph.ValueGraph;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer;
+import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.Optional;
+
+/** Produces partially optimized regular expressions from {@code RangeTree}s. */
+public final class RegexGenerator {
+  private static final RegexGenerator BASIC = new RegexGenerator(false, false, false, false);
+
+  // NOTE: Tail optimization should remain disabled since it seems to undo some of the benefits of
+  // subgroup optimization. At some point the code can probably just be removed.
+  private static final RegexGenerator DEFAULT_XML =
+      BASIC.withDfaFactorization().withSubgroupOptimization();
+
+  /**
+   * Returns a basic regular expression generator with no optional optimizations enabled. This will
+   * produce regular expressions with a simpler structure than other generators but output will
+   * almost always be longer.
+   */
+  public static RegexGenerator basic() {
+    return BASIC;
+  }
+
+  /**
+   * Returns the default regex generator for XML data. This should be used by any tool wishing to
+   * obtain the same regular expressions as the legacy XML data. It is deliberately not specified
+   * as to which optimizations are enabled for this regular expression generator.
+   */
+  public static RegexGenerator defaultXmlGenerator() {
+    return DEFAULT_XML;
+  }
+
+  /**
+   * Returns a new regular expression generator which uses the {@code '.'} token for matching any
+   * digit (rather than {@code '\d'}). This results in shorter output, but possibly at the cost of
+   * performance on certain platforms (and a degree of readability).
+   */
+  public RegexGenerator withDotMatch() {
+    Preconditions.checkState(!this.useDotMatch, "Dot-matching already enabled");
+    return new RegexGenerator(true, this.factorizeDfa, this.optimizeSubgroups, this.optimizeTail);
+  }
+
+  /**
+   * Returns a new regular expression generator which applies a length-based factorization of the
+   * DFA graph in an attempt to reduce the number of problematic terminating states. This results
+   * in regular expressions with additional non-determinism, but which can greatly reduce size.
+   */
+  public RegexGenerator withDfaFactorization() {
+    Preconditions.checkState(!this.factorizeDfa, "Length based factorizing already enabled");
+    return new RegexGenerator(this.useDotMatch, true, this.optimizeSubgroups, this.optimizeTail);
+  }
+
+  /**
+   * Returns a new regular expression generator which applies experimental factorization of the
+   * DFA graph in an attempt to identify and handle subgroups which would cause repetition. This
+   * results in regular expressions with additional non-determinism, but which can greatly reduce
+   * size.
+   */
+  public RegexGenerator withSubgroupOptimization() {
+    Preconditions.checkState(!this.optimizeSubgroups, "Subgroup optimization already enabled");
+    return new RegexGenerator(this.useDotMatch, this.factorizeDfa, true, this.optimizeTail);
+  }
+
+  /**
+   * Returns a new regular expression generator which applies tail optimization to the intermediate
+   * NFA graph to factor out common trailing paths. This results in a small size improvement to
+   * many cases and does not adversely affect readability.
+   */
+  public RegexGenerator withTailOptimization() {
+    Preconditions.checkState(!this.optimizeTail, "Tail optimization already enabled");
+    return new RegexGenerator(this.useDotMatch, this.factorizeDfa, this.optimizeSubgroups, true);
+  }
+
+  private final boolean useDotMatch;
+  private final boolean factorizeDfa;
+  private final boolean optimizeSubgroups;
+  private final boolean optimizeTail;
+
+  private RegexGenerator(
+      boolean useDotMatch, boolean factorizeDfa, boolean optimizeSubgroups, boolean optimizeTail) {
+    this.useDotMatch = useDotMatch;
+    this.factorizeDfa = factorizeDfa;
+    this.optimizeSubgroups = optimizeSubgroups;
+    this.optimizeTail = optimizeTail;
+  }
+
+  /**
+   * Generates a regular expression from a range tree using this generator's configured options.
+   * The ranges must be non-empty and must not contain only the empty digit sequence.
+   */
+  public String toRegex(RangeTree ranges) {
+    // The regex of the empty range is "a regex that matches nothing". This is meaningless.
+    checkArgument(!ranges.isEmpty(),
+        "cannot generate regular expression from empty ranges");
+    // We cannot generate any regular expressions if there are no explicit state transitions in the
+    // graph (i.e. we can generate "(?:<re>)?" but only if "<re>" is non-empty). We just get
+    // "the regex that always immediately terminates after no input". This is also meaningless.
+    checkArgument(!ranges.getInitial().equals(RangeTree.getTerminal()),
+        "range tree must not contain only the empty digit sequence: %s", ranges);
+
+    String regex = generateFactorizedRegex(ranges);
+    if (optimizeSubgroups) {
+      regex = recursivelyOptimizeSubgroups(ranges, regex);
+    }
+    return regex;
+  }
+
+  private String recursivelyOptimizeSubgroups(RangeTree ranges, String regex) {
+    Optional<RangeTree> subgraphRanges = SubgroupOptimizer.extractRepeatingSubgraph(ranges);
+    if (subgraphRanges.isPresent()) {  // Split off the subgraph and recurse on what is left.
+      RangeTree leftoverRanges = ranges.subtract(subgraphRanges.get());
+      String leftoverRegex = generateFactorizedRegex(leftoverRanges);
+      leftoverRegex = recursivelyOptimizeSubgroups(leftoverRanges, leftoverRegex);
+      String optimizedRegex = leftoverRegex + "|" + generateFactorizedRegex(subgraphRanges.get());
+      if (optimizedRegex.length() < regex.length()) {  // Only keep the split if it is shorter.
+        regex = optimizedRegex;
+      }
+    }
+    return regex;
+  }
+
+  private String generateFactorizedRegex(RangeTree ranges) {
+    String regex = regexOf(ranges);  // Unfactored baseline.
+    if (factorizeDfa) {  // Try each merge strategy in turn, keeping the shortest regex so far.
+      regex = generateFactorizedRegex(ranges, regex, REQUIRE_EQUAL_EDGES);
+      regex = generateFactorizedRegex(ranges, regex, ALLOW_EDGE_SPLITTING);
+    }
+    return regex;
+  }
+
+  private String generateFactorizedRegex(RangeTree dfa, String bestRegex, MergeStrategy strategy) {
+    String factoredRegex = RangeTreeFactorizer.factor(dfa, strategy).stream()
+        .map(this::regexOf)
+        .collect(joining("|"));  // Each factor's regex becomes one alternative.
+    return factoredRegex.length() < bestRegex.length() ? factoredRegex : bestRegex;
+  }
+
+  private String regexOf(RangeTree ranges) {  // Range tree -> NFA graph -> flatten -> regex.
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(ranges);
+    if (optimizeTail) {  // Optional pass over the intermediate NFA graph.
+      nfa = TrailingPathOptimizer.optimize(nfa);
+    }
+    return EdgeWriter.toRegex(NfaFlattener.flatten(nfa), useDotMatch);
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java
new file mode 100644
index 0000000000..8c612379ff
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.LinkedHashMultiset;
+import com.google.common.collect.Multiset;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.IntStream;
+import javax.annotation.Nullable;
+
+/**
+ * An optimization for RangeTree DFAs which attempts to isolate and extract subgraphs which would
+ * otherwise cause a lot of repetition in the generated regular expression.
+ */
+public final class SubgroupOptimizer {
+  /**
+   * Returns the subgraph which is likely to cause the most repetition in the regular expression
+   * of the given DFA. Subtracting the result out of the original range tree and generating two
+   * distinct regular expressions is likely to be shorter than the regular expression of the
+   * original range.
+   */
+  public static Optional<RangeTree> extractRepeatingSubgraph(RangeTree ranges) {
+    return LinkNodeVisitor
+        .findBridgingNode(ranges)
+        .flatMap(n -> SubgraphExtractionVisitor.extractSubgraph(ranges, n));
+  }
+
+  /**
+   * A visitor which applies two types of weights to every interior node in a DFA.
+   * <ul>
+   *   <li>A count of incoming edges to that node.
+   *   <li>A count of all edges in the subgraph rooted at that node.
+   * </ul>
+   * These are then multiplied together using the cost function:
+   * <pre>cost(n) = subgraph-weight(n) * (in-order(n) - 1)</pre>
+   * to get a proxy for the cost of additional duplicates likely to be created by this node.
+   */
+  static class LinkNodeVisitor implements DfaVisitor {
+    // Reasonable approximation for the cost of an edge in a subgraph is the length of the
+    // corresponding range specification (it doesn't work so well for repeated edges like
+    // 'xxxxxxxx' --> "\d{8}", but it's good to help break ties in the cost function).
+    private static final ImmutableList<Integer> EDGE_WEIGHTS =
+        IntStream.rangeClosed(1, 0x3FF)
+            .mapToObj(m -> RangeSpecification.toString(m).length())
+            .collect(toImmutableList());
+
+    // Important to use "linked" multisets here (at least for the one we iterate over) since
+    // otherwise we end up with non-deterministic regular expression generation.
+    private final Multiset<DfaNode> inOrder = LinkedHashMultiset.create();
+    private final Multiset<DfaNode> subgraphWeight = LinkedHashMultiset.create();
+
+    /**
+     * Returns the interior node whose subgraph is likely to cause the most repetition in the
+     * regular expression of the given DFA.
+     */
+    static Optional<DfaNode> findBridgingNode(RangeTree ranges) {
+      checkArgument(!ranges.isEmpty(), "cannot visit empty ranges");
+      LinkNodeVisitor v = new LinkNodeVisitor();
+      ranges.accept(v);
+      return Optional.ofNullable(v.getHighestCostNode());
+    }
+
+    private static int getEdgeWeight(DfaEdge edge) {
+      // Subtract 1 since index 0 holds the weight for mask 1 (a zero edge mask is not legal).
+      return EDGE_WEIGHTS.get(edge.getDigitMask() - 1);
+    }
+
+    @VisibleForTesting
+    int getSubgraphWeight(DfaNode n) {
+      return subgraphWeight.count(n);
+    }
+
+    @VisibleForTesting
+    int getInOrder(DfaNode n) {
+      return inOrder.count(n);
+    }
+
+    // This returns null if no node has a cost greater than zero. Since the cost function uses
+    // (in-order(n) - 1) this is trivially true for any graph where all interior nodes have only
+    // a single in-edge (the terminal node can have more than one in-edge, but it has a weight of
+    // zero and the initial node is never considered a candidate).
+    @VisibleForTesting
+    @Nullable
+    DfaNode getHighestCostNode() {
+      DfaNode node = null;
+      int maxWeight = 0;
+      for (DfaNode n : inOrder.elementSet()) {
+        int weight = getSubgraphWeight(n) * (getInOrder(n) - 1);
+        if (weight > maxWeight) {  // Strict '>' keeps the first node iterated on ties.
+          maxWeight = weight;
+          node = n;
+        }
+      }
+      return node;
+    }
+
+    @Override
+    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
+      // The weight is zero only if we haven't visited this node before (or it's the terminal).
+      int targetWeight = subgraphWeight.count(target);
+      if (targetWeight == 0 && !target.equals(RangeTree.getTerminal())) {
+        target.accept(this);
+        targetWeight = subgraphWeight.count(target);
+      }
+      // Add the weight of the edge we are processing now and increment our target's in-order.
+      subgraphWeight.add(source, targetWeight + getEdgeWeight(edge));
+      inOrder.add(target);
+    }
+  }
+
+  /**
+   * A visitor to extract the subgraph of a DFA which passes through a specified interior
+   * "bridging" node.
+   */
+  private static class SubgraphExtractionVisitor implements DfaVisitor {
+    private final DfaNode bridgingNode;
+    private final List<RangeSpecification> paths = new ArrayList<>();
+    private RangeSpecification path = RangeSpecification.empty();
+    private boolean sawBridgingNode = false;
+    private boolean splitHappens = false;
+
+    /** Returns the subgraph through the given node, or empty if it is not a proper subgraph. */
+    static Optional<RangeTree> extractSubgraph(RangeTree ranges, DfaNode node) {
+      SubgraphExtractionVisitor v = new SubgraphExtractionVisitor(node);
+      ranges.accept(v);
+      // Only return proper subgraphs (i.e. some terminating path lay outside the subgraph).
+      return v.splitHappens ? Optional.of(RangeTree.from(v.paths)) : Optional.empty();
+    }
+
+    private SubgraphExtractionVisitor(DfaNode bridgingNode) {
+      this.bridgingNode = checkNotNull(bridgingNode);
+    }
+
+    @Override
+    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
+      RangeSpecification oldPath = path;
+      path = path.extendByMask(edge.getDigitMask());  // Extend the current path by this edge.
+      // Potentially emit paths for any terminating node (not just the end of the graph). We have
+      // to extract the entire sub-graph _after_ the bridging node, including terminating nodes.
+      if (target.canTerminate()) {
+        // Emit path if we are "below" the bridging node.
+        if (sawBridgingNode) {
+          paths.add(path);
+        } else {
+          // Records that there were other paths not in the subgroup (since we only want to return
+          // a new DFA that's a proper subgraph of the original graph).
+          splitHappens = true;
+        }
+      }
+      if (target.equals(bridgingNode)) {
+        // Recurse with the flag set to emit paths once we hit the terminal node (note that the
+        // bridging node cannot be the terminal node).
+        sawBridgingNode = true;
+        target.accept(this);
+        sawBridgingNode = false;
+      } else {
+        // Recurse normally regardless of the flag.
+        target.accept(this);
+      }
+      path = oldPath;  // Backtrack so that sibling edges extend the caller's path.
+    }
+  }
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java
new file mode 100644
index 0000000000..48c0e96c62
--- /dev/null
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+import static java.util.Comparator.naturalOrder;
+import static java.util.stream.Collectors.toList;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.graph.Graphs;
+import com.google.common.graph.MutableValueGraph;
+import com.google.common.graph.ValueGraph;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+
+/**
+ * Optimizer for NFA graphs which attempts to restructure the trailing paths to maximize sharing
+ * and hopefully minimize the amount of duplication in the resulting regular expression.
+ */
+public final class TrailingPathOptimizer {
+  /**
+   * Optimizes an NFA graph to make trailing "any digit" sequences common where possible. In many
+   * cases this will result in no change to the structure of the NFA (common trailing paths are
+   * not a feature of every NFA), but in some cases a substantial reduction in duplication can
+   * occur.
+   *
+   * <p>This is equivalent to recognizing that {@code "12\d{2}(?:\d{2})?|34\d{2}|56\d{3}"} can
+   * be written as {@code "(?:12(?:\d{2})?|34|56\d)\d{2}"}.
+   */
+  public static ValueGraph<Node, SimpleEdge> optimize(ValueGraph<Node, SimpleEdge> graph) {
+    MutableValueGraph<Node, SimpleEdge> out = Graphs.copyOf(graph);
+
+    // Build a map of trailing "any digit" sequences (key is the node it starts from).
+    Map<Node, AnyPath> anyPaths = new HashMap<>();
+    recursivelyDetachTrailingPaths(Node.TERMINAL, AnyPath.EMPTY, out, anyPaths);
+
+    // If the terminal node has no "any digit" sequences leading to it, there's nothing we can do
+    // (well not in this simplistic algorithm anyway). This should almost never happen for phone
+    // number matching graphs as it implies a match expression that can terminate at a precise
+    // digit, rather than any digit. The only time this might occur is for short-codes, but due to
+    // their size it's likely to be fine if we don't try to aggressively optimize them.
+    if (anyPaths.size() == 1 && anyPaths.containsKey(Node.TERMINAL)) {
+      return graph;
+    }
+    // This is just a way to find a node from which we can start generating new nodes.
+    Node lastAddedNode = out.nodes().stream().max(naturalOrder()).get();
+
+    // Process paths from short to long (since some paths are sub-paths of longer ones).
+    List<Node> shortestPathsFirst = anyPaths.entrySet().stream()
+        .sorted(Comparator.comparing(Entry::getValue))
+        .map(Entry::getKey)
+        .collect(toList());
+    Node pathEnd = Node.TERMINAL;
+    while (true) {
+      // Start with the next path that might be a factor of all the remaining paths.
+      Node shortestPathNode = shortestPathsFirst.get(0);
+      AnyPath shortestPath = anyPaths.get(shortestPathNode);
+      int pathsToFactor = shortestPathsFirst.size() - 1;
+      if (pathsToFactor == 0) {
+        // If all paths are factored, we're done.
+        break;
+      }
+      // Factor all the remaining paths by the shortest path (where a missing result means it
+      // cannot be factored).
+      ImmutableList<AnyPath> factored = shortestPathsFirst.stream()
+          .skip(1)
+          .map(n -> anyPaths.get(n).factor(shortestPath))
+          .filter(Optional::isPresent)
+          .map(Optional::get)
+          .collect(toImmutableList());
+      // If not all the remaining paths have the shortest path as a common factor, we're done (in
+      // this simplistic algorithm we don't consider cases where an AnyPath is the factor of some,
+      // but not all, other paths; we could but it's far less likely to reduce regex size).
+      if (factored.size() < pathsToFactor) {
+        break;
+      }
+      // Shortest path is a factor of all remaining paths, so add a new path to the graph for it.
+      lastAddedNode = addPath(shortestPathNode, pathEnd, shortestPath, lastAddedNode, out);
+      // We're done with this path, but might still be able to find more factors of remaining paths.
+      anyPaths.remove(shortestPathNode);
+      shortestPathsFirst.remove(0);  // index, not value.
+      // The newly factored edges now replace the original factors in the map.
+      for (int n = 0; n < factored.size(); n++) {
+        Preconditions.checkState(anyPaths.containsKey(shortestPathsFirst.get(n)));
+        anyPaths.put(shortestPathsFirst.get(n), factored.get(n));
+      }
+      // We now connect any new factored edges to the node we just added (not the terminal node).
+      pathEnd = shortestPathNode;
+    }
+    // If we exit, we must still reconnect any remaining, unfactored, paths to the graph.
+    for (Map.Entry<Node, AnyPath> e : anyPaths.entrySet()) {
+      lastAddedNode = addPath(e.getKey(), pathEnd, e.getValue(), lastAddedNode, out);
+    }
+    return out;
+  }
+
+  /**
+   * Recursively build up a map of trailing "any digit" sequences (AnyPath), starting from some
+   * current node (initially the terminal node) and working backwards. The key in the map is the
+   * node at which the AnyPath value starts from. Edges and nodes are removed from the graph,
+   * leaving "ragged" paths which will need to be reconnected later (the keys in the map are the
+   * set of nodes that need to be reconnected).
+   *
+   * @return whether the given node is the start of an AnyPath (i.e. if it immediately follows any
+   *     edges which are not "any digit" sequences).
+   */
+  private static boolean recursivelyDetachTrailingPaths(
+      Node node, AnyPath path, MutableValueGraph<Node, SimpleEdge> g, Map<Node, AnyPath> anyPaths) {
+    if (beginsAnAnyPath(node, g)) {
+      anyPaths.put(node, path);
+      return true;
+    }
+    // All incoming edges accept all digits, so we can recurse (but don't traverse epsilons).
+    List<Node> sources = g.predecessors(node).stream()
+        .filter(s -> !g.edgeValue(s, node).get().equals(Edge.epsilon()))
+        .collect(toList());
+    for (Node source : sources) {
+      AnyPath newPath = path.extend(canTerminate(source, g));
+      // Recurse to remove trailing paths higher in the tree and keep this source node only if
+      // recursion stopped here.
+      boolean keepSourceNode = recursivelyDetachTrailingPaths(source, newPath, g, anyPaths);
+      g.removeEdge(source, node);
+      // This removes the epsilon if it exists (and does nothing otherwise). This is safe since we
+      // know the other out-edge of this node accepts all digits, so the only remaining type of
+      // edge that could exist is an epsilon. After removing both we expect not to find any others.
+      g.removeEdge(source, Node.TERMINAL);
+      Preconditions.checkState(g.outDegree(source) == 0, "unexpected out edges in trailing graph");
+      // If we were able to recurse past this node, it can be removed.
+      if (!keepSourceNode) {
+        g.removeNode(source);
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns whether the given node is the initial node or has incoming edges that do not just
+   * accept "any digit". Recursion must stop here as AnyPath can only represent "any digit" paths.
+   */
+  private static boolean beginsAnAnyPath(Node target, ValueGraph<Node, SimpleEdge> g) {
+    // Obviously we cannot recurse past the initial node.
+    if (target == Node.INITIAL) {
+      return true;
+    }
+    return g.predecessors(target).stream()
+        .map(s -> g.edgeValue(s, target).get())
+        .filter(e -> !e.equals(Edge.epsilon()))
+        .anyMatch(e -> e.getDigitMask() != ALL_DIGITS_MASK);
+  }
+
+  /**
+   * Returns whether this node can terminate. This logic relies on the input graph not having had
+   * its epsilon edges moved (i.e. if an epsilon edge exists it must point to the terminal node).
+   * This also looks for special "optional" edges which exist when a non-epsilon edge already
+   * exists from this node to the terminal node.
+   */
+  private static boolean canTerminate(Node node, ValueGraph<Node, SimpleEdge> g) {
+    return g.successors(node).stream()
+        .map(t -> g.edgeValue(node, t).get())
+        .anyMatch(e -> e.isOptional() || e.equals(Edge.epsilon()));
+  }
+
+  /** Adds the given "AnyPath" into the graph, generating new nodes and edges as necessary. */
+  private static Node addPath(
+      Node node, Node end, AnyPath path, Node lastAdded, MutableValueGraph<Node, SimpleEdge> out) {
+    // Path length is always at least 1 for an AnyPath.
+    int pathLength = path.maxLength();
+    for (int n = 0; n < pathLength - 1; n++) {
+      if (path.acceptsLength(n)) {  // Length n is accepted, so add an epsilon shortcut to the end.
+        out.putEdgeValue(node, end, Edge.epsilon());
+      }
+      lastAdded = lastAdded.createNext();
+      out.addNode(lastAdded);
+      out.putEdgeValue(node, lastAdded, Edge.any());
+      node = lastAdded;
+    }
+    // For the last edge we cannot add a parallel epsilon path if we need to skip to the end,
+    // so add the special "optional any" edge instead.
+    out.putEdgeValue(
+        node, end, path.acceptsLength(pathLength - 1) ? Edge.optionalAny() : Edge.any());
+    return lastAdded;  // Equals the lastAdded argument if no new nodes were needed.
+  }
+
+  private TrailingPathOptimizer() {}
+}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
index bb8329c97c..8434e13177 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
@@ -73,19 +73,23 @@ public void accept(Stream<String> row) {
           } else {
             ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
             // Not a pure lambda due to the need to index columns.
-            row.forEach(new Consumer<String>() {
-              private int i = 0;
-
-              @Override
-              public void accept(String v) {
-                checkArgument(i < header.size(),
-                    "too many columns (expected %s): %s", header.size(), map);
-                if (!v.isEmpty()) {
-                  map.put(header.get(i++), v);
-                }
-              }
-            });
-            handler.accept(map.build());
+            row.forEach(
+                new Consumer<String>() {
+                  private int i = 0;
+
+                  @Override
+                  public void accept(String v) {
+                    checkArgument(
+                        i < header.size(),
+                        "too many columns (expected %s): %s",
+                        header.size(),
+                        map);
+                    if (!v.isEmpty()) {  // NOTE(review): i is not advanced for empty v; intended?
+                      map.put(header.get(i++), v);
+                    }
+                  }
+                });
+            handler.accept(map.buildOrThrow());
           }
         }
       };
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
index 7a96596c93..98a287647e 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
@@ -582,7 +582,7 @@ public static String unescapeSingleLineCsvText(String s) {
           .put('r', '\r')
           .put('t', '\t')
           .put('\\', '\\')
-          .build();
+          .buildOrThrow();
 
   // Visible for AutoValue only.
   CsvTable() {}
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
index c7db3f78d9..fdfe7fe065 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
@@ -628,7 +628,7 @@ public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap(
       RangeTree include = getRanges(column, value);
       map.put(value, PrefixTree.minimal(include, allRanges.subtract(include), minPrefixLength));
     }
-    return map.build();
+    return map.buildOrThrow();
   }
 
   // Constants for the simplification routine below.
diff --git a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
index eaf4e72367..9abb3af883 100644
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
@@ -55,7 +55,7 @@ public Builder add(ColumnGroup<?, ?> group) {
     }
 
     public Schema build() {
-      return new AutoValue_Schema(names.build(), columns.build(), groups.build());
+      return new AutoValue_Schema(names.build(), columns.buildOrThrow(), groups.buildOrThrow());
     }
   }
 
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java
new file mode 100644
index 0000000000..1b2b1e18de
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2022 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertThrows;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public final class LengthsParserTest {
+  // Every test calls LengthsParser.parseLengths() directly with a length specification string.
+  @Test
+  public void shouldThrowIfStringContainsForbiddenCharacters() {
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("a-6,7"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("8, B, C"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("8, ,10"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("4, +7-9, +11"));
+  }
+
+  @Test
+  public void shouldThrowIfNumbersAreOutOfOrder() {
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("9-7"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("8,12-11"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("5,4,7-8"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("6-8, 7-9"));
+  }
+
+  @Test
+  public void shouldThrowIfFormatIsWrong() {
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("4-6-8"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("7-"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("3, -7"));
+    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths("1 2-3 4, 5 6"));
+  }
+
+  @Test
+  public void testParseSingletons() {
+    assertThat(LengthsParser.parseLengths("8")).containsExactly(8);
+    assertThat(LengthsParser.parseLengths("14")).containsExactly(14);
+  }
+
+  @Test
+  public void testParseCommaSeparatedNumbers() {
+    assertThat(LengthsParser.parseLengths("6,8,9")).containsExactly(6, 8, 9);
+    assertThat(LengthsParser.parseLengths("13, 14")).containsExactly(13, 14);
+  }
+
+  @Test
+  public void testParseRanges() {
+    assertThat(LengthsParser.parseLengths("6-8")).containsExactly(6, 7, 8);
+    assertThat(LengthsParser.parseLengths("13 - 14")).containsExactly(13, 14);
+  }
+
+  @Test
+  public void testParseComplex() {
+    assertThat(LengthsParser.parseLengths("4,7,9-12")).containsExactly(4, 7, 9, 10, 11, 12);
+    assertThat(LengthsParser.parseLengths("4-6, 8, 10-12")).containsExactly(4, 5, 6, 8, 10, 11, 12);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
index 6869f415d7..5bf1e4aaf6 100644
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
@@ -20,8 +20,8 @@
 import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.parse;
-import static java.util.Arrays.asList;
 import static com.google.i18n.phonenumbers.metadata.testing.AssertUtil.assertThrows;
+import static java.util.Arrays.asList;
 
 import com.google.common.collect.ImmutableRangeSet;
 import com.google.common.collect.Range;
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java
new file mode 100644
index 0000000000..f089c8f5ab
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.INVALID;
+import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.MATCHED;
+import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.TOO_LONG;
+import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.TOO_SHORT;
+
+import com.google.common.base.CharMatcher;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DigitSequence;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler;
+import com.google.i18n.phonenumbers.metadata.regex.RegexGenerator;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class DigitSequenceMatcherTest {
+
+  @Test public void testStringDigits() {
+    DigitSequence seq = DigitSequenceMatcher.digitsFromString("1234");
+
+    // The digits of "1234" are conveniently 1..4, so the loop counter doubles as the expected
+    // value: each one must be announced by hasNext() and then returned by next().
+    for (int expected = 1; expected <= 4; expected++) {
+      Assert.assertTrue(seq.hasNext());
+      Assert.assertEquals(expected, seq.next());
+    }
+
+    // After the fourth digit the sequence must report that it is exhausted.
+    Assert.assertFalse(seq.hasNext());
+  }
+
+  @Test public void testSingleDigitMatching() {
+    assertNotMatches(ranges("0"), INVALID, "1", "9");
+    assertNotMatches(ranges("0"), TOO_LONG, "00");
+    // "x" accepts any one digit, so only the length of the input can be wrong.
+    assertMatches(ranges("x"), "0", "5", "9");
+    assertNotMatches(ranges("x"), TOO_SHORT, "");
+    assertNotMatches(ranges("x"), TOO_LONG, "00");
+    // A bracketed range accepts just the listed digits.
+    assertMatches(ranges("[2-6]"), "2", "3", "4", "5", "6");
+    assertNotMatches(ranges("[2-6]"), INVALID, "0", "1", "7", "8", "9");
+    assertNotMatches(ranges("[2-6]"), TOO_LONG, "26");
+  }
+
+  @Test public void testOptional() {
+    RangeTree dfa = ranges("12", "123");  // The final "3" is optional.
+    assertMatches(dfa, "12", "123");
+    assertNotMatches(dfa, TOO_SHORT, "1");
+    assertNotMatches(dfa, INVALID, "13");
+    assertNotMatches(dfa, TOO_LONG, "1233");
+  }
+
+  @Test public void testRepetition() {  // "12" followed by 2, 3 or 4 further digits.
+    assertMatches(ranges("12xx", "12xxx", "12xxxx"), "1234", "12345", "123456");
+  }
+
+  @Test public void testOr() {
+    RangeTree dfa = ranges("01", "23");
+    assertMatches(dfa, "01", "23");
+    assertNotMatches(dfa, INVALID, "03", "12");
+    assertNotMatches(dfa, TOO_SHORT, "0", "2");
+    assertNotMatches(dfa, TOO_LONG, "011", "233");
+    // More alternatives, not all of the same length.
+    assertMatches(ranges("01", "23", "45", "6789"), "01", "23", "45", "6789");
+  }
+
+  @Test public void testRealRegexShort() {
+    RangeTree dfa = ranges(
+        "11[2-7]xxxxxxx",
+        "2[02][2-7]xxxxxxx",
+        "33[2-7]xxxxxxx",
+        "4[04][2-7]xxxxxxx",
+        "79[2-7]xxxxxxx",
+        "80[2-467]xxxxxxx");
+    // The spaces in the numbers below are for readability only; they are stripped before matching.
+    assertMatches(dfa, "112 1234567", "797 1234567", "807 1234567");
+    assertNotMatches(dfa, TOO_SHORT, "112 123", "797 12345", "807 123456");
+    assertNotMatches(dfa, TOO_LONG, "112 12345678", "797 123456789");
+    assertNotMatches(dfa, INVALID, "122 1234567", "799 1234567", "805 1234567");
+  }
+
+  @Test public void testRealRegexLong() {
+    RangeTree dfa = ranges(
+        "12[0-249][2-7]xxxxxx",
+        "13[0-25][2-7]xxxxxx",
+        "14[145][2-7]xxxxxx",
+        "1[59][14][2-7]xxxxxx",
+        "16[014][2-7]xxxxxx",
+        "17[1257][2-7]xxxxxx",
+        "18[01346][2-7]xxxxxx",
+        "21[257][2-7]xxxxxx",
+        "23[013][2-7]xxxxxx",
+        "24[01][2-7]xxxxxx",
+        "25[0137][2-7]xxxxxx",
+        "26[0158][2-7]xxxxxx",
+        "278[2-7]xxxxxx",
+        "28[1568][2-7]xxxxxx",
+        "29[14][2-7]xxxxxx",
+        "326[2-7]xxxxxx",
+        "34[1-3][2-7]xxxxxx",
+        "35[34][2-7]xxxxxx",
+        "36[01489][2-7]xxxxxx",
+        "37[02-46][2-7]xxxxxx",
+        "38[159][2-7]xxxxxx",
+        "41[36][2-7]xxxxxx",
+        "42[1-47][2-7]xxxxxx",
+        "43[15][2-7]xxxxxx",
+        "45[12][2-7]xxxxxx",
+        "46[126-9][2-7]xxxxxx",
+        "47[0-24-9][2-7]xxxxxx",
+        "48[013-57][2-7]xxxxxx",
+        "49[014-7][2-7]xxxxxx",
+        "5[136][25][2-7]xxxxxx",
+        "522[2-7]xxxxxx",
+        "54[28][2-7]xxxxxx",
+        "55[12][2-7]xxxxxx",
+        "5[78]1[2-7]xxxxxx",
+        "59[15][2-7]xxxxxx",
+        "612[2-7]xxxxxx",
+        "6[2-4]1[2-7]xxxxxx",
+        "65[17][2-7]xxxxxx",
+        "66[13][2-7]xxxxxx",
+        "67[14][2-7]xxxxxx",
+        "680[2-7]xxxxxx",
+        "712[2-7]xxxxxx",
+        "72[14][2-7]xxxxxx",
+        "73[134][2-7]xxxxxx",
+        "74[47][2-7]xxxxxx",
+        "75[15][2-7]xxxxxx",
+        "7[67]1[2-7]xxxxxx",
+        "788[2-7]xxxxxx",
+        "816[2-7]xxxxxx",
+        "82[014][2-7]xxxxxx",
+        "83[126][2-7]xxxxxx",
+        "86[136][2-7]xxxxxx",
+        "87[078][2-7]xxxxxx",
+        "88[34][2-7]xxxxxx",
+        "891[2-7]xxxxxx");
+    // The spaces in the numbers below are for readability only; they are stripped before matching.
+    assertMatches(dfa, "364 2 123456", "674 4 123456", "883 7 123456");
+    assertNotMatches(dfa, TOO_SHORT, "364 2 123", "674 4 1234", "883 7 12345");
+    assertNotMatches(dfa, TOO_LONG, "364 2 1234567", "674 4 12345678");
+    assertNotMatches(dfa, INVALID,
+        "365 2 123456", "364 8 123456", "670 4 123456", "670 5 123456", "892 2 123456");
+  }
+
+  private static RangeTree ranges(String... lines) {  // One range specification per argument.
+    return RangeTree.from(Arrays.asList(lines).stream().map(RangeSpecification::parse));
+  }
+
+  private static void assertMatches(RangeTree dfa, String... numbers) {
+    // Sanity check the inputs against the generated regex before testing the compiled matcher.
+    checkRegex(dfa, true, numbers);
+    // Compile the ranges to matcher bytes and require every number to be accepted.
+    DigitSequenceMatcher compiled = DigitSequenceMatcher.create(MatcherCompiler.compile(dfa));
+    assertMatcher(compiled, MATCHED, numbers);
+  }
+
+  private static void assertNotMatches(RangeTree dfa, Result error, String... numbers) {
+    checkArgument(error != MATCHED);
+    checkRegex(dfa, false, numbers);
+    byte[] matcherData = MatcherCompiler.compile(dfa);
+    DigitSequenceMatcher matcher = DigitSequenceMatcher.create(matcherData);
+    assertMatcher(matcher, error, numbers);
+  }
+
+  private static void checkRegex(RangeTree dfa, boolean expectMatch, String... numbers) {
+    Pattern pattern = Pattern.compile(RegexGenerator.basic().toRegex(dfa));
+    for (String number : numbers) {  // Spaces in a number are for readability and are removed.
+      checkArgument(expectMatch == pattern.matcher(noSpace(number)).matches(),
+          "regex %s: expected match=%s for %s", dfa.asRangeSpecifications(), expectMatch, number);
+    }
+  }
+
+  private static void assertMatcher(
+      DigitSequenceMatcher matcher, Result expected, String... numbers) {
+    for (String spaced : numbers) {  // Spaces are only for readability and are removed first.
+      DigitSequence digits = DigitSequenceMatcher.digitsFromString(noSpace(spaced));
+      Assert.assertEquals(expected, matcher.match(digits));
+    }
+  }
+
+  private static String noSpace(String input) {  // Returns the input with all whitespace removed.
+    return CharMatcher.whitespace().removeFrom(input);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java
new file mode 100644
index 0000000000..e8a96776db
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java
@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.truth.Truth.assertWithMessage;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+import static java.lang.Integer.bitCount;
+import static java.lang.Integer.lowestOneBit;
+import static java.lang.Integer.numberOfTrailingZeros;
+
+import com.google.common.collect.Multimap;
+import com.google.common.collect.MultimapBuilder;
+import com.google.common.collect.SetMultimap;
+import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto;
+import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.TestCase;
+import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.Tests;
+import com.google.i18n.phonenumbers.metadata.DigitSequence;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.TextFormat;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class CompilerRegressionTest {
+  // Tests that the compiler produces the expected output, byte-for-byte.
+  @Test
+  public void testCompiledBytesEqualExpectedMatcherBytes() throws IOException {
+    StringWriter buffer = new StringWriter();
+    PrintWriter errors = new PrintWriter(buffer);
+    try (InputStream data =
+        CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) {
+      // getResourceAsStream() returns null (rather than throwing) if the resource is missing.
+      assertWithMessage("cannot load regression_test_data.textpb").that(data).isNotNull();
+      Tests.Builder tests = RegressionTestProto.Tests.newBuilder();
+      TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests);
+      for (TestCase tc : tests.getTestCaseList()) {
+        byte[] actual = MatcherCompiler.compile(ranges(tc.getRangeList()));
+        byte[] expected = combine(tc.getExpectedList());
+        int diffIndex = indexOfDiff(actual, expected);
+        if (tc.getShouldFail()) {
+          if (diffIndex == -1) {
+            errors.format("FAILED [%s]: Expected difference, but got none\n", tc.getName());
+          }
+        } else if (diffIndex != -1) {
+          errors.format("FAILED [%s]: First difference at index %d\n", tc.getName(), diffIndex);
+          errors.format("Actual  : %s\n", formatPbSnippet(actual, diffIndex, 20));
+          errors.format("Expected: %s\n", formatPbSnippet(expected, diffIndex, 20));
+          writeGoldenPbOutput(actual, errors);
+        }
+      }
+    }
+    String errorMessage = buffer.toString();
+    if (!errorMessage.isEmpty()) {
+      assertWithMessage(errorMessage).fail();
+    }
+  }
+
+  // Test that the matcher behaves correctly with respect to the input ranges using the expected
+  // byte sequences. If this test fails, then the matcher implementation is doing something wrong,
+  // or the expected bytes were generated incorrectly (either by hand or from the compiler).
+  //
+  // IMPORTANT: This test tests that the expected bytes (rather than the compiled bytes) match the
+  // numbers in the ranges. This avoids the risk of any bugs in both the matcher and compiler
+  // somehow cancelling each other out. However this also means that this test depends on the
+  // equality test above for validity (i.e. this test can pass even if the matcher compiler is
+  // broken, so it should not be run in isolation when debugging).
+  @Test
+  public void testExpectedMatcherBytesMatchRanges() throws IOException {
+    try (InputStream data =
+        CompilerRegressionTest.class.getResourceAsStream("regression_test_data.textpb")) {
+      assertWithMessage("cannot load regression_test_data.textpb").that(data).isNotNull();
+      RegressionTestProto.Tests.Builder tests = RegressionTestProto.Tests.newBuilder();
+      TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests);
+      for (TestCase tc : tests.getTestCaseList()) {
+        RangeTree ranges = ranges(tc.getRangeList());
+        // Deliberately build the matcher from the expected (golden) bytes instead of compiling the
+        // ranges here, so that a compiler bug cannot cancel out a corresponding matcher bug.
+        DigitSequenceMatcher matcher = DigitSequenceMatcher.create(combine(tc.getExpectedList()));
+        Multimap<Result, DigitSequence> numbers = buildTestNumbers(ranges);
+        if (!tc.getShouldFail()) {
+          testExpectedMatch(tc.getName(), matcher, numbers);
+        } else {
+          testExpectedFailure(tc.getName(), matcher, numbers);
+        }
+      }
+    }
+  }
+
+  private static void testExpectedMatch(String testName, DigitSequenceMatcher matcher,
+      Multimap<Result, DigitSequence> numbers) {
+    // Every generated number must produce exactly the result it was filed under.
+    for (Result wanted : Result.values()) {
+      for (DigitSequence digits : numbers.get(wanted)) {
+        assertWithMessage("FAILED [%s]: Sequence %s", testName, digits)
+            .that(matcher.match(new Sequence(digits))).isEqualTo(wanted);
+      }
+    }
+  }
+
+  private static void testExpectedFailure(String testName, DigitSequenceMatcher matcher,
+      Multimap<Result, DigitSequence> numbers) {
+    // A "should fail" test case passes as soon as any one number gets an unexpected result.
+    for (Result wanted : Result.values()) {
+      for (DigitSequence digits : numbers.get(wanted)) {
+        if (matcher.match(new Sequence(digits)) != wanted) {
+          return;
+        }
+      }
+    }
+    assertWithMessage("FAILED [%s]: Expected at least one failure", testName).fail();
+  }
+
+  // Magic number: DigitSequences cannot be longer than 18 digits at the moment, so a check is
+  // needed to prevent us from trying to make longer-than-allowed sequences in tests. This only
+  // happens in the case of a terminal node, since non-terminal paths must be < 17 digits long.
+  // If the allowed number of digits increases, this value can be modified or left as-is.
+  private static final int MAX_SEQUENCE_LENGTH = 18;
+
+  // Adapts a metadata DigitSequence to the forward-only digit source consumed by the matcher.
+  private static final class Sequence implements DigitSequenceMatcher.DigitSequence {
+    private final DigitSequence digits;
+    private int position = 0;
+
+    Sequence(DigitSequence digits) {
+      this.digits = digits;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return position < digits.length();
+    }
+
+    @Override
+    public int next() {
+      return digits.getDigit(position++);
+    }
+  }
+
+  // Returns a RangeTree for the list of RangeSpecification strings (uses no instance state).
+  static RangeTree ranges(List<String> specs) {
+    return RangeTree.from(specs.stream().map(RangeSpecification::parse).collect(toImmutableList()));
+  }
+
+  // Builds the test numbers for the given RangeTree, covering every branching point in its DFA.
+  // Each path combination is generated exactly once. Digits are picked pseudo randomly from the
+  // edge masks, but the test should not be flaky: if it _ever_ fails, that implies a serious
+  // problem with the matcher compiler or matcher implementation.
+  private static Multimap<Result, DigitSequence> buildTestNumbers(RangeTree ranges) {
+    // Keyed by the expected result, with each key's sequences held in a sorted set.
+    SetMultimap<Result, DigitSequence> collected =
+        MultimapBuilder.enumKeys(Result.class).treeSetValues().build();
+    ranges.accept(new Visitor(RangeSpecification.empty(), collected, new HashSet<>()));
+    return collected;
+  }
+
+  /**
+   * Visitor to generate a targeted set of test numbers from a range tree DFA, which should
+   * exercise every instruction in the corresponding matcher data. These numbers should ensure
+   * that every "branch" (including early terminations) is taken at least once. Where digits
+   * should be equivalent (i.e. both x and y have the same effect) they are chosen randomly, since
+   * otherwise you would need to generate billions of numbers to cover every possible combination.
+   */
+  private static final class Visitor implements DfaVisitor {
+    private final RangeSpecification sourcePath;  // Path from the root to the node being visited.
+    private final SetMultimap<Result, DigitSequence> numbers;  // Shared output of test numbers.
+    private final Set<DfaNode> visited;  // Shared set of nodes already recursed into.
+    private int outEdgesMask = 0;  // Union of the digit masks of all edges seen by this visitor.
+
+    Visitor(RangeSpecification sourcePath,
+        SetMultimap<Result, DigitSequence> numbers,
+        Set<DfaNode> visited) {
+      this.sourcePath = sourcePath;
+      this.numbers = numbers;
+      this.visited = visited;
+    }
+
+    @Override
+    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
+      // Record the current outgoing edge mask.
+      int mask = edge.getDigitMask();
+      outEdgesMask |= mask;
+      // Get the current path and add a test number for it: a match if the input may end at the
+      // target node, otherwise too short.
+      RangeSpecification path = sourcePath.extendByMask(mask);
+      numbers.put(target.canTerminate() ? Result.MATCHED : Result.TOO_SHORT, sequenceIn(path));
+      // Avoid recursing into nodes we've already visited. This avoids generating many (hundreds)
+      // of test numbers for nodes which are reachable in many ways (via many path prefixes). This
+      // is an optional check and could be removed, but for testing larger ranges it seems to make
+      // a difference in test time. DFA node/instruction coverage should be unaffected by this.
+      if (visited.contains(target)) {
+        return;
+      }
+      visited.add(target);
+      // Recurse into the next level with a new visitor starting from our path (it's okay to visit
+      // the terminal node here since it does nothing and leaves the out edges mask zero).
+      Visitor childVisitor = new Visitor(path, numbers, visited);
+      target.accept(childVisitor);
+      // After recursion, find the digits for which our target has no out-edge at all.
+      int unreachableMask = ~childVisitor.outEdgesMask & ALL_DIGITS_MASK;
+      if (unreachableMask != 0 && path.length() < MAX_SEQUENCE_LENGTH) {
+        // Create a path which cannot be reached directly from our target node. If this is the
+        // terminal node then we create a path that's too long, otherwise it's just invalid.
+        Result expected = target.equals(RangeTree.getTerminal()) ? Result.TOO_LONG : Result.INVALID;
+        numbers.put(expected, sequenceIn(path.extendByMask(unreachableMask)));
+      }
+    }
+  }
+
+  // Returns a pseudo randomly chosen sequence from the given path (one digit per path position).
+  private static DigitSequence sequenceIn(RangeSpecification path) {
+    DigitSequence seq = DigitSequence.empty();
+    for (int n = 0; n < path.length(); n++) {
+      int mask = path.getBitmask(n);
+      // A random number M in [0..BitCount), not the bit itself.
+      // E.g. mask = 0011010011 ==> (0 <= maskBit < 5) (allowed digits are {0,1,4,6,7})
+      int maskBit = (int) (bitCount(mask) * Math.random());
+      // Mask out the M lower bits which come before the randomly selected one.
+      // E.g. maskBit = 3 ==> mask = 0011000000 (3 lower bits cleared)
+      while (maskBit > 0) {
+        mask &= ~lowestOneBit(mask);
+        maskBit--;
+      }
+      // Extend the sequence by the digit value of the randomly selected bit.
+      // E.g. mask = 0011000000 ==> digit = 6 (randomly chosen from the allowed digits).
+      seq = seq.extendBy(numberOfTrailingZeros(mask));
+    }
+    return seq;
+  }
+
+  // Combines multiple ByteStrings into a single byte[] (we allow splitting in the regression test
+  // file for readability).
+  private static byte[] combine(List<ByteString> bytes) {
+    byte[] joined = new byte[bytes.stream().mapToInt(ByteString::size).sum()];
+    int writePos = 0;
+    for (ByteString chunk : bytes) {
+      // Each chunk is copied directly after the previous one.
+      chunk.copyTo(joined, writePos);
+      writePos += chunk.size();
+    }
+    return joined;
+  }
+
+  // Returns the index of the first difference, or -1 if the byte arrays are the same.
+  private static int indexOfDiff(byte[] a, byte[] b) {
+    int common = Math.min(a.length, b.length);
+    int n = 0;
+    while (n < common && a[n] == b[n]) {
+      n++;
+    }
+    // n is now the first mismatch, or the shorter length if one array is a prefix of the other.
+    return (n == a.length && n == b.length) ? -1 : n;
+  }
+
+  // Formats up to 'length' bytes from 'start' as a human readable snippet using C-style hex
+  // escaping (which is compatible with the regression test data).
+  private static String formatPbSnippet(byte[] bytes, int start, int length) {
+    boolean truncatedBefore = start > 0;
+    boolean truncatedAfter = start + length < bytes.length;
+    StringBuilder snippet = new StringBuilder();
+    if (truncatedBefore) {
+      snippet.append("...");
+    }
+    appendBytes(snippet, bytes, start, length);
+    // A trailing ellipsis shows that more bytes follow the snippet.
+    return (truncatedAfter ? snippet.append("...") : snippet).toString();
+  }
+
+  // Prints the bytes as 'expected: "..."' lines ready to paste into a regression test file.
+  private static void writeGoldenPbOutput(byte[] bytes, PrintWriter errors) {
+    errors.println("Golden Data:");
+    // Emit one 'expected' line for every 20 bytes (the last line may be shorter).
+    for (int start = 0; start < bytes.length; start += 20) {
+      StringBuilder chunk = appendBytes(new StringBuilder(), bytes, start, 20);
+      errors.format("  expected: \"%s\"\n", chunk);
+    }
+  }
+
+  // Appends up to 'length' bytes, starting at 'start', in C-style hex format (e.g. \xHH).
+  private static StringBuilder appendBytes(StringBuilder out, byte[] bytes, int start, int length) {
+    int limit = Math.min(bytes.length, start + length);
+    for (int i = start; i < limit; i++) {
+      out.append(String.format("\\x%02x", bytes[i] & 0xFF));  // Mask to avoid sign extension.
+    }
+    return out;
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java
new file mode 100644
index 0000000000..4a0fdc7a25
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.primitives.Bytes.asList;
+import static com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler.compile;
+
+import com.google.common.truth.Truth;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class MatcherCompilerTest {
+
+  private static final Byte TERMINATOR = (byte) 0;  // Last byte expected in the compiled output.
+
+  @Test public void testSingleOperation() {
+    byte digit0 = single(0);
+    byte digit5 = single(5);
+    byte digit9 = single(9);
+    assertCompile(ranges("0"), digit0, TERMINATOR);
+    assertCompile(ranges("5"), digit5, TERMINATOR);
+    assertCompile(ranges("9"), digit9, TERMINATOR);
+    assertCompile(ranges("0559"), digit0, digit5, digit5, digit9, TERMINATOR);
+    // Bit 4 is expected on the instruction that follows a possible end of input ("05" is valid).
+    byte digit5Terminating = (byte) (digit5 | (1 << 4));
+    assertCompile(ranges("05", "0559"),
+        digit0, digit5, digit5Terminating, digit9, TERMINATOR);
+  }
+
+  @Test public void testAnyOperation() {
+    byte anyDigit = any(1);
+    byte anyDigit16Times = any(16);
+    assertCompile(ranges("x"), anyDigit, TERMINATOR);
+    assertCompile(ranges("xxxx_xxxx_xxxx_xxxx"), anyDigit16Times, TERMINATOR);
+    assertCompile(ranges("xxxx_xxxx_xxxx_xxxx_x"),
+        anyDigit16Times, anyDigit, TERMINATOR);
+    // Bit 4 is expected on the instruction that follows a possible end of input ("x" is valid).
+    byte anyDigitTerminating = (byte) (anyDigit | (1 << 4));
+    assertCompile(ranges("x", "xx"), anyDigit, anyDigitTerminating, TERMINATOR);
+    assertCompile(ranges("xxxx_xxxx_xxxx_xxxx", "xxxx_xxxx_xxxx_xxxx_x"),
+        anyDigit16Times, anyDigitTerminating, TERMINATOR);
+  }
+
+  @Test public void testRangeOperation() {
+    int range09 = range(0, 9);
+    int range123 = range(1, 2, 3);
+    int range789 = range(7, 8, 9);
+    // Each range instruction is expected as two bytes, high byte first.
+    assertCompile(ranges("[09]"), hi(range09), lo(range09), TERMINATOR);
+    assertCompile(ranges("[123][789]"),
+        hi(range123), lo(range123), hi(range789), lo(range789), TERMINATOR);
+  }
+
+  @Test public void testMapOperation() {
+    // Force all 10 possible branches to be taken.
+    byte[] data = compile(ranges("00", "11", "22", "33", "44", "55", "66", "77", "88", "99"));
+    // Check only the first 4 bytes for exact values.
+    Assert.assertEquals(
+        asList((byte) 0x95, (byte) 0x31, (byte) 0xF5, (byte) 0x9D),
+        asList(data).subList(0, 4));
+    // Each branch should jump to a 2 byte sequence between 10 and 28 bytes away (inclusive).
+    List<Byte> jumpTable = asList(data).subList(4, 14);
+    List<Byte> remainder = asList(data).subList(14, data.length);
+    // TODO: Now that ordering should be consistent, tighten up this test to ensure
+    // consistency and remove the shorter consistency test below.
+    for (byte jump : new byte[] {0xA, 0xC, 0xE, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C}) {
+      Assert.assertTrue(jumpTable.contains(jump));
+      int index = jumpTable.indexOf(jump);
+      // Subtract the length of the jump table to get relative offset in remaining code.
+      int offset = jump - jumpTable.size();
+      // Each jump should end in 2 single-byte instructions (match corresponding digit, terminate).
+      Assert.assertEquals(single(index), remainder.get(offset));
+      Assert.assertEquals(TERMINATOR, remainder.get(offset + 1));
+    }
+  }
+
+  @Test public void testConsistentSorting() {
+    // Ensure that the MatcherCompiler output is consistent, otherwise it can result in a
+    // non-deterministic build, because the generated file changes with each execution.
+    byte[] expected = new byte[] {-128, 0, 0, 29, 3, 5, 7, 32, 0, 33, 0, 34, 0};
+    assertCompile(ranges("00", "11", "22"), expected);
+  }
+
+  /** Builds the 1-byte SINGLE instruction that matches one occurrence of the given digit. */
+  private static Byte single(int value) {
+    checkArgument(0 <= value && value <= 9);
+    return (byte) (value | (OpCode.SINGLE.ordinal() << 5));
+  }
+
+  /** Builds the 1-byte ANY instruction that matches any digit {@code count} times (1 to 16). */
+  private static Byte any(int count) {
+    checkArgument(1 <= count && count <= 16);
+    return (byte) ((count - 1) | (OpCode.ANY.ordinal() << 5));
+  }
+
+  /** Builds the 2-byte RANGE instruction (as an int) whose low bits mask the accepted digits. */
+  private static int range(int... digits) {
+    int digitMask = 0;
+    for (int digit : digits) {
+      checkArgument(digit >= 0 && digit < 10);
+      digitMask |= (1 << digit);
+    }
+    return digitMask | (OpCode.RANGE.ordinal() << 13);
+  }
+
+  private static Byte hi(int shortInstruction) {  // Upper byte of a 2-byte instruction.
+    return (byte) ((shortInstruction >> 8) & 0xFF);
+  }
+
+  private static Byte lo(int shortInstruction) {  // Lower byte of a 2-byte instruction.
+    return (byte) shortInstruction;
+  }
+
+  private static void assertCompile(RangeTree dfa, byte... expected) {  // Exact byte equality.
+    Truth.assertThat(compile(dfa)).isEqualTo(expected);
+  }
+
+  private static RangeTree ranges(String... lines) {  // One range specification per argument.
+    return RangeTree.from(Arrays.asList(lines).stream().map(RangeSpecification::parse));
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java
new file mode 100644
index 0000000000..bd8f1eeb62
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;
+
+import static com.google.common.primitives.Bytes.asList;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.ByteArrayDataOutput;
+import com.google.common.io.ByteStreams;
+import junit.framework.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class OperationTest {
+
+  @Test public void testWriteJumpTableNoExtraBranches() {
+    ByteArrayDataOutput outBytes = ByteStreams.newDataOutput();
+    Operation.writeJumpTable(outBytes, ImmutableList.of(0x10, 0x80, 0xFC), Statistics.NO_OP);
+    // The jump table size (3 bytes here) is added to each of the offsets.
+    Assert.assertEquals(
+        asList(new byte[] {(byte) 0x13, (byte) 0x83, (byte) 0xFF}),
+        asList(outBytes.toByteArray()));
+  }
+
+  // An easy way to reason about what the offsets for the branches should be is to consider
+  // that the last branch must always have the original offset (it jumps from the very end of
+  // the jump table, which is exactly what the original offset specified). The branch before it
+  // is the same except that it must jump over the final branch (ie, +2 bytes) and so on.
+  // Direct offsets are relative to the start of the jump table however and must be adjusted.
+  @Test public void testWriteJumpTableExtraBranches() {
+    ByteArrayDataOutput outBytes = ByteStreams.newDataOutput();
+    // Two extra branches needed (0x200 and 0xF7). Worst case adjustment is 9 bytes.
+    // Total adjustment is 7 bytes (jump table size + 2 * branch)
+    Operation.writeJumpTable(outBytes, ImmutableList.of(0xF7, 0xF6, 0x200), Statistics.NO_OP);
+    Assert.assertEquals(asList(new byte[] {
+        // Jump table: (offset-to-branch, direct-adjusted-offset, offset-to-branch)
+        (byte) 0x03, (byte) 0xFD, (byte) 0x05,
+        // Extra branch: offset = 0xF7 + 2 (jumps over last branch)
+        (byte) 0x10, (byte) 0xF9,
+        // Extra branch: offset = 0x200 (last branch always has original offset)
+        (byte) 0x12, (byte) 0x00}),
+        asList(outBytes.toByteArray()));
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb
new file mode 100644
index 0000000000..d97225ccdf
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb
@@ -0,0 +1,295 @@
+# Copyright (C) 2017 The Libphonenumber Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ---- Manually crafted "unit" tests ----
+
+test_case {
+  name: "Simple Range"
+  range: "1234xxx"
+  # 4 single byte, single value instructions: 0x20 + value
+  # 1 single byte, "ANY" instruction: 0x40 + (count-1)
+  expected: "\x21\x22\x23\x24\x42\x00"
+}
+test_case {
+  # NOTE: When the ANY instruction is marked as terminating, it applies when the instruction is
+  # reached, not after it's executed (i.e. \x50... is "(\d...)?", and not "\d(...)?").
+  # Match 3 x ANY (0x42), then "terminate or ANY" (0x50), then 2 x ANY
+  name: "Variable Any Match #1"
+  range: "1xxx"
+  range: "1xxxxxx"
+  expected: "\x21\x42\x50\x41\x00"
+}
+test_case {
+  name: "Variable Any Match #2"
+  range: "1xxx"
+  range: "1xxxx"
+  range: "1xxxxx"
+  range: "1xxxxxx"
+  # A repeated terminating ANY match applies on every repeat, not just the first time.
+  # Match 3 x ANY (0x42 = \d{3}), then 3 x "terminate or ANY" (0x52 = \d{0,3}).
+  expected: "\x21\x42\x52\x00"
+}
+test_case {
+  name: "Overflow Any Match"
+  range: "xxxxxxxxxxxxxxxxxx"
+  # 18 'any' digits can't fit in one instruction, so write 2 separate opcodes to match 16 (0x4F)
+  # and then 2 (0x41). This will almost never occur since DigitSequence is limited to 18 digits.
+  expected: "\x4F\x41\x00"
+}
+test_case {
+  name: "Range Matching"
+  range: "[0-4]12"
+  # First 2 bytes are a "branch" operation (opcode = 0x60 plus mask), but there are no offsets
+  # after it (since one "branch" is just to continue matching, while the other is failure).
+  expected: "\x60\x1F\x21\x22\x00"
+}
+test_case {
+  name: "Range Matching"
+  # Requires a 2-way branch in the DFA where both paths cover all input digits [0-9].
+  range: "[0-4]12"
+  range: "[5-9]34"
+  # First 2 bytes are a 2-way branch operation (opcode = 0x68 plus mask), then 2 jump offsets
+  # measured from the end of those 2 bytes (i.e. from the start of the jump table).
+  expected: "\x68\x1F\x02\x05\x21\x22\x00\x23\x24\x00"
+}
+
+# ---- Deliberate failure cases ----
+
+test_case {
+  name: "Modified Single Match Bytecode"
+  should_fail: true
+  range: "123xxxx"
+  range: "123xxxxx"
+  range: "123xxxxxx"
+  # Expected bytes have been tweaked to accept 4 (\x24), rather than 3 (\x23).
+  expected: "\x21\x22\x24\x43\x51\x00"
+}
+test_case {
+  name: "Modified Range Bytecode"
+  should_fail: true
+  range: "1[2-5]xxxx"
+  # Expected bytes have been tweaked to accept [7-9] (\x63\x80), rather than [2-5] (\x60\x3C)
+  expected: "\x21\x63\x80\x43\x00"
+}
+test_case {
+  name: "Modified Any Match Bytecode"
+  should_fail: true
+  range: "1xxxx"
+  # Expected bytes have been tweaked to accept xxx (\x42), rather than xxxx (\x43)
+  expected: "\x21\x42\x00"
+}
+
+# ---- Auto-generated "stress tests" ----
+
+test_case {
+  name: "GB Mobile"
+  range: "7[1-3]xxxxxxxx"
+  range: "74[0-46-9]xxxxxxx"
+  range: "745[0-689]xxxxxx"
+  range: "7457[0-57-9]xxxxx"
+  range: "750[0-8]xxxxxx"
+  range: "75[13-9]xxxxxxx"
+  range: "752[0-35-9]xxxxxx"
+  range: "7700[01]xxxxx"
+  range: "770[1-9]xxxxxx"
+  range: "77[1-7]xxxxxxx"
+  range: "778[02-9]xxxxxx"
+  range: "779[0-689]xxxxxx"
+  range: "78[014-9]xxxxxxx"
+  range: "78[23][0-8]xxxxxx"
+  range: "79[024-9]xxxxxxx"
+  range: "791[02-9]xxxxxx"
+  range: "7911[028]xxxxx"
+  range: "793[0-689]xxxxxx"
+  # Not much insight here - other than it starts by matching a '7' and terminates in one place
+  # after matching "any digit" 5 times (which is the shortest trailing match in the ranges).
+  expected: "\x27\x8c\xa8\x1a\x2a\x06\x09\x0d\x14\x1c\x20\x40\x10\x1e\x6b\xdf\x1c\x1f\x84\x44"
+  expected: "\x92\x5d\x1d\x16\x21\x88\x64\x92\x55\x1d\x0f\x21\x24\x6b\xf3\x09\x10\x82\x22\x49"
+  expected: "\x6d\x03\x1b\x18\x40\x10\x19\x6b\x7f\x17\x19\x61\xff\x10\x11\x63\xef\x0e\x68\x01"
+  expected: "\x11\x0c\x63\xfd\x07\x63\x7f\x04\x6b\xfd\x02\x0a\x40\x08\x63\xbf\x05\x60\x03\x02"
+  expected: "\x61\x05\x44\x00"
+}
+test_case {
+  name: "India Fixed Line"
+  range: "11[2-7]xxxxxxx"
+  range: "12[0-249][2-7]xxxxxx"
+  range: "12[35-8]x[2-7]xxxxx"
+  range: "13[0-25][2-7]xxxxxx"
+  range: "13[346-9]x[2-7]xxxxx"
+  range: "14[145][2-7]xxxxxx"
+  range: "14[236-9]x[2-7]xxxxx"
+  range: "1[59][0235-9]x[2-7]xxxxx"
+  range: "1[59][14][2-7]xxxxxx"
+  range: "16[014][2-7]xxxxxx"
+  range: "16[235-9]x[2-7]xxxxx"
+  range: "17[1257][2-7]xxxxxx"
+  range: "17[34689]x[2-7]xxxxx"
+  range: "18[01346][2-7]xxxxxx"
+  range: "18[257-9]x[2-7]xxxxx"
+  range: "2[02][2-7]xxxxxxx"
+  range: "21[134689]x[2-7]xxxxx"
+  range: "21[257][2-7]xxxxxx"
+  range: "23[013][2-7]xxxxxx"
+  range: "23[24-8]x[2-7]xxxxx"
+  range: "24[01][2-7]xxxxxx"
+  range: "24[2-8]x[2-7]xxxxx"
+  range: "25[0137][2-7]xxxxxx"
+  range: "25[25689]x[2-7]xxxxx"
+  range: "26[0158][2-7]xxxxxx"
+  range: "26[2-4679]x[2-7]xxxxx"
+  range: "27[13-79]x[2-7]xxxxx"
+  range: "278[2-7]xxxxxx"
+  range: "28[1568][2-7]xxxxxx"
+  range: "28[2-479]x[2-7]xxxxx"
+  range: "29[14][2-7]xxxxxx"
+  range: "29[235-9]x[2-7]xxxxx"
+  range: "301x[2-7]xxxxx"
+  range: "31[79]x[2-7]xxxxx"
+  range: "32[1-5]x[2-7]xxxxx"
+  range: "326[2-7]xxxxxx"
+  range: "33[2-7]xxxxxxx"
+  range: "34[13][2-7]xxxxxx"
+  range: "342[0189][2-7]xxxxx"
+  range: "342[2-7]xxxxxx"
+  range: "34[5-8]x[2-7]xxxxx"
+  range: "35[125689]x[2-7]xxxxx"
+  range: "35[34][2-7]xxxxxx"
+  range: "36[01489][2-7]xxxxxx"
+  range: "36[235-7]x[2-7]xxxxx"
+  range: "37[02-46][2-7]xxxxxx"
+  range: "37[157-9]x[2-7]xxxxx"
+  range: "38[159][2-7]xxxxxx"
+  range: "38[2-467]x[2-7]xxxxx"
+  range: "4[04][2-7]xxxxxxx"
+  range: "41[14578]x[2-7]xxxxx"
+  range: "41[36][2-7]xxxxxx"
+  range: "42[1-47][2-7]xxxxxx"
+  range: "42[5689]x[2-7]xxxxx"
+  range: "43[15][2-7]xxxxxx"
+  range: "43[2-467]x[2-7]xxxxx"
+  range: "45[12][2-7]xxxxxx"
+  range: "45[4-7]x[2-7]xxxxx"
+  range: "46[0-26-9][2-7]xxxxxx"
+  range: "46[35]x[2-7]xxxxx"
+  range: "47[0-24-9][2-7]xxxxxx"
+  range: "473x[2-7]xxxxx"
+  range: "48[013-57][2-7]xxxxxx"
+  range: "48[2689]x[2-7]xxxxx"
+  range: "49[014-7][2-7]xxxxxx"
+  range: "49[2389]x[2-7]xxxxx"
+  range: "51[025][2-7]xxxxxx"
+  range: "51[146-9]x[2-7]xxxxx"
+  range: "52[14-8]x[2-7]xxxxx"
+  range: "522[2-7]xxxxxx"
+  range: "53[1346]x[2-7]xxxxx"
+  range: "53[25][2-7]xxxxxx"
+  range: "54[14-69]x[2-7]xxxxx"
+  range: "54[28][2-7]xxxxxx"
+  range: "55[12][2-7]xxxxxx"
+  range: "55[46]x[2-7]xxxxx"
+  range: "56[146-9]x[2-7]xxxxx"
+  range: "56[25][2-7]xxxxxx"
+  range: "571[2-7]xxxxxx"
+  range: "57[2-4]x[2-7]xxxxx"
+  range: "581[2-7]xxxxxx"
+  range: "58[2-8]x[2-7]xxxxx"
+  range: "59[15][2-7]xxxxxx"
+  range: "59[246]x[2-7]xxxxx"
+  range: "61[1358]x[2-7]xxxxx"
+  range: "612[2-7]xxxxxx"
+  range: "621[2-7]xxxxxx"
+  range: "62[2457]x[2-7]xxxxx"
+  range: "631[2-7]xxxxxx"
+  range: "63[2-4]x[2-7]xxxxx"
+  range: "641[2-7]xxxxxx"
+  range: "64[235-7]x[2-7]xxxxx"
+  range: "65[17][2-7]xxxxxx"
+  range: "65[2-689]x[2-7]xxxxx"
+  range: "66[13][2-7]xxxxxx"
+  range: "66[24578]x[2-7]xxxxx"
+  range: "671[2-7]xxxxxx"
+  range: "67[235689]x[2-7]xxxxx"
+  range: "674[0189][2-7]xxxxx"
+  range: "674[2-7]xxxxxx"
+  range: "680[2-7]xxxxxx"
+  range: "68[1-6]x[2-7]xxxxx"
+  range: "71[013-9]x[2-7]xxxxx"
+  range: "712[2-7]xxxxxx"
+  range: "72[0235-9]x[2-7]xxxxx"
+  range: "72[14][2-7]xxxxxx"
+  range: "73[134][2-7]xxxxxx"
+  range: "73[2679]x[2-7]xxxxx"
+  range: "74[1-35689]x[2-7]xxxxx"
+  range: "74[47][2-7]xxxxxx"
+  range: "75[15][2-7]xxxxxx"
+  range: "75[2-46-9]x[2-7]xxxxx"
+  range: "7[67][02-9]x[2-7]xxxxx"
+  range: "7[67]1[2-7]xxxxxx"
+  range: "78[013-7]x[2-7]xxxxx"
+  range: "782[0-6][2-7]xxxxx"
+  range: "788[0189][2-7]xxxxx"
+  range: "788[2-7]xxxxxx"
+  range: "79[0189]x[2-7]xxxxx"
+  range: "79[2-7]xxxxxxx"
+  range: "80[2-467]xxxxxxx"
+  range: "81[1357-9]x[2-7]xxxxx"
+  range: "816[2-7]xxxxxx"
+  range: "82[014][2-7]xxxxxx"
+  range: "82[235-8]x[2-7]xxxxx"
+  range: "83[03-57-9]x[2-7]xxxxx"
+  range: "83[126][2-7]xxxxxx"
+  range: "84[0-24-9]x[2-7]xxxxx"
+  range: "85xx[2-7]xxxxx"
+  range: "86[136][2-7]xxxxxx"
+  range: "86[2457-9]x[2-7]xxxxx"
+  range: "87[078][2-7]xxxxxx"
+  range: "87[1-6]x[2-7]xxxxx"
+  range: "88[1256]x[2-7]xxxxx"
+  range: "88[34][2-7]xxxxxx"
+  range: "891[2-7]xxxxxx"
+  range: "89[2-4]x[2-7]xxxxx"
+  expected: "\x81\x0f\xac\x72\x08\x1e\x3b\x58\xad\xcc\x75\x8d\x8b\x0f\xac\x72\xdc\xec\xf4\x08"
+  expected: "\x0a\x0c\x0e\x10\x10\xf2\x10\xfa\x11\x00\x11\x06\x11\x0e\x93\x0f\xac\x6d\xc6\x09"
+  expected: "\x0b\x0d\x0f\x11\x13\x15\x17\x11\x07\x11\x0f\x11\x17\x11\x1f\x11\x27\x11\x2d\x11"
+  expected: "\x35\x11\x3d\x81\x31\xf5\x9d\x09\x0b\x0d\xa9\x0f\x11\x13\x15\x17\x12\x27\x12\x28"
+  expected: "\x11\x34\x11\x38\x11\x3d\x11\x41\x11\x43\x11\x45\x93\x0f\xa9\x9d\x8c\x09\x0b\x0d"
+  expected: "\x0f\x11\x13\x15\x17\x11\x3c\x11\x40\x11\x44\x11\x48\x11\x4c\x11\x50\x11\x52\x11"
+  expected: "\x54\x90\xed\xac\x72\x08\x99\x0a\x0c\x0e\x10\x12\x73\x11\xab\x11\xad\x11\xb1\x11"
+  expected: "\xb5\x11\xb9\x11\xdd\x95\x31\xf5\x9d\x63\x0a\x0c\x0e\x10\x12\x14\x16\x18\x1a\x11"
+  expected: "\xab\x11\xaf\x11\xb3\x11\xd4\x11\xd5\x11\xb1\x11\xb5\x11\xb9\x11\x44\x93\x0f\xac"
+  expected: "\x72\x09\x0b\x0d\x0f\x11\x13\x15\x17\x19\x11\x11\x11\x15\x11\x19\x11\x1d\x11\x21"
+  expected: "\x11\x25\x11\x29\x11\x2d\x11\x31\x81\x0f\xac\x72\x08\x0a\x0c\x0e\x10\x12\x14\x16"
+  expected: "\x11\x29\x11\x2d\x11\x13\x11\x2f\x11\x33\x11\x37\x11\x3b\x11\x40\x60\xfc\x11\x90"
+  expected: "\x6b\x03\x02\x04\x11\x93\x11\x88\x60\xdc\x11\x84\x6a\x17\x02\x04\x11\x80\x11\x85"
+  expected: "\x68\x27\x02\x04\x11\x78\x11\x7d\x84\x44\x89\x52\x02\x04\x11\x6e\x11\x73\x6b\xed"
+  expected: "\x02\x04\x11\x6d\x11\x64\x68\x13\x02\x04\x11\x5e\x11\x63\x84\x42\x8a\x4a\x02\x04"
+  expected: "\x11\x54\x11\x59\x68\x5b\x02\x04\x11\x4c\x11\x51\x82\x24\x51\x32\x02\x04\x11\x49"
+  expected: "\x11\x40\x80\x44\x92\x33\x02\x04\x11\x38\x11\x3d\x80\x44\x92\x53\x02\x04\x11\x2e"
+  expected: "\x11\x33\x84\x42\x90\x33\x02\x04\x11\x24\x11\x29\x69\x23\x02\x04\x11\x1c\x11\x21"
+  expected: "\x82\x42\x49\x22\x02\x04\x11\x19\x11\x10\x84\x24\x4a\x52\x02\x04\x11\x08\x11\x0d"
+  expected: "\x84\x44\x91\x52\x02\x04\x10\xfe\x11\x03\x80\x00\x89\x2a\xff\xf8\x80\x66\xd8\x32"
+  expected: "\xf2\xf5\xf9\x82\x20\x4a\x4a\xf2\xeb\x6b\x13\xe7\xee\x68\x5d\xe3\xea\x82\x04\x8a"
+  expected: "\x52\xdd\xe4\x80\x22\x89\x42\xde\xd7\x84\x42\x91\x2a\xd1\xd8\x80\x04\x8a\x52\xcb"
+  expected: "\xd2\x80\x04\x92\x0a\xc5\xcc\x82\x22\x50\x4b\xbf\xc6\x6b\xf7\xbb\xc2\x68\xbb\xb7"
+  expected: "\xbe\x68\xf3\xb3\xba\x84\x44\x8a\x0d\xad\xb4\x80\x22\x49\x12\xae\xa7\x80\x00\x51"
+  expected: "\x32\xa8\xa1\x82\x40\x49\x12\xa2\x9b\x80\x00\x82\x0a\x95\x9c\x82\x22\x51\x12\x96"
+  expected: "\x8f\x80\x00\x02\x52\x89\x90\x80\x44\x92\x52\x83\x8a\x80\x00\x8a\x12\x7d\x84\x80"
+  expected: "\x20\x08\x32\x7e\x77\x80\x04\x12\x12\x71\x78\x80\x04\x90\x52\x6b\x72\x84\x42\x92"
+  expected: "\x52\x65\x6c\x80\x44\x12\x32\x5f\x66\x84\x40\x93\x52\x59\x60\x5c\x80\x00\x92\x55"
+  expected: "\x52\x59\x6b\xfb\x55\x4e\x84\x04\x81\x32\x48\x4f\x82\x24\x4a\x2a\x49\x42\x84\x44"
+  expected: "\x8a\x52\x3c\x43\x6b\xfd\x3f\x38\x82\x22\x88\x22\x39\x32\x80\x44\x91\x53\x2c\x33"
+  expected: "\x6b\xb9\x2f\x28\x84\x44\x52\x32\x22\x29\x80\x22\x92\x55\x1c\x23\x80\x00\x4a\x4a"
+  expected: "\x1d\x16\x80\x62\x49\x33\x17\x19\x13\x21\x10\x11\x62\x80\x0e\x63\xf7\x0b\x40\x09"
+  expected: "\x40\x0c\x60\xfc\x09\x6b\x03\x09\x07\x40\x05\x60\x7f\x02\x40\x02\x60\xfc\x44\x00"
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java
new file mode 100644
index 0000000000..705d430cc7
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.EMPTY;
+import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.OPTIONAL;
+import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.SINGLE;
+
+import com.google.common.collect.ImmutableSortedSet;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class AnyPathTest {
+  @Test
+  public void testConstants() {
+    assertPath(EMPTY, 0);
+    assertPath(SINGLE, 1);
+    assertPath(OPTIONAL, 0, 1);
+  }
+
+  @Test
+  public void testExtend() {
+    assertThat(EMPTY.extend(false)).isEqualTo(SINGLE);
+    assertThat(EMPTY.extend(true)).isEqualTo(OPTIONAL);
+    // Non-optional extension is the same as joining with SINGLE.
+    assertPath(SINGLE.extend(false), 2);
+    // This is not the same as SINGLE.join(OPTIONAL), which accepts lengths 1 and 2.
+    assertPath(SINGLE.extend(true), 0, 2);
+
+    // 100 extends to 1000 or 1001 (if optional).
+    assertPath(AnyPath.of(0x4).extend(false), 3);
+    assertPath(AnyPath.of(0x4).extend(true), 0, 3);
+  }
+
+  @Test
+  public void testJoin() {
+    assertThat(EMPTY.join(SINGLE)).isEqualTo(SINGLE);
+    assertThat(EMPTY.join(OPTIONAL)).isEqualTo(OPTIONAL);
+    assertPath(SINGLE.join(SINGLE), 2);
+    assertPath(SINGLE.join(OPTIONAL), 1, 2);
+    assertPath(OPTIONAL.join(OPTIONAL), 0, 1, 2);
+
+    // "(x(x)?)?" == 111 and matches 0 to 2.
+    // "(x(x)?)?".join("(x(x)?)?") == "(x(x(x(x)?)?)?)?" == 11111 and matches 0 to 4.
+    assertThat(AnyPath.of(0x7).join(AnyPath.of(0x7))).isEqualTo(AnyPath.of(0x1F));
+
+    // "xx(x)?" == 1100 and matches 2 or 3.
+    // "(xx)?" == 0101 and matches 0 or 2.
+    // "xx(x)?".join("(xx)?") == "xx(x(x(x)?)?)?" == 111100 and matches 2 to 5.
+    assertThat(AnyPath.of(0xC).join(AnyPath.of(0x5))).isEqualTo(AnyPath.of(0x3C));
+  }
+
+  @Test
+  public void testMakeOptional() {
+    assertThat(OPTIONAL.makeOptional()).isEqualTo(OPTIONAL);
+    assertThat(SINGLE.makeOptional()).isEqualTo(OPTIONAL);
+    assertPath(AnyPath.of(0x4).makeOptional(), 0, 2);
+  }
+
+  @Test
+  public void testToString() {
+    assertThat(SINGLE.toString()).isEqualTo("x");
+    assertThat(OPTIONAL.toString()).isEqualTo("(x)?");
+    assertThat(AnyPath.of(0x8).toString()).isEqualTo("xxx");  // 1000 = 3 digits
+    assertThat(AnyPath.of(0xA).toString()).isEqualTo("x(xx)?");  // 1010 = 1 or 3 digits
+    assertThat(AnyPath.of(0xF).toString()).isEqualTo("(x(x(x)?)?)?");  // 1111 = 0 to 3 digits
+  }
+
+  // Ordering is important as we need to find the shortest path at certain times.
+  @Test
+  public void testOrdering() {
+    assertThat(SINGLE).isGreaterThan(EMPTY);
+    assertThat(OPTIONAL).isGreaterThan(SINGLE);
+
+    assertThat(AnyPath.of(0x8)).isGreaterThan(AnyPath.of(0x4));
+    // Same length, but the 2nd highest length match is taken into account as a tie break.
+    // This strategy turns out to match numeric comparison perfectly since set-bits are lengths.
+    assertThat(AnyPath.of(0xA)).isGreaterThan(AnyPath.of(0x9));
+  }
+
+  private static void assertPath(AnyPath p, Integer... n) {
+    ImmutableSortedSet<Integer> lengths = ImmutableSortedSet.copyOf(n);
+    int maxLength = lengths.last();  // Callers must pass at least one accepted length.
+    assertThat(p.maxLength()).isEqualTo(maxLength);
+    for (int i = 0; i <= maxLength; i++) {
+      assertThat(p.acceptsLength(i)).isEqualTo(lengths.contains(i));
+    }
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java
new file mode 100644
index 0000000000..dc0230783d
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
+import static org.junit.Assert.fail;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.Visitor;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class EdgeTest {
+  @Test
+  public void testSimple() {
+    assertThat(Edge.fromMask(0x6).getDigitMask()).isEqualTo(0x6);
+    assertThat(Edge.fromMask(0x6).isOptional()).isFalse();
+
+    assertThat(Edge.fromMask(0x3).toString()).isEqualTo("[01]");  // 0000000011
+    assertThat(Edge.fromMask(0x300).toString()).isEqualTo("[89]");  // 1100000000
+    assertThat(Edge.fromMask(0x1FE).toString()).isEqualTo("[1-8]");  // 0111111110
+    assertThat(Edge.fromMask(ALL_DIGITS_MASK).toString()).isEqualTo("x");  // any digit
+  }
+
+  @Test
+  public void testAny() {
+    assertThat(Edge.fromMask(ALL_DIGITS_MASK)).isEqualTo(Edge.any());
+    assertThat(Edge.any().optional()).isEqualTo(Edge.optionalAny());
+
+    assertThat(Edge.any().toString()).isEqualTo("x");
+    // Unlike AnyPath, simple edges are not sequences, so don't need parens for optional.
+    assertThat(Edge.optionalAny().toString()).isEqualTo("x?");
+  }
+
+  @Test
+  public void testEpsilon() {
+    // Epsilon isn't optional, it represents a path that non-optionally accepts no input.
+    assertThat(Edge.epsilon().isOptional()).isFalse();
+    assertThat(Edge.epsilon().toString()).isEqualTo("e");
+  }
+
+  @Test
+  public void testConcatenation() {
+    Edge concatenated = Edge.concatenation(Edge.fromMask(0x3), Edge.any());
+    assertThat(concatenated.toString()).isEqualTo("[01]x");
+    TestingVisitor v = new TestingVisitor() {
+      @Override
+      public void visitSequence(List<Edge> edges) {
+        assertThat(edges).containsExactly(Edge.fromMask(0x3), Edge.any()).inOrder();
+        wasTested = true;
+      }
+    };
+    concatenated.accept(v);
+    assertThat(v.wasTested).isTrue();
+  }
+
+  @Test
+  public void testGroup() {
+    Edge group = Edge.disjunction(ImmutableSet.of(Edge.fromMask(0x3), Edge.any()));
+    TestingVisitor v = new TestingVisitor() {
+      @Override
+      public void visitGroup(Set<Edge> edges, boolean isOptional) {
+        assertThat(edges).containsExactly(Edge.any(), Edge.fromMask(0x3)).inOrder();
+        assertThat(isOptional).isFalse();
+        wasTested = true;
+      }
+    };
+    group.accept(v);
+    assertThat(group.toString()).isEqualTo("(x|[01])");
+    assertThat(v.wasTested).isTrue();
+  }
+
+  @Test
+  public void testOptionalGroup() {
+    Edge group = Edge.disjunction(ImmutableSet.of(Edge.fromMask(0x3), Edge.epsilon(), Edge.any()));
+    TestingVisitor v = new TestingVisitor() {
+      @Override
+      public void visitGroup(Set<Edge> edges, boolean isOptional) {
+        // Reordered and epsilon removed.
+        assertThat(edges).containsExactly(Edge.any(), Edge.fromMask(0x3)).inOrder();
+        assertThat(isOptional).isTrue();
+        wasTested = true;
+      }
+    };
+    group.accept(v);
+    assertThat(group.toString()).isEqualTo("(x|[01])?");
+    assertThat(v.wasTested).isTrue();
+  }
+
+  @Test
+  public void testOrdering() {
+    // Testing ordering is important because when generating regular expressions, the edge order
+    // defines a lot about the visual order of the final regular expression. This order should be
+    // as close to "what a person would consider reasonable" as possible. In fact some of the cases
+    // tested here will never occur in real situations (e.g. sequences compared with groups)
+    // because of the way composite edges are created. However it seems sensible to test the
+    // behaviour nevertheless.
+
+    // Simple Edges
+
+    assertSameOrder(e("0"), e("0"));
+    // "0" < "1" - lowest bit set wins
+    assertOrdered(e("0"), e("1"));
+    // "[01]" < "1" - lowest bit set wins
+    assertOrdered(e("[01]"), e("1"));
+    // "x" < "9" - lowest bit set wins
+    assertOrdered(X, e("9"));
+
+    // Sequences
+
+    // ("0x" < "1") and ("0" < "1x") - first edge in sequence is compared to single edge.
+    assertOrdered(seq(e("0"), X), e("1"));
+    assertOrdered(e("0"), seq(e("1"), X));
+    // "[01]" < "[01]x" - single edges are "smaller" than sequences of edges if all else is equal.
+    assertOrdered(e("[01]"), seq(e("[01]"), X));
+
+    // "[01]x" == "[01]x"
+    assertSameOrder(seq(e("[01]"), X), seq(e("[01]"), X));
+    // "x1" < "x2" - comparing 2 sequences compares all edges.
+    assertOrdered(seq(X, e("1")), seq(X, e("2")));
+
+    // "[01]x" < "[01]xx" - shortest sequence wins in tie break (similar to how "[01]" < "[01]x")
+    assertOrdered(seq(e("[01]"), X), seq(e("[01]"), X, X));
+
+    // Disjunctions
+
+    // "(1|2)" == "(2|1)" - edges are sorted when creating disjunctions
+    assertSameOrder(or(e("1"), e("2")), or(e("2"), e("1")));
+    // "(1|2|3)" < "(1|2|4)" - comparing 2 disjunctions compares all edges.
+    assertOrdered(or(e("1"), e("2"), e("3")), or(e("1"), e("2"), e("4")));
+    // "(1|2)" < "(1|2|3)" - the group with fewer edges wins in tie break
+    assertOrdered(or(e("1"), e("2")), or(e("1"), e("2"), e("3")));
+
+    // Miscellaneous
+
+    // "1" < "(1|2)" - if first edge matches, single edges sort before groups.
+    assertOrdered(e("1"), or(e("1"), e("2")));
+
+    // "(1|x)" < "1x" - because "(1|x)" is actually "(x|1)" and "x" < "1".
+    assertOrdered(or(e("1"), X), seq(e("1"), X));
+  }
+
+  private static void assertSameOrder(Edge lhs, Edge rhs) {
+    assertThat(lhs).isEquivalentAccordingToCompareTo(rhs);
+    assertThat(lhs).isEqualTo(rhs);
+  }
+
+  private static void assertOrdered(Edge lhs, Edge rhs) {
+    assertThat(lhs).isNotEqualTo(rhs);
+    assertThat(lhs).isLessThan(rhs);
+    assertThat(rhs).isGreaterThan(lhs);
+  }
+
+  // A bit like a mock, but not really "mocking" existing behaviour.
+  private static class TestingVisitor implements Visitor {
+    // Set this in overridden method(s).
+    protected boolean wasTested = false;
+
+    @Override
+    public void visit(SimpleEdge edge) {
+      fail("unexpected call");
+    }
+
+    @Override
+    public void visitSequence(List<Edge> edges) {
+      fail("unexpected call");
+    }
+
+    @Override
+    public void visitGroup(Set<Edge> edges, boolean isOptional) {
+      fail("unexpected call");
+    }
+  }
+
+  // The 'any digit' edge.
+  private static final Edge X = e("x");
+
+  // Creates a simple edge from a range specification string for testing.
+  private static SimpleEdge e(String s) {
+    RangeSpecification spec = RangeSpecification.parse(s);
+    Preconditions.checkArgument(spec.length() == 1, "only specify single digit ranges");
+    return SimpleEdge.fromMask(spec.getBitmask(0));
+  }
+
+  // Creates sequence of edges (wrapping for convenience).
+  private static Edge seq(Edge first, Edge second, Edge... rest) {
+    // This already rejects epsilon edges.
+    Edge edge = Edge.concatenation(first, second);
+    for (Edge e : rest) {
+      edge = Edge.concatenation(edge, e);
+    }
+    return edge;
+  }
+
+  // Creates a non-optional disjunction of edges (this class has no helper for optional groups).
+  private static Edge or(Edge... edges) {
+    List<Edge> e = Arrays.asList(edges);
+    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "or() groups must not be optional");
+    return Edge.disjunction(e);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java
new file mode 100644
index 0000000000..e5cacb01ea
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.base.Preconditions;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class EdgeWriterTest {
+
+  // Note that this code is tested very thoroughly by any "round-tripping" of regular expressions
+  // in the metadata (i.e. generating regular expressions from DFAs and then re-parsing them to
+  // ensure that the same DFA is produced). This is part of any acceptance test for generating
+  // regular expressions and serves as a far more comprehensive stress test on the code. These
+  // tests are thus limited to simpler cases and highlighting interesting behaviour.
+
+  // The 'any digit' edge.
+  private static final Edge X = e("x");
+
+  @Test
+  public void testSimple() {
+    assertThat(regex(e("0"))).isEqualTo("0");
+    assertThat(regex(e("[0-7]"))).isEqualTo("[0-7]");
+    assertThat(regex(e("[0-9]"))).isEqualTo("\\d");
+    assertThat(regex(X)).isEqualTo("\\d");
+  }
+
+  @Test
+  public void testSequences() {
+    assertThat(regex(seq(e("0"), e("1"), e("2")))).isEqualTo("012");
+  }
+
+  @Test
+  public void testGroups() {
+    // Non-optional groups spanning the top level don't need parentheses.
+    assertThat(regex(or(e("0"), e("1"), e("2")))).isEqualTo("0|1|2");
+    // Optional groups always need parentheses.
+    assertThat(regex(opt(e("0"), e("1"), e("2")))).isEqualTo("(?:0|1|2)?");
+    // Once a group has prefix or suffix, parentheses are needed.
+    assertThat(regex(
+        seq(
+            or(e("0"), e("1")),
+            e("2"))))
+        .isEqualTo("(?:0|1)2");
+  }
+
+  @Test
+  public void testNesting() {
+    // Basic nesting is handled by a very straightforward edge visitor, so one non-trivial test
+    // will cover all the basic cases ("any digit" sequences are a different matter however).
+    assertThat(regex(
+        seq(
+            e("0"),
+            or(
+                e("1"),
+                seq(
+                    e("2"),
+                    opt(e("3"), e("4")))),
+            e("5"), e("6"))))
+        .isEqualTo("0(?:1|2(?:3|4)?)56");
+  }
+
+  @Test
+  public void testAnyDigitSequences() {
+    // This is the complex part of efficient regular expression generation.
+    assertThat(regex(seq(e("0"), e("1"), X))).isEqualTo("01\\d");
+    // "\d\d" is shorter than "\d{2}"
+    assertThat(regex(seq(X, X))).isEqualTo("\\d\\d");
+    assertThat(regex(seq(X, X, X))).isEqualTo("\\d{3}");
+    // Top level optional groups are supported.
+    assertThat(regex(opt(seq(X, X)))).isEqualTo("(?:\\d{2})?");
+    // Optional parts go at the end.
+    assertThat(regex(
+        seq(
+            opt(seq(X, X)),
+            X, X)))
+        .isEqualTo("\\d\\d(?:\\d{2})?");
+    // "(x(x(x)?)?)?"
+    Edge anyGrp = opt(seq(
+        X,
+        opt(seq(
+            X,
+            opt(X)))));
+    // The two cases of a group on its own or as part of a sequence are handled separately, so
+    // must be tested separately.
+    assertThat(regex(anyGrp)).isEqualTo("\\d{0,3}");
+    assertThat(regex(seq(e("1"), e("2"), anyGrp))).isEqualTo("12\\d{0,3}");
+    // "xx(x(x(x)?)?)?"
+    assertThat(regex(seq(X, X, anyGrp))).isEqualTo("\\d{2,5}");
+    // Combining "any digit" groups produces minimal representation
+    assertThat(regex(seq(anyGrp, anyGrp))).isEqualTo("\\d{0,6}");
+  }
+
+  // Helper to call standard version of regex generator (not using 'dot' for matching).
+  private String regex(Edge e) {
+    return EdgeWriter.toRegex(e, false /* use dot match */);
+  }
+
+  // Creates a simple edge from a range specification string for testing.
+  private static SimpleEdge e(String s) {
+    RangeSpecification spec = RangeSpecification.parse(s);
+    Preconditions.checkArgument(spec.length() == 1, "only specify single digit ranges");
+    return SimpleEdge.fromMask(spec.getBitmask(0));
+  }
+
+  // Creates sequence of edges (wrapping for convenience).
+  private static Edge seq(Edge first, Edge second, Edge... rest) {
+    // This already rejects epsilon edges.
+    Edge edge = Edge.concatenation(first, second);
+    for (Edge e : rest) {
+      edge = Edge.concatenation(edge, e);
+    }
+    return edge;
+  }
+
+  // Creates a non-optional disjunction of edges.
+  private static Edge or(Edge... edges) {
+    List<Edge> e = Arrays.asList(edges);
+    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "use 'opt()' for optional groups");
+    return Edge.disjunction(e);
+  }
+
+  // Creates an optional disjunction of edges.
+  private static Edge opt(Edge... edges) {
+    List<Edge> e = new ArrayList<>();
+    e.addAll(Arrays.asList(edges));
+    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "don't pass epsilon directly");
+    e.add(Edge.epsilon());
+    return Edge.disjunction(e);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java
new file mode 100644
index 0000000000..654a334c0a
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;
+
+import com.google.common.graph.MutableValueGraph;
+import com.google.common.graph.ValueGraph;
+import com.google.common.graph.ValueGraphBuilder;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+
+/** Simple fluent API for constructing graphs for testing. */
+final class NfaBuilder {
+  private final MutableValueGraph<Node, SimpleEdge> graph =
+      ValueGraphBuilder.directed().allowsSelfLoops(false).build();
+  // The last node added to the graph.
+  private Node lastNode;
+
+  /** Creates a new mutable NFA graph. */
+  public NfaBuilder() {
+    graph.addNode(INITIAL);
+    graph.addNode(TERMINAL);
+    lastNode = TERMINAL;
+  }
+
+  /**
+   * Returns an unmodifiable view of the underlying graph (not a snapshot). If the builder is
+   * modified after this method is called, it will affect what was returned.
+   */
+  public ValueGraph<Node, SimpleEdge> graph() {
+    return graph;
+  }
+
+  /** Adds a new path from the given source node, returning the newly created target node. */
+  public Node addPath(Node source, String path) {
+    RangeSpecification spec = RangeSpecification.parse(path);
+    for (int n = 0; n < spec.length(); n++) {
+      lastNode = lastNode.createNext();
+      addEdge(source, lastNode, SimpleEdge.fromMask(spec.getBitmask(n)));
+      source = lastNode;
+    }
+    return lastNode;
+  }
+
+  /** Adds a new path between the given source and target (all intermediate nodes are new). */
+  public void addPath(Node source, Node target, String path) {
+    RangeSpecification spec = RangeSpecification.parse(path);
+    for (int n = 0; n < spec.length() - 1; n++) {
+      lastNode = lastNode.createNext();
+      addEdge(source, lastNode, SimpleEdge.fromMask(spec.getBitmask(n)));
+      source = lastNode;
+    }
+    addEdge(source, target, SimpleEdge.fromMask(spec.getBitmask(spec.length() - 1)));
+  }
+
+  /**
+   * Adds a new path between the given source and target nodes, along with an epsilon edge from the
+   * source to the target.
+   */
+  public void addOptionalPath(Node source, Node target, String path) {
+    addPath(source, target, path);
+    addEpsilon(source, target);
+  }
+
+  private void addEpsilon(Node s, Node t) {
+    checkArgument(graph.nodes().contains(s), "missing source node");
+    checkArgument(graph.nodes().contains(t), "missing target node");
+    SimpleEdge e = graph.putEdgeValue(s, t, Edge.epsilon());
+    if (e != null) {
+      // An edge already existed (and was just overwritten); re-add it in its optional form.
+      checkArgument(!e.equals(Edge.epsilon()) && !e.isOptional(), "epsilon already added");
+      graph.putEdgeValue(s, t, e.optional());
+    }
+  }
+
+  private void addEdge(Node s, Node t, SimpleEdge e) {
+    graph.addNode(s);
+    graph.addNode(t);
+    checkArgument(graph.putEdgeValue(s, t, e) == null, "edge already exists");
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java
new file mode 100644
index 0000000000..adfeb15ffd
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;
+
+import com.google.common.base.Preconditions;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.TreeSet;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class NfaFlattenerTest {
+  // The 'any digit' edge.
+  private static final Edge X = e("x");
+
+  @Test
+  public void testSimple() {
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addPath(INITIAL, TERMINAL, "12");
+    Edge flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(seq(e("1"), e("2")));
+    assertThat(flat.toString()).isEqualTo("12");
+
+    nfa.addPath(INITIAL, TERMINAL, "34");
+    flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(
+        or(
+            seq(e("1"), e("2")),
+            seq(e("3"), e("4"))));
+    assertThat(flat.toString()).isEqualTo("(12|34)");
+  }
+
+  @Test
+  public void testSubgroup() {
+    NfaBuilder nfa = new NfaBuilder();
+    Node split = nfa.addPath(INITIAL, "12");
+    Node join = nfa.addPath(split, "34");
+    nfa.addPath(split, join, "56");
+    nfa.addPath(join, TERMINAL, "78");
+
+    Edge flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(
+        seq(e("1"), e("2"),
+            or(
+                seq(e("3"), e("4")),
+                seq(e("5"), e("6"))
+            ),
+            e("7"), e("8")));
+    assertThat(flat.toString()).isEqualTo("12(34|56)78");
+  }
+
+  @Test
+  public void testSubgroupWithEarlyJoining() {
+    NfaBuilder nfa = new NfaBuilder();
+    // Create a graph with 4 initial paths branching out which collapses to 3, 2 and then 1.
+    Node groupStart = nfa.addPath(INITIAL, "0");
+    // Add 2 edges to the first join point (if we add only one edge then it clashes with the
+    // joining edge, which goes directly from groupStart to firstJoin).
+    Node firstJoin = nfa.addPath(nfa.addPath(groupStart, "1"), "2");
+    nfa.addPath(groupStart, firstJoin, "3");
+    Node secondJoin = nfa.addPath(firstJoin, "4");
+    nfa.addPath(groupStart, secondJoin, "5");
+    Node groupEnd = nfa.addPath(secondJoin, "6");
+    nfa.addPath(groupStart, groupEnd, "7");
+    nfa.addPath(groupEnd, TERMINAL, "8");
+
+    Edge flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(
+        seq(e("0"),
+            or(
+                seq(
+                    or(
+                        seq(
+                            or(
+                                seq(e("1"), e("2")),
+                                e("3")),
+                            e("4")),
+                        e("5")),
+                    e("6")),
+                e("7")),
+            e("8")));
+    assertThat(flat.toString()).isEqualTo("0(((12|3)4|5)6|7)8");
+  }
+
+  @Test
+  public void testPathDuplication() {
+    NfaBuilder nfa = new NfaBuilder();
+    Node groupStart = nfa.addPath(INITIAL, "0");
+    Node lhsMid = nfa.addPath(groupStart, "1");
+    Node groupEnd = nfa.addPath(lhsMid, "2");
+    Node rhsMid = nfa.addPath(groupStart, "3");
+    nfa.addPath(rhsMid, groupEnd, "4");
+    nfa.addPath(groupEnd, TERMINAL, "5");
+
+    // So far this is a normal nestable graph:
+    //           ,--1-->()--2--v
+    // (I)--0-->()             ()--5-->(T)
+    //           `--3-->()--4--^
+    Edge flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(
+        seq(e("0"),
+            or(
+                seq(e("1"), e("2")),
+                seq(e("3"), e("4"))),
+            e("5")));
+    assertThat(flat.toString()).isEqualTo("0(12|34)5");
+
+    // This new path "crosses" the group, creating a non-nestable structure which can only be
+    // resolved by duplicating some path (in this case it's the 2nd part of the right-hand-side).
+    nfa.addPath(lhsMid, rhsMid, "x");
+
+    flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(
+        seq(e("0"),
+            or(
+                seq(e("1"),
+                    or(
+                        e("2"),
+                        seq(X, e("4")))),
+                seq(e("3"), e("4"))),
+            e("5")));
+    // Note the duplication of the '4' to make the graph nestable.
+    assertThat(flat.toString()).isEqualTo("0(1(x4|2)|34)5");
+
+  }
+
+  @Test
+  public void testNodeOrdering_bug_65250963() {
+    //  ,--->(C)----------.
+    //  |                 v
+    // (I)-->(D)-->(B)-->(T)
+    //  |           ^
+    //  `--->(A)----'
+    NfaBuilder nfa = new NfaBuilder();
+    // IMPORTANT: Order of insertion determines the node IDs (A is created first, then B, C, D).
+    // The edge digit just matches that creation order for readability; it doesn't affect the test.
+    Node a = nfa.addPath(INITIAL, "1");
+    Node b = nfa.addPath(a, "2");
+    Node c = nfa.addPath(INITIAL, "3");
+    Node d = nfa.addPath(INITIAL, "4");
+    // Now join up remaining paths.
+    nfa.addPath(d, b, "5");
+    nfa.addPath(b, TERMINAL, "6");
+    nfa.addPath(c, TERMINAL, "7");
+    Comparator<Node> ordering = NfaFlattener.nodeOrdering(nfa.graph());
+
+    // In the old ordering code, because (B) and (D) are not reachable to/from (C) we would have
+    // had the ordering (D < B), (B < C), (C < D) giving a cycle. In the new code, the longest path
+    // length to reach (C) is less than (B), so we get (C < B) and we no longer have a cycle.
+    // The node ordering is now: (INITIAL, A, C, D, B, TERMINAL)
+    TreeSet<Node> nodes = new TreeSet<>(ordering);
+    nodes.add(INITIAL);
+    nodes.add(TERMINAL);
+    nodes.add(a);
+    nodes.add(b);
+    nodes.add(c);
+    nodes.add(d);
+    assertThat(nodes).containsExactly(INITIAL, a, c, d, b, TERMINAL).inOrder();
+  }
+
+  @Test
+  public void testOptionalTopLevelGroup_bug_69101586() {
+    //  ,--->(e)----.
+    //  |           v
+    // (I)-->(A)-->(T)
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addOptionalPath(INITIAL, TERMINAL, "xx");
+    Edge flat = NfaFlattener.flatten(nfa.graph());
+    assertThat(flat).isEqualTo(opt(seq(X, X)));
+    assertThat(flat.toString()).isEqualTo("(xx)?");
+  }
+
+  // Creates a simple edge from a range specification string for testing.
+  private static SimpleEdge e(String s) {
+    RangeSpecification spec = RangeSpecification.parse(s);
+    Preconditions.checkArgument(spec.length() == 1, "only specify single digit ranges");
+    return SimpleEdge.fromMask(spec.getBitmask(0));
+  }
+
+  // Creates sequence of edges (wrapping for convenience).
+  private static Edge seq(Edge first, Edge second, Edge... rest) {
+    // This already rejects epsilon edges.
+    Edge edge = Edge.concatenation(first, second);
+    for (Edge e : rest) {
+      edge = Edge.concatenation(edge, e);
+    }
+    return edge;
+  }
+
+  // Creates an optional disjunction of edges.
+  private static Edge opt(Edge... edges) {
+    List<Edge> e = new ArrayList<>();
+    e.addAll(Arrays.asList(edges));
+    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "don't pass epsilon directly");
+    e.add(Edge.epsilon());
+    return Edge.disjunction(e);
+  }
+
+  // Creates a non-optional disjunction of edges.
+  private static Edge or(Edge... edges) {
+    List<Edge> e = Arrays.asList(edges);
+    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "use 'opt()' for optional groups");
+    return Edge.disjunction(e);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java
new file mode 100644
index 0000000000..00a4d8295d
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class NodeTest {
+  @Test
+  public void testConstants() {
+    assertThat(INITIAL.id()).isEqualTo(0);
+    assertThat(TERMINAL.id()).isEqualTo(1);
+    assertThat(TERMINAL).isNotEqualTo(INITIAL);
+  }
+
+  @Test
+  public void testNext() {
+    assertThat(INITIAL.createNext()).isSameInstanceAs(TERMINAL);
+    assertThat(TERMINAL.createNext()).isNotEqualTo(TERMINAL);
+    assertThat(TERMINAL.createNext().id()).isEqualTo(2);
+    Node node = INITIAL;
+    for (int id = 0; id < 10; id++) {
+      assertThat(node.id()).isEqualTo(id);
+      node = node.createNext();
+    }
+  }
+
+  @Test
+  public void testToString() {
+    Node node = INITIAL;
+    for (int id = 0; id < 10; id++) {
+      assertThat(node.toString()).isEqualTo(Integer.toString(id));
+      node = node.createNext();
+    }
+  }
+
+  // Consistent ordering helps ensure regular expressions derived from graphs are deterministic.
+  @Test
+  public void testOrdering() {
+    assertThat(TERMINAL).isGreaterThan(INITIAL);
+    Node node = INITIAL;
+    for (int id = 0; id < 10; id++) {
+      Node next = node.createNext();
+      assertThat(next).isGreaterThan(node);
+      node = next;
+    }
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java
new file mode 100644
index 0000000000..073a3576b3
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.Edge.any;
+import static com.google.i18n.phonenumbers.metadata.regex.Edge.epsilon;
+import static com.google.i18n.phonenumbers.metadata.regex.Edge.optionalAny;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;
+
+import com.google.common.collect.Iterables;
+import com.google.common.graph.ValueGraph;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import java.util.List;
+import java.util.stream.Stream;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class RangeTreeConverterTest {
+  // Simple 4 node DFA.
+  // (I) --1--> ( ) --2--> ( ) --x--> (T)
+  @Test
+  public void testSimple() {
+    RangeTree dfa = RangeTree.from(specs("12x"));
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
+    assertThat(nfa.nodes()).hasSize(4);
+    Node node = assertOutEdge(nfa, INITIAL, edge(1));
+    node = assertOutEdge(nfa, node, edge(2));
+    node = assertOutEdge(nfa, node, any());
+    assertThat(node).isEqualTo(TERMINAL);
+  }
+
+  // Simple 4 node DFA with optional termination immediately before the terminal node.
+  // (I) --1--> ( ) --2--> (T) --x--> (T)
+  @Test
+  public void testWithOptionalEdge() {
+    RangeTree dfa = RangeTree.from(specs("12x", "12"));
+
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
+    assertThat(nfa.nodes()).hasSize(4);
+    Node node = assertOutEdge(nfa, INITIAL, edge(1));
+    node = assertOutEdge(nfa, node, edge(2));
+    node = assertOutEdge(nfa, node, optionalAny());
+    assertThat(node).isEqualTo(TERMINAL);
+  }
+
+  // Simple 4 node DFA with optional termination.
+  // (I) --1--> (T) --2--> ( ) --x--> (T)
+  @Test
+  public void testWithEpsilon() {
+    RangeTree dfa = RangeTree.from(specs("12x", "1"));
+
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
+    assertThat(nfa.nodes()).hasSize(4);
+    Node node = assertOutEdge(nfa, INITIAL, edge(1));
+    assertOutEdges(nfa, node, edge(2), epsilon());
+    // One of the out nodes should be the terminal.
+    assertThat(follow(nfa, node, epsilon())).isEqualTo(Node.TERMINAL);
+    node = follow(nfa, node, edge(2));
+    // The other is the normal edge that leads to the terminal.
+    node = follow(nfa, node, any());
+    assertThat(node).isEqualTo(TERMINAL);
+  }
+
+  // Simple 5 node DFA with 2 paths.
+  // (I) --1--> ( ) --2--> ( ) --x--> (T)
+  //   `---3--> ( ) --4----^
+  @Test
+  public void testMultiplePathsWithCommonTail() {
+    RangeTree dfa = RangeTree.from(specs("12x", "34x"));
+
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
+    assertThat(nfa.nodes()).hasSize(5);
+
+    assertOutEdges(nfa, INITIAL, edge(1), edge(3));
+    Node lhs = follow(nfa, INITIAL, edge(1));
+    lhs = assertOutEdge(nfa, lhs, edge(2));
+    Node rhs = follow(nfa, INITIAL, edge(3));
+    rhs = assertOutEdge(nfa, rhs, edge(4));
+    assertThat(lhs).isEqualTo(rhs);
+    Node node = assertOutEdge(nfa, lhs, any());
+    assertThat(node).isEqualTo(TERMINAL);
+  }
+
+  @Test
+  public void testOptionalTopLevelGroup_bug_69101586() {
+    // Requires making a top level optional group, which is (deliberately) not easy with the
+    // DFA tooling since it's pretty rare. This is a DFA which can terminate immediately and will
+    // match the empty input (as well as its normal input).
+    RangeTree dfa = RangeTree.from(specs("xx")).union(RangeTree.from(RangeSpecification.empty()));
+
+    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
+    assertThat(nfa.nodes()).hasSize(3);
+    assertThat(follow(nfa, INITIAL, epsilon())).isEqualTo(Node.TERMINAL);
+    Node node = follow(nfa, INITIAL, any());
+    node = assertOutEdge(nfa, node, any());
+    assertThat(node).isEqualTo(TERMINAL);
+  }
+
+  // Returns the simple edge matching exactly this one digit value.
+  SimpleEdge edge(int n) {
+    return SimpleEdge.fromMask(1 << n);
+  }
+
+  List<RangeSpecification> specs(String... s) {
+    return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList());
+  }
+
+  // Asserts that a node has only one out edge and returns that edge's target.
+  Node assertOutEdge(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge edge) {
+    assertThat(nfa.successors(node)).hasSize(1);
+    Node target = Iterables.getOnlyElement(nfa.successors(node));
+    assertThat(nfa.edgeValue(node, target).get()).isEqualTo(edge);
+    return target;
+  }
+
+  // Asserts that a node has all the given edges.
+  void assertOutEdges(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge... edges) {
+    assertThat(nfa.successors(node)).hasSize(edges.length);
+    List<Edge> out = nfa.successors(node).stream()
+        .map(t -> nfa.edgeValue(node, t).get())
+        .collect(toImmutableList());
+    assertThat(out).containsExactlyElementsIn(edges);
+  }
+
+  // Follows the given edge from a node (which must be in the graph), returning the target node
+  // (or null if the edge does not exist in the graph).
+  Node follow(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge edge) {
+    return nfa.successors(node).stream()
+        .filter(t -> nfa.edgeValue(node, t).get().equals(edge))
+        .findFirst()
+        .orElse(null);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java
new file mode 100644
index 0000000000..1879c80e86
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.FORCE_CAPTURING_GROUPS;
+import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.FORCE_NON_CAPTURING_GROUPS;
+import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.PRESERVE_CAPTURING_GROUPS;
+
+import com.google.common.base.Joiner;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class RegexFormatterTest {
+
+  // Luckily the formatter cares only about 3 special characters, '(', '|' and ')', so we only need
+  // to test a few very straightforward cases to cover everything.
+
+  @Test
+  public void testSimple() {
+    assertThat(RegexFormatter.format("abcd", PRESERVE_CAPTURING_GROUPS))
+        .isEqualTo("abcd");
+  }
+
+  @Test
+  public void testNested() {
+    assertThat(RegexFormatter.format("ab(cd|ef)gh", PRESERVE_CAPTURING_GROUPS)).isEqualTo(lines(
+        "ab(",
+        "  cd|",
+        "  ef",
+        ")gh"));
+
+    assertThat(RegexFormatter.format("ab(?:cd|ef)gh", PRESERVE_CAPTURING_GROUPS)).isEqualTo(lines(
+        "ab(?:",
+        "  cd|",
+        "  ef",
+        ")gh"));
+  }
+
+  @Test
+  public void testDoubleNested() {
+    assertThat(RegexFormatter.format("ab(cd(ef|gh)|ij)", PRESERVE_CAPTURING_GROUPS))
+        .isEqualTo(lines(
+            "ab(",
+            "  cd(",
+            "    ef|",
+            "    gh",
+            "  )|",
+            "  ij",
+            ")"));
+
+    assertThat(RegexFormatter.format("ab(cd(?:ef|gh)|ij)", PRESERVE_CAPTURING_GROUPS))
+        .isEqualTo(lines(
+            "ab(",
+            "  cd(?:",
+            "    ef|",
+            "    gh",
+            "  )|",
+            "  ij",
+            ")"));
+  }
+
+  @Test
+  public void testForceNonCapturingGroups() {
+    assertThat(RegexFormatter.format("ab(?:cd(ef|gh)|ij)", FORCE_NON_CAPTURING_GROUPS))
+        .isEqualTo(lines(
+            "ab(?:",
+            "  cd(?:",
+            "    ef|",
+            "    gh",
+            "  )|",
+            "  ij",
+            ")"));
+  }
+
+  @Test
+  public void testForceCapturingGroups() {
+    assertThat(RegexFormatter.format("ab(?:cd(ef|gh)|ij)", FORCE_CAPTURING_GROUPS)).isEqualTo(lines(
+        "ab(",
+        "  cd(",
+        "    ef|",
+        "    gh",
+        "  )|",
+        "  ij",
+        ")"));
+  }
+
+  private static String lines(String... s) {
+    return Joiner.on('\n').join(s);
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java
new file mode 100644
index 0000000000..258719d4cb
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.base.CharMatcher.whitespace;
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.RegexGenerator.basic;
+import static java.util.stream.Collectors.joining;
+
+import com.google.common.collect.ImmutableList;
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class RegexGeneratorTest {
+  @Test
+  public void testSimple() {
+    assertRegex(basic(), ranges("123xxx"), "123\\d{3}");
+    // This could be improved to "..." rather than ".{3}" saving 1 char, probably not worth it.
+    assertRegex(basic().withDotMatch(), ranges("123xxx"), "123.{3}");
+  }
+
+  @Test
+  public void testVariableLength() {
+    assertRegex(basic(), ranges("123xxx", "123xxxx", "123xxxxx", "123xxxxxx"), "123\\d{3,6}");
+  }
+
+  @Test
+  public void testTailOptimization() {
+    RangeTree dfa = ranges("123xxx", "123xxxx", "145xxx");
+    assertRegex(basic(), dfa, "1(?:23\\d{3,4}|45\\d{3})");
+    assertRegex(basic().withTailOptimization(), dfa, "1(?:23\\d?|45)\\d{3}");
+  }
+
+  @Test
+  public void testDfaFactorization() {
+    // Essentially create a "thin" wedge of specific non-determinism with the shorter (5-digit)
+    // numbers which prevents the larger ranges from being contiguous in the DFA.
+    RangeTree dfa = ranges("1234x", "1256x", "[0-4]xxxxxx", "[0-4]xxxxxxx");
+    assertRegex(basic(), dfa,
+        "[02-4]\\d{6,7}|",
+        "1(?:[013-9]\\d{5,6}|",
+        "2(?:[0-246-9]\\d{4,5}|",
+        "3(?:[0-35-9]\\d{3,4}|4\\d(?:\\d{2,3})?)|",
+        "5(?:[0-57-9]\\d{3,4}|6\\d(?:\\d{2,3})?)))");
+    assertRegex(basic().withDfaFactorization(), dfa, "[0-4]\\d{6,7}|12(?:34|56)\\d");
+  }
+
+  @Test
+  public void testSubgroupOptimization() {
+    // The subgraph of "everything except 95, 96 and 100" (this appears in China leading digits).
+    RangeTree postgraph = ranges("[02-8]", "1[1-9]", "10[1-9]", "9[0-47-9]");
+    RangeTree pregraph = ranges("123", "234", "345", "456", "567");
+
+    // Cross product of pre and post paths.
+    RangeTree subgraph = RangeTree.from(
+        pregraph.asRangeSpecifications().stream()
+            .flatMap(a -> postgraph.asRangeSpecifications().stream().map(a::extendBy)));
+
+    // Union in other paths to trigger repetition in the "basic" case.
+    RangeTree rest = ranges("128xx", "238xx", "348xx", "458xx", "568xx");
+    RangeTree dfa = rest.union(subgraph);
+
+    assertRegex(basic(), dfa,
+        "12(?:3(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
+        "23(?:4(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
+        "34(?:5(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
+        "45(?:6(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
+        "56(?:7(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)");
+
+    assertRegex(basic().withSubgroupOptimization(), dfa,
+        "(?:12|23|34|45|56)8\\d\\d|",
+        "(?:123|234|345|456|567)(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])");
+  }
+
+  @Test
+  public void testRegression_bug_65250963() {
+    RangeTree dfa = ranges(
+        "1387",
+        "1697",
+        "1524",
+        "1539",
+        "1768",
+        "1946");
+    assertRegex(basic(), dfa,
+        "1(?:",
+        "  (?:",
+        "    38|",
+        "    69",
+        "  )7|",
+        "  5(?:",
+        "    24|",
+        "    39",
+        "  )|",
+        "  768|",
+        "  946",
+        ")");
+  }
+
+  @Test
+  public void testRegression_bug_68929642() {
+    assertMatches(
+        "1\\d{6}(?:\\d{2})?",
+        ImmutableList.of("1234567", "123456789"),
+        ImmutableList.of("12345678"),
+        "1xxx_xxx", "1xx_xxx_xxx");
+
+    assertMatches(
+        "1\\d{6}[0-7]?",
+        ImmutableList.of("1234567", "12345670"),
+        ImmutableList.of("123456", "123456700"),
+        "1xxx_xxx", "1x_xxx_xx[0-7]");
+
+    assertMatches(
+        "\\d\\d?",
+        ImmutableList.of("1", "12"),
+        ImmutableList.of("", "123"),
+        "x", "xx");
+
+    assertMatches(
+        "\\d{1,3}",
+        ImmutableList.of("1", "12", "123"),
+        ImmutableList.of("", "1234"),
+        "x", "xx", "xxx");
+
+    assertMatches(
+        "\\d(?:\\d{3}(?:\\d{2})?)?",
+        ImmutableList.of("1", "1234", "123456"),
+        ImmutableList.of("", "12", "123", "12345", "1234567"),
+        "x", "xxxx", "xxx_xxx");
+
+    assertMatches(
+        "(?:\\d\\d(?:\\d(?:\\d{2,4})?)?)?",
+        ImmutableList.of("", "12", "123", "12345", "123456", "1234567"),
+        ImmutableList.of("1", "1234", "12345678"),
+        "", "xx", "xxx", "xx_xxx", "xxx_xxx", "xxxx_xxx");
+
+    assertMatches(
+        "(?:\\d{2})?",
+        ImmutableList.of("", "12"),
+        ImmutableList.of("1", "123"),
+        "", "xx");
+
+    assertMatches(
+        "\\d?",
+        ImmutableList.of("", "1"),
+        ImmutableList.of("12"),
+        "", "x");
+  }
+
+  // Checks that the regex generated from the given specs is exactly the expected pattern, and that
+  // the given positive/negative example numbers match (or don't match) both of them as expected.
+  private static void assertMatches(
+      String pattern, List<String> matchNumbers, List<String> noMatchNumbers, String... specs) {
+    String regex = basic().toRegex(ranges(specs));
+    assertThat(regex).isEqualTo(pattern);
+
+    // Test the given positive/negative match numbers and expect the same behaviour from both.
+    for (String number : matchNumbers) {
+      assertThat(number).matches(pattern);
+      assertThat(number).matches(regex);
+    }
+    for (String number : noMatchNumbers) {
+      assertThat(number).doesNotMatch(pattern);
+      assertThat(number).doesNotMatch(regex);
+    }
+  }
+
+  private static void assertRegex(RegexGenerator generator, RangeTree dfa, String... lines) {
+    String regex = generator.toRegex(dfa);
+    String expected = Arrays.stream(lines).map(whitespace()::removeFrom).collect(joining());
+    assertThat(regex).isEqualTo(expected);
+  }
+
+  private static RangeTree ranges(String... specs) {
+    return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse));
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java
new file mode 100644
index 0000000000..46e9f94cd0
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.truth.Truth8.assertThat;
+
+import com.google.i18n.phonenumbers.metadata.RangeSpecification;
+import com.google.i18n.phonenumbers.metadata.RangeTree;
+import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
+import com.google.i18n.phonenumbers.metadata.regex.SubgroupOptimizer.LinkNodeVisitor;
+import java.util.Arrays;
+import java.util.Optional;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class SubgraphOptimizerTest {
+  // The subgraph of "everything except 95, 96 and 100" (this appears in China leading digits).
+  // Note that unlike China, there's also an early terminating '9' in the subgraph to ensure that
+  // the entire subgraph is extracted (including the terminating node).
+  private static final RangeTree POSTGRAPH = ranges("[02-9]", "1[1-9]", "10[1-9]", "9[0-47-9]");
+
+  // Some prefixes which come before the subgraph.
+  private static final RangeTree PREGRAPH = ranges("123", "234", "345", "456", "567");
+
+  // Cross product of pre and post paths.
+  private static final RangeTree SUBGRAPH = RangeTree.from(
+      PREGRAPH.asRangeSpecifications().stream()
+          .flatMap(a -> POSTGRAPH.asRangeSpecifications().stream().map(a::extendBy)));
+
+  // Additional paths which share edges in the subgraph and will cause repetition in regular
+  // expressions. Also add a couple of early terminating paths "on the way to" the subgraph.
+  // Note however that a terminating path that reaches the root of the subgraph (e.g. "123") will
+  // cause a split in the DFA at the root node (one terminating, one not terminating).
+  private static final RangeTree TEST_RANGES =
+      SUBGRAPH.union(ranges("128xx", "238xx", "348xx", "458xx", "568xx", "12", "34"));
+
+  @Test
+  public void testSubgraphWeightAndInOrder() {
+    LinkNodeVisitor v = new LinkNodeVisitor();
+    TEST_RANGES.accept(v);
+    DfaNode n = v.getHighestCostNode();
+    assertThat(n).isNotNull();
+    // 5 paths in PREGRAPH which reach the root of POSTGRAPH.
+    assertThat(v.getInOrder(n)).isEqualTo(5);
+    // 7 edges in POSTGRAPH with a total weight of 27:
+    // "[02-8]" = 6, "1", "0", "9" = 3, 2 x "[1-9]" = 10, "[0-47-9]" = 8
+    assertThat(v.getSubgraphWeight(n)).isEqualTo(27);
+  }
+
+  @Test
+  public void testSubgraphExtraction() {
+    Optional<RangeTree> extracted = SubgroupOptimizer.extractRepeatingSubgraph(TEST_RANGES);
+    assertThat(extracted).hasValue(SUBGRAPH);
+    // The "bridge" node is the same, so we extract the whole graph (so we return nothing).
+    assertThat(SubgroupOptimizer.extractRepeatingSubgraph(SUBGRAPH)).isEmpty();
+    // There's no repetition in this graph, so return nothing.
+    assertThat(SubgroupOptimizer.extractRepeatingSubgraph(ranges("123", "234", "345"))).isEmpty();
+  }
+
+  private static RangeTree ranges(String... specs) {
+    return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse));
+  }
+}
diff --git a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java
new file mode 100644
index 0000000000..0884d87f35
--- /dev/null
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.i18n.phonenumbers.metadata.regex;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
+import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;
+
+import com.google.common.graph.ValueGraph;
+import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TrailingPathOptimizerTest {
+  @Test
+  public void testSimple() {
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addPath(INITIAL, TERMINAL, "12xx");
+    nfa.addPath(INITIAL, TERMINAL, "34xxxx");
+    ValueGraph<Node, SimpleEdge> actual = TrailingPathOptimizer.optimize(nfa.graph());
+
+    // Expect the common trailing "xx" to be factored out at some new join point.
+    NfaBuilder expected = new NfaBuilder();
+    Node join = expected.addPath(INITIAL, "12");
+    expected.addPath(INITIAL, join, "34xx");
+    expected.addPath(join, TERMINAL, "xx");
+
+    assertEquivalent(actual, expected);
+  }
+
+  @Test
+  public void testTrailingOptionalGroup() {
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addPath(INITIAL, TERMINAL, "12xx");
+    // Add path "34xx(xx)?"
+    Node optStart = nfa.addPath(INITIAL, "34xx");
+    nfa.addOptionalPath(optStart, TERMINAL, "xx");
+
+    ValueGraph<Node, SimpleEdge> actual = TrailingPathOptimizer.optimize(nfa.graph());
+
+    // Expect the common trailing "xx" to be factored out at some new join point.
+    NfaBuilder expected = new NfaBuilder();
+    Node join = expected.addPath(INITIAL, "12");
+    // Add "34(xx)?" up to the joining node.
+    optStart = expected.addPath(INITIAL, "34");
+    expected.addOptionalPath(optStart, join, "xx");
+    // Add the trailing "xx".
+    expected.addPath(join, TERMINAL, "xx");
+
+    assertEquivalent(actual, expected);
+  }
+
+  @Test
+  public void testDoubleRecursion() {
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addPath(INITIAL, TERMINAL, "12xx");
+    nfa.addPath(INITIAL, TERMINAL, "34xxxx");
+    // Add path "56xxxx(xx)?"
+    Node optStart = nfa.addPath(INITIAL, "56xxxx");
+    nfa.addOptionalPath(optStart, TERMINAL, "xx");
+    ValueGraph<Node, SimpleEdge> actual = TrailingPathOptimizer.optimize(nfa.graph());
+
+    // Factoring should be applied twice to pull out 2 lots of "xx".
+    // How I wish we had a way to embed proper graphs in JavaDoc!
+    //
+    //    ,-----------12-----------v
+    // (I)------34----->(1)--xx-->(2)--xx-->(T)
+    //    `-56-->()--xx--^
+    //            `--e---^
+    //
+    NfaBuilder expected = new NfaBuilder();
+    Node secondJoin = expected.addPath(INITIAL, "12");
+    expected.addPath(secondJoin, TERMINAL, "xx");
+    Node firstJoin = expected.addPath(INITIAL, "34");
+    expected.addPath(firstJoin, secondJoin, "xx");
+    optStart = expected.addPath(INITIAL, "56");
+    expected.addOptionalPath(optStart, firstJoin, "xx");
+
+    assertEquivalent(actual, expected);
+  }
+
+  @Test
+  public void testNoChangeIfNoCommonFactor() {
+    NfaBuilder nfa = new NfaBuilder();
+    nfa.addPath(INITIAL, TERMINAL, "12xxxxxx");
+    // Add path "34xxx(xx)?" which, while it shares 'xxx' with '12xxxxxx', will not be factored
+    // because splitting out 'xxx' would make the resulting regular expression longer
+    // (e.g. "(?:34\d{2}?|12\d{3})\d{3}" is longer than "34\d{2}?\d{3}|12\d{6}").
+    //
+    // Note that there are some cases in which this isn't true (shorter sequences like 'x' might be
+    // splittable without cost), but they are unlikely to ever make the expression shorter,
+    // especially if they result in adding new parentheses for grouping.
+    Node optStart = nfa.addPath(INITIAL, "34xxx");
+    nfa.addOptionalPath(optStart, TERMINAL, "xx");
+
+    ValueGraph<Node, SimpleEdge> actual = TrailingPathOptimizer.optimize(nfa.graph());
+    assertEquivalent(actual, nfa);
+  }
+
+  private static void assertEquivalent(ValueGraph<Node, SimpleEdge> actual, NfaBuilder expected) {
+    // This is a somewhat cheeky way to test graph isomorphism and relies on the fact that graph
+    // flattening is deterministic according to how edges sort and doesn't care about node values.
+    // It also, obviously, relies on the flattening code to be vaguely well tested.
+    assertThat(NfaFlattener.flatten(actual)).isEqualTo(NfaFlattener.flatten(expected.graph()));
+  }
+}
diff --git a/metadata/src/test/proto/regression_test.proto b/metadata/src/test/proto/regression_test.proto
new file mode 100644
index 0000000000..c2a460b830
--- /dev/null
+++ b/metadata/src/test/proto/regression_test.proto
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 The Libphonenumber Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+package i18n.phonenumbers.internal.finitestatematcher.compiler;
+
+option java_package = "com.google.i18n.phonenumbers.internal.finitestatematcher.compiler";
+option java_outer_classname = "RegressionTestProto";
+
+// A set of regression tests.
+message Tests {
+  repeated TestCase test_case = 1;
+}
+
+// A single regression test entry.
+message TestCase {
+  // A name for the test, ideally unique.
+  string name = 1;
+  // If set true, expect that the test will fail 100% of the time. This is
+  // useful to test that test numbers have enough coverage to force a failure
+  // and is typically achieved by modifying an input range after generating a
+  // passing test (or carefully modifying the output bytecodes). Note that not
+  // all changes will make a test fail 100% of the time, so care must be taken
+  // to avoid creating a flaky test (e.g. don't change a "[0-3]" to "[0-5]", as
+  // this only fails if the test number contains a 4 or 5 at the corresponding
+  // index; change it to "[4-6]" so there's no overlap and at least one test
+  // number that's valid for that range will not be accepted by the matcher).
+  bool should_fail = 2;
+  // The input ranges (in the form of range specifications) which form the DFA
+  // to be tested (e.g. "1[2-5]678xxxxx" etc...).
+  repeated string range = 3;
+  // The expected output bytes, encoded in test files using C-style hex notation
+  // (i.e. \xHH). This can be split over multiple lines for readability.
+  repeated bytes expected = 4;
+}