Skip to content

Commit

Permalink
Project import generated by Copybara. (#2890)
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 509849832
  • Loading branch information
hagbard authored Feb 15, 2023
1 parent 6223a36 commit f63cf2c
Show file tree
Hide file tree
Showing 45 changed files with 6,559 additions and 111 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright (C) 2022 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.i18n.phonenumbers.metadata;

import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Integer.parseUnsignedInt;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.ImmutableSortedSet;
import java.util.List;
import java.util.NavigableSet;
import java.util.TreeSet;

/** Parses strings of form "4,7-9,11" which are used as length specifiers across LPN metadata */
public final class LengthsParser {

private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults(whitespace());
private static final Splitter RANGE_SPLITTER =
Splitter.on('-').trimResults(whitespace()).limit(2);
private static final CharMatcher ALLOWED_CHARACTERS =
CharMatcher.inRange('0', '9').or(CharMatcher.anyOf("-,")).or(whitespace());

/** Returns the set of integers specified by this string. */
public static ImmutableSortedSet<Integer> parseLengths(String s) {
checkArgument(
ALLOWED_CHARACTERS.matchesAllOf(s),
"Length specifier contains forbidden characters: %s",
s);
NavigableSet<Integer> lengths = new TreeSet<>();
for (String lengthOrRange : COMMA_SPLITTER.split(s)) {
if (lengthOrRange.contains("-")) {
List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange);
int lo = parseUnsignedInt(lohi.get(0));
int hi = parseUnsignedInt(lohi.get(1));
checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi);
checkArgument(
lengths.isEmpty() || lo > lengths.last(),
"Numbers in length specifier are out of order: %s",
s);
lengths.addAll(ContiguousSet.closed(lo, hi));
} else {
int length = parseUnsignedInt(lengthOrRange);
checkArgument(
lengths.isEmpty() || length > lengths.last(),
"Numbers in length specifier are out of order: %s",
s);
lengths.add(length);
}
}
return ImmutableSortedSet.copyOf(lengths);
}

private LengthsParser() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public final class Types {
.put(XML_VOIP, VOIP)
.put(XML_UAN, UAN)
.put(XML_VOICEMAIL, VOICEMAIL)
.build();
.buildOrThrow();

/** Returns the set of valid XML type names. */
public static ImmutableSet<String> getXmlNames() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.i18n.phonenumbers.metadata.finitestatematcher;

import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode.State;

/**
* Matches phone number regular expressions based on compact compiled data generated by
* {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler
* MatcherCompiler}. Typically the phone number regular expression will be compiled at build time
* and the resulting matcher data will be packaged into the binary which needs it, or downloaded at
* run time.
* <p>
* This class is designed to be lightweight and fast, and should be simple to implement in many
* different languages (C++, Python, JS, etc.).
*
* TODO: Consider UnisgnedBytes.toInt(x) to avoid lots of (x & 0xFF).
*/
public abstract class DigitSequenceMatcher {

/** Possible result types returned by a match operation. */
public enum Result {
/** The match operation was a success and the input was matched. */
MATCHED,
/** The match operation failed because unexpected input was encountered. */
INVALID,
/**
* The match operation failed because the input terminated too soon (ie, the input was a
* valid prefix for the matcher).
*/
TOO_SHORT,
/**
* The match operation failed due to the existence of additional input after matching had
* completed (ie, the the input would have matched if it were shorter).
*/
TOO_LONG;
}

/** An iterator of {@code int}, used to supply the matcher with a sequence of input digits. */
public interface DigitSequence {
/** Returns true if there are more digits available. */
boolean hasNext();

/**
* Return the next digit value (from 0 to 9 inclusive, not a char value). The matcher does not
* test for invalid digits, so returning values outside this range will have undefined results,
* including false positive results.
*/
int next();
}

/** Internal abstraction to allow matching over either byte arrays or strings. */
interface DataView {
/** Return the unsigned byte value at the given offset from the current position. */
int peekByte(int offset);

/** Return the unsigned byte value at the current position and move ahead 1 byte. */
int readByte();

/** Return the unsigned short value at the current position and move ahead 2 bytes. */
int readShort();

/** Return the unsigned int value at the current position and move ahead 4 bytes. */
int readInt();

/** Adjust the current position by the given (non-negative) offset. */
State branch(int offset);

/**
* Adjust the current position by the unsigned byte offset value read from the current
* position plus the given index. This is used to implement maps and branching ranges.
*/
State jumpTable(int index);
}

/**
* Creates a new matcher which reads instructions directly from the given byte array. Typically
* it is expected that this method will consume byte arrays packaged into a binary at build time
* (the MatcherCompiler is not suitable for direct parsing of regular expressions at run time).
* <p>
* See {@code MatcherCompiler.compile(...)}.
*/
public static DigitSequenceMatcher create(byte[] data) {
if (data.length == 0) {
throw new IllegalArgumentException("matcher data cannot be empty");
}
return new ByteArrayMatcher(data);
}

/**
* Creates a new matcher which reads instructions from the given string. Typically it is expected
* that this method will be used when matcher data is packaged as literal Java string constants
* in (auto-generated) source files.
* <p>
* See {@code MatcherCompiler.compileToUnquotedJavaSourceString(...)}.
*/
public static DigitSequenceMatcher create(String data) {
if (data.isEmpty()) {
throw new IllegalArgumentException("matcher data cannot be empty");
}
return new StringMatcher(data);
}

abstract DataView newDataView();

abstract int size();

/** Matches the input against this matcher, returning a result code. */
public Result match(DigitSequence in) {
State state = runMatcher(in);
switch (state) {
case TERMINAL:
return !in.hasNext() ? Result.MATCHED : Result.TOO_LONG;
case TRUNCATED:
return Result.TOO_SHORT;
case INVALID:
return Result.INVALID;
default:
throw new AssertionError("unexpected state: " + state);
}
}

private State runMatcher(DigitSequence in) {
DataView data = newDataView();
State state;
do {
state = OpCode.decode(data.peekByte(0)).execute(data, in);
} while (state == State.CONTINUE);
return state;
}

@Override
public String toString() {
int size = size();
StringBuilder out = new StringBuilder(size + " :: [ ");
DataView data = newDataView();
while (size-- > 0) {
out.append(Integer.toHexString(data.readByte())).append(", ");
}
out.setLength(out.length() - 2);
out.append(" ]");
return out.toString();
}

/** A matcher for reading instructions from a byte array. */
private static final class ByteArrayMatcher extends DigitSequenceMatcher {

private class ByteArrayData implements DataView {
int position = 0;

@Override public int peekByte(int offset) {
return bytes[position + offset] & 0xFF;
}

@Override public int readByte() {
return bytes[position++] & 0xFF;
}

@Override public int readShort() {
return (readByte() << 8) | readByte();
}

@Override public int readInt() {
return (readShort() << 16) | readShort();
}

@Override public State branch(int offset) {
position += offset;
return offset != 0 ? State.CONTINUE : State.TERMINAL;
}

@Override public State jumpTable(int index) {
return branch(peekByte(index));
}
}

private final byte[] bytes;

private ByteArrayMatcher(byte[] data) {
this.bytes = data;
}

@Override
DataView newDataView() {
return new ByteArrayData();
}

@Override
int size() {
return bytes.length;
}
}

/** A matcher for reading instructions from a String. */
private static final class StringMatcher extends DigitSequenceMatcher {

/*
* Note: Using unsigned shift "x >>> 1" is more likely to be free as part of a data load
* instruction than "x / 2".
*/

private class StringData implements DataView {
int position = 0;

@Override public int peekByte(int offset) {
offset += position;
int data = bytes.charAt(offset >>> 1);
// char := hi [ even-byte | odd-byte ] lo
return (offset & 1) != 0 ? data & 0xFF : data >>> 8;
}

@Override public int readByte() {
int data = bytes.charAt(position >>> 1);
// char := hi [ even-byte | odd-byte ] lo
data = (position & 1) != 0 ? data & 0xFF : data >>> 8;
position += 1;
return data;
}

@Override public int readShort() {
int data = bytes.charAt(position >>> 1);
// Adding 2 early does not affect odd/even (but does reference next char).
position += 2;
if ((position & 1) != 0) {
data = ((data & 0xFF) << 8) | (bytes.charAt(position >>> 1) >>> 8);
}
return data;
}

@Override public int readInt() {
return (readShort() << 16) | readShort();
}

@Override public State branch(int offset) {
position += offset;
return offset != 0 ? State.CONTINUE : State.TERMINAL;
}

@Override public State jumpTable(int index) {
return branch(peekByte(index));
}
}

private final String bytes;

private StringMatcher(String bytes) {
this.bytes = bytes;
}

@Override
DataView newDataView() {
return new StringData();
}

@Override
int size() {
int size = 2 * bytes.length();
if ((bytes.charAt(bytes.length() - 1) & 0xFF) == 0xFF) {
size -= 1;
}
return size;
}
}

/** An iterator of {@code int} that yields a sequence of input digits from a string. */
private static final class StringDigits implements DigitSequence {
private final CharSequence number;
private int n = 0;

private StringDigits(CharSequence number) {
this.number = number;
}

@Override public int next() {
if (n < 0 || n >= number.length()) {
throw new IndexOutOfBoundsException(
"index '" + n + "' out of bounds for input: " + number);
}
char c = number.charAt(n);
if (c < '0' || c > '9') {
throw new IllegalArgumentException(
"non-digit character '" + c + "' [" + ((int) c) + "] at index " + n + " in: " + number);
}
n++;
return c - '0';
}

@Override public boolean hasNext() {
return n < number.length();
}
}

/**
* Returns an instance of DigitSequence based on the input string. The input string may only
* contain digits.
*/
public static DigitSequence digitsFromString(CharSequence number) {
return new StringDigits(number);
}

/** A matcher has no internal state and is just a factory for data specific implementations. */
private DigitSequenceMatcher() { }
}
Loading

0 comments on commit f63cf2c

Please sign in to comment.