Skip to content

Commit

Permalink
add regular chars
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Aug 7, 2023
1 parent 5f28e65 commit b4b245b
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
4 changes: 2 additions & 2 deletions velox/type/Subfield.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

namespace facebook::velox::common {

Subfield::Subfield(const std::string& path) {
Tokenizer tokenizer(path);
Subfield::Subfield(const std::string& path, const std::vector<char>& regularChars) {
Tokenizer tokenizer(path, regularChars);
VELOX_CHECK(tokenizer.hasNext(), "Column name is missing: {}", path);

auto firstElement = tokenizer.next();
Expand Down
2 changes: 1 addition & 1 deletion velox/type/Subfield.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class Subfield {
};

public:
explicit Subfield(const std::string& path);
explicit Subfield(const std::string& path, const std::vector<char>& regularChars = {});

explicit Subfield(std::vector<std::unique_ptr<PathElement>>&& path);

Expand Down
16 changes: 12 additions & 4 deletions velox/type/Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

namespace facebook::velox::common {

Tokenizer::Tokenizer(const std::string& path) : path_(path) {
// Builds a tokenizer over `path`. `regularChars` is copied into regularChars_
// and consulted by treatAsRegularCharacter(). Tokenization starts at offset 0
// in the kNotReady state.
Tokenizer::Tokenizer(
    const std::string& path,
    const std::vector<char>& regularChars)
    : path_(path),
      regularChars_(regularChars),
      index_(0),
      state(State::kNotReady) {}
Expand Down Expand Up @@ -87,6 +87,9 @@ void Tokenizer::match(char expected) {
}

bool Tokenizer::tryMatch(char expected) {
if (treatAsRegularCharacter(expected)) {
return false;
}
if (!hasNextCharacter() || peekCharacter() != expected) {
return false;
}
Expand Down Expand Up @@ -143,13 +146,18 @@ std::unique_ptr<Subfield::PathElement> Tokenizer::matchUnquotedSubscript() {
return std::make_unique<Subfield::LongSubscript>(index);
}

bool Tokenizer::isUnquotedPathCharacter(char c) {
return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' ||
// Returns true when `c` is one of the characters stored in regularChars_,
// i.e. one the caller supplied to the constructor.
bool Tokenizer::treatAsRegularCharacter(char c) {
  for (const char regular : regularChars_) {
    if (regular == c) {
      return true;
    }
  }
  return false;
}

// Reports whether `c` is accepted in an unquoted path segment: any
// caller-supplied regular character, one of ':', '$', '-', '/', '@', '|',
// '#', or anything accepted by isUnquotedSubscriptCharacter().
bool Tokenizer::isUnquotedPathCharacter(char c) {
  if (treatAsRegularCharacter(c)) {
    return true;
  }
  switch (c) {
    case ':':
    case '$':
    case '-':
    case '/':
    case '@':
    case '|':
    case '#':
      return true;
    default:
      return isUnquotedSubscriptCharacter(c);
  }
}

// Reports whether `c` is accepted in an unquoted subscript: any
// caller-supplied regular character, '-', '_', or an alphanumeric character.
bool Tokenizer::isUnquotedSubscriptCharacter(char c) {
  // Single return: a leading return without the treatAsRegularCharacter()
  // check would make the regular-character rule unreachable.
  // The cast is required because passing a negative plain `char` (e.g. a
  // non-ASCII byte) to isalnum is undefined behaviour.
  return treatAsRegularCharacter(c) || c == '-' || c == '_' ||
      isalnum(static_cast<unsigned char>(c));
}

std::unique_ptr<Subfield::PathElement> Tokenizer::matchQuotedSubscript() {
Expand Down
5 changes: 4 additions & 1 deletion velox/type/Tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Tokenizer {
kFailed,
};

explicit Tokenizer(const std::string& path);
explicit Tokenizer(const std::string& path, const std::vector<char>& regularChars = {});

bool hasNext();

Expand All @@ -51,6 +51,7 @@ class Tokenizer {
const char UNICODE_CARET = '^';

const std::string path_;
std::vector<char> regularChars_;
int index_;
State state;
bool firstSegment = true;
Expand All @@ -74,6 +75,8 @@ class Tokenizer {

bool tryToComputeNext();

bool treatAsRegularCharacter(char c);

void invalidSubfieldPath();

bool isUnquotedPathCharacter(char c);
Expand Down

0 comments on commit b4b245b

Please sign in to comment.